From d2aa213fffbad830edfc6729a241e5eced61ca7e Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Sat, 7 Mar 2026 12:21:32 +0000
Subject: [PATCH 1/2] Add mandatory PoC requirements to playbook, templates,
 and data model

Every finding must now include a reproducible Proof of Concept with:
- Numbered reproduction steps
- Exact payload/command (copy-pasteable)
- Raw evidence output proving exploitation
- Impact demonstration (shown, not described)
- Screenshot evidence where applicable

Changes:
- claude_playbook.md: Add PoC Requirements section with per-vuln-class
  minimums and validation checklist
- VulnerabilityEntry: Add poc_steps (list[str]) and poc_payload (str)
  fields with descriptive metadata
- All 8 prompt templates: Add PoC mandate to report sections and
  guidelines (findings without PoC downgraded to info)
- Agent prompts: Ingestion extracts PoC from tool output, Processing
  preserves PoC during dedup, Synthesis validates PoC completeness

https://claude.ai/code/session_01UC8hHFgeUeMFboP3grei4t
---
 blhackbox/models/aggregated_payload.py        | 32 ++++++-
 blhackbox/prompts/agents/ingestionagent.md    | 17 +++-
 blhackbox/prompts/agents/processingagent.md   | 20 ++++-
 blhackbox/prompts/agents/synthesisagent.md    | 20 ++++-
 blhackbox/prompts/claude_playbook.md          | 83 ++++++++++++++++++-
 blhackbox/prompts/templates/api-security.md   | 26 ++++--
 blhackbox/prompts/templates/bug-bounty.md     | 28 +++++--
 .../prompts/templates/full-attack-chain.md    | 42 +++++++---
 blhackbox/prompts/templates/full-pentest.md   | 31 ++++++-
 .../templates/network-infrastructure.md       | 23 ++++-
 blhackbox/prompts/templates/quick-scan.md     |  8 +-
 .../prompts/templates/vuln-assessment.md      | 26 +++++-
 .../prompts/templates/web-app-assessment.md   | 25 +++++-
 13 files changed, 329 insertions(+), 52 deletions(-)

diff --git a/blhackbox/models/aggregated_payload.py b/blhackbox/models/aggregated_payload.py
index a5d93b9..be8d0d9 100644
--- a/blhackbox/models/aggregated_payload.py
+++ b/blhackbox/models/aggregated_payload.py
@@ -54,7 +54,12 @@ class ServiceEntry(BaseModel):
 
 
 class VulnerabilityEntry(BaseModel):
-    """A single vulnerability finding."""
+    """A single vulnerability finding with mandatory PoC data.
+
+    Every finding MUST include proof-of-concept information.  A finding
+    without a reproducible PoC is not valid and should be downgraded to
+    severity "info" with a note that exploitation could not be confirmed.
+    """
 
     id: str = ""
     title: str = ""
@@ -64,7 +69,30 @@ class VulnerabilityEntry(BaseModel):
     port: int = 0
     description: str = ""
     references: list[str] = Field(default_factory=list)
-    evidence: str = ""
+    evidence: str = Field(
+        default="",
+        description=(
+            "Raw tool output, HTTP response, or terminal output proving "
+            "the vulnerability exists.  Must be concrete, not theoretical."
+        ),
+    )
+    poc_steps: list[str] = Field(
+        default_factory=list,
+        description=(
+            "Ordered reproduction steps that allow an independent tester "
+            "to confirm the finding.  Example: "
+            '["1. Navigate to /login", '
+            '"2. Enter \' OR 1=1-- in username", '
+            '"3. Observe 302 redirect to /admin"]'
+        ),
+    )
+    poc_payload: str = Field(
+        default="",
+        description=(
+            "The exact payload, command, or HTTP request used to exploit "
+            "the vulnerability.  Must be copy-pasteable."
+        ),
+    )
     tool_source: str = ""
     likely_false_positive: bool = False
 
diff --git a/blhackbox/prompts/agents/ingestionagent.md b/blhackbox/prompts/agents/ingestionagent.md
index 2e37458..2e98d89 100644
--- a/blhackbox/prompts/agents/ingestionagent.md
+++ b/blhackbox/prompts/agents/ingestionagent.md
@@ -69,7 +69,9 @@ explanation text. The JSON must match this schema exactly:
       "port": 80,
       "description": "Path traversal allowing file read outside webroot",
       "references": ["https://nvd.nist.gov/vuln/detail/CVE-2021-12345"],
-      "evidence": "GET /..%2f..%2fetc/passwd returned 200",
+      "evidence": "GET /..%2f..%2fetc/passwd returned 200 with body: root:x:0:0:root:/root:/bin/bash ...",
+      "poc_steps": ["1. Send GET request to /..%2f..%2fetc/passwd", "2. Observe HTTP 200 response with /etc/passwd contents"],
+      "poc_payload": "curl 'http://192.168.1.1/..%2f..%2fetc/passwd'",
       "tool_source": "nikto"
     }
   ],
@@ -143,12 +145,16 @@ explanation text. The JSON must match this schema exactly:
 - Extract the HTTP method and URL from each finding
 - Note outdated server versions as vulnerabilities (severity: "info" or "low")
 - Extract missing security headers and map to `http_headers[].missing_security_headers`
+- **PoC**: Use the nikto finding URL + method as `poc_payload`, the full nikto output
+  line as `evidence`
 
 ### sqlmap
 - Extract confirmed injection points as critical vulnerabilities
 - Include the injection type (blind, error-based, time-based, UNION)
 - Include the DBMS type and version if detected
 - Each confirmed injection point = severity "critical"
+- **PoC**: Extract the sqlmap command as `poc_payload`, the injection point URL + parameter
+  as step 1 of `poc_steps`, the DBMS confirmation as `evidence`
 
 ### wpscan
 - Map plugin/theme vulnerabilities to `vulnerabilities[]` with CVE IDs
@@ -158,6 +164,7 @@ explanation text. The JSON must match this schema exactly:
 ### hydra/medusa
 - Each successful login goes in `credentials[]`
 - Include the service type (ssh, ftp, http-form, etc.)
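+- **PoC**: For any `vulnerabilities[]` entry created from a successful login (e.g., weak/default credentials), use the hydra/medusa command as `poc_payload` and "Successful login: user:pass" as `evidence`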
 
 ### SSL/TLS scans
 - Map to `ssl_certs[]`
@@ -175,6 +182,14 @@ explanation text. The JSON must match this schema exactly:
 7. Treat informational findings as severity "info" — do not skip them.
 8. Arrays that have no data should be `[]`, objects with no data should be `{}`.
 9. Output ONLY valid JSON — no markdown fences, no commentary.
+10. **Extract PoC data for every vulnerability:**
+    - `evidence`: Raw tool output or HTTP response proving the finding (never empty for confirmed vulns).
+    - `poc_steps`: Ordered list of steps to reproduce. Extract from tool output where possible
+      (e.g., sqlmap shows injection steps, nikto shows the request path).
+    - `poc_payload`: The exact command, payload, or HTTP request used. Extract from tool
+      invocation or output (e.g., the sqlmap command line, the nikto finding URL).
+    - If PoC data is not available from the tool output, set `poc_steps: []` and `poc_payload: ""`
+      but ALWAYS populate `evidence` with the raw tool output that detected the finding.
 
 ## Example
 
diff --git a/blhackbox/prompts/agents/processingagent.md b/blhackbox/prompts/agents/processingagent.md
index 8a642f7..96912dd 100644
--- a/blhackbox/prompts/agents/processingagent.md
+++ b/blhackbox/prompts/agents/processingagent.md
@@ -79,6 +79,9 @@ explanation text. The JSON must match this schema:
   increase confidence in the vulnerability.
 - Correlate technology detection (whatweb) with vulnerability reports — if a CVE
   applies to a detected technology version, flag it.
+- **When merging duplicate findings, preserve the best PoC data:** keep the entry
+  with the most complete `poc_steps`, `poc_payload`, and `evidence`. Merge evidence
+  from both tools (e.g., "Detected by: nikto, nuclei. nikto output: ... nuclei output: ...").
 
 ### 4. Severity Assessment
 Reassess severity using these pentesting-specific rules:
@@ -127,11 +130,22 @@ Populate `attack_surface` by counting:
 - `ssl_issues`: SSL/TLS problems (expired, weak cipher, old protocol)
 - `high_value_targets`: List of the most interesting targets for further exploitation
 
-### 8. Data Preservation
+### 8. PoC Data Preservation
+**Never discard PoC data.** Every vulnerability entry must retain its `evidence`,
+`poc_steps`, and `poc_payload` fields through processing. A finding without PoC
+evidence is not a valid finding.
+
+- When deduplicating, keep the PoC with the most detail.
+- When compressing low-severity findings, still preserve at least the `evidence` field.
+- If a finding has empty `poc_steps` and `poc_payload`, it must be flagged with
+  `"likely_false_positive": true` unless the `evidence` field alone is sufficient
+  to confirm the vulnerability.
+
+### 9. Data Preservation
 Never discard data with security value. If an error or anomaly could indicate a
 security control (WAF, IDS, rate limiter, geo-block), keep it in error_log.
 
-### 9. Output
+### 10. Output
 Output ONLY valid JSON — no markdown fences, no commentary.
 
 ## Example
@@ -161,7 +175,7 @@ Output ONLY valid JSON — no markdown fences, no commentary.
     "hosts": [{"ip": "10.0.0.1", "hostname": "target.com", "os": "Linux 5.4", "ports": [{"port": 80, "protocol": "tcp", "state": "open", "service": "http", "version": "nginx/1.18.0", "banner": "", "nse_scripts": {"http-title": "Login Page"}}, {"port": 443, "protocol": "tcp", "state": "filtered", "service": "", "version": "", "banner": "", "nse_scripts": {}}]}],
     "ports": [],
     "services": [],
-    "vulnerabilities": [{"id": "CVE-2021-3449", "title": "OpenSSL DoS", "severity": "high", "cvss": 7.5, "host": "10.0.0.1", "port": 443, "description": "NULL pointer dereference in signature_algorithms processing. Confirmed by multiple tools.", "references": ["https://nvd.nist.gov/vuln/detail/CVE-2021-3449"], "evidence": "Detected by: nikto, nuclei", "tool_source": "nikto,nuclei", "likely_false_positive": false}],
+    "vulnerabilities": [{"id": "CVE-2021-3449", "title": "OpenSSL DoS", "severity": "high", "cvss": 7.5, "host": "10.0.0.1", "port": 443, "description": "NULL pointer dereference in signature_algorithms processing. Confirmed by multiple tools.", "references": ["https://nvd.nist.gov/vuln/detail/CVE-2021-3449"], "evidence": "Detected by: nikto, nuclei. nikto: + OpenSSL/1.1.1j appears vulnerable to CVE-2021-3449. nuclei: [CVE-2021-3449] [high] https://10.0.0.1:443", "poc_steps": ["1. Run nikto against target on port 443", "2. Run nuclei with CVE-2021-3449 template against target", "3. Both tools confirm the vulnerability"], "poc_payload": "nuclei -u https://10.0.0.1 -t CVE-2021-3449.yaml", "tool_source": "nikto,nuclei", "likely_false_positive": false}],
     "endpoints": [{"url": "/admin", "method": "GET", "status_code": 200, "content_length": 5432, "redirect": ""}, {"url": "/api/v1/users", "method": "GET", "status_code": 401, "content_length": 45, "redirect": ""}],
     "subdomains": ["mail.target.com", "dev.target.com", "staging.target.com"],
     "technologies": [],
diff --git a/blhackbox/prompts/agents/synthesisagent.md b/blhackbox/prompts/agents/synthesisagent.md
index 1082f0d..425d467 100644
--- a/blhackbox/prompts/agents/synthesisagent.md
+++ b/blhackbox/prompts/agents/synthesisagent.md
@@ -122,6 +122,9 @@ No preamble, no markdown fences, no explanation text.
 - If the same host appears with different port lists, merge the port lists (union).
 - If tool_source differs, combine them ("nikto,nuclei").
 - For version strings, prefer the more specific version (e.g., "1.18.0" over "1.18").
+- **When merging vulnerabilities, keep the most complete PoC data** — prefer the entry
+  with non-empty `poc_steps`, `poc_payload`, and `evidence`. If both have PoC data,
+  merge the evidence from both tools.
 
 ### 3. Error Log Merging
 - Take error_log from Processing Agent output.
@@ -163,13 +166,22 @@ Generate prioritized remediation steps:
   - `architecture`: Design-level change (network segmentation, auth system overhaul)
   - `process`: Operational change (credential rotation, monitoring, incident response)
 
-### 7. Completeness
+### 7. PoC Validation
+- **Every vulnerability with severity > "info" MUST have PoC data.**
+- Check that `evidence` is non-empty for all confirmed vulnerabilities.
+- Check that `poc_steps` has at least one step for critical and high findings.
+- If a vulnerability has severity ≥ "low" but empty `evidence`, `poc_steps`, and
+  `poc_payload`, downgrade it to "info" and add a note in the description:
+  "Downgraded: exploitation could not be confirmed — no PoC evidence available."
+- A finding without a PoC is not a valid finding.
+
+### 8. Completeness
 - Every field in the schema MUST be present.
 - Missing arrays → `[]`. Missing strings → `""`. Missing numbers → `0`.
 - Metadata: populate what you can from the input. Set fields you cannot determine
   to their zero values.
 
-### 8. Output
+### 9. Output
 Output ONLY valid JSON — no markdown fences, no commentary.
 
 ## Example
@@ -189,7 +201,7 @@ Output ONLY valid JSON — no markdown fences, no commentary.
     "findings": {
       "hosts": [{"ip": "10.0.0.1", "hostname": "target.com", "os": "Linux 5.4", "ports": [{"port": 80, "protocol": "tcp", "state": "open", "service": "http", "version": "nginx/1.18.0", "banner": "", "nse_scripts": {"http-title": "Login Page"}}]}],
       "subdomains": ["mail.target.com", "dev.target.com"],
-      "vulnerabilities": [{"id": "CVE-2021-3449", "title": "OpenSSL DoS", "severity": "high", "cvss": 7.5, "host": "10.0.0.1", "port": 443, "description": "OpenSSL denial of service. Confirmed by multiple tools.", "references": [], "evidence": "Detected by: nikto, nuclei", "tool_source": "nikto,nuclei"}],
+      "vulnerabilities": [{"id": "CVE-2021-3449", "title": "OpenSSL DoS", "severity": "high", "cvss": 7.5, "host": "10.0.0.1", "port": 443, "description": "OpenSSL denial of service. Confirmed by multiple tools.", "references": [], "evidence": "Detected by: nikto, nuclei. nikto: OpenSSL/1.1.1j vulnerable. nuclei: [CVE-2021-3449] [high] confirmed", "poc_steps": ["1. Run nikto against target on port 443", "2. Run nuclei with CVE-2021-3449 template", "3. Both tools confirm vulnerability in OpenSSL 1.1.1j"], "poc_payload": "nuclei -u https://10.0.0.1 -t CVE-2021-3449.yaml", "tool_source": "nikto,nuclei"}],
       "endpoints": [{"url": "/admin", "method": "GET", "status_code": 200, "content_length": 5432, "redirect": ""}],
       "http_headers": [{"host": "target.com", "port": 80, "missing_security_headers": ["X-Frame-Options", "Content-Security-Policy", "Strict-Transport-Security"], "server": "nginx/1.18.0", "x_powered_by": ""}],
       "ports": [], "services": [], "technologies": [], "ssl_certs": [], "credentials": [], "whois": {}, "dns_records": []
@@ -207,7 +219,7 @@ Output ONLY valid JSON — no markdown fences, no commentary.
     "hosts": [{"ip": "10.0.0.1", "hostname": "target.com", "os": "Linux 5.4", "ports": [{"port": 80, "protocol": "tcp", "state": "open", "service": "http", "version": "nginx/1.18.0", "banner": "", "nse_scripts": {"http-title": "Login Page"}}]}],
     "ports": [],
     "services": [],
-    "vulnerabilities": [{"id": "CVE-2021-3449", "title": "OpenSSL DoS", "severity": "high", "cvss": 7.5, "host": "10.0.0.1", "port": 443, "description": "OpenSSL denial of service. Confirmed by multiple tools.", "references": [], "evidence": "Detected by: nikto, nuclei", "tool_source": "nikto,nuclei"}],
+    "vulnerabilities": [{"id": "CVE-2021-3449", "title": "OpenSSL DoS", "severity": "high", "cvss": 7.5, "host": "10.0.0.1", "port": 443, "description": "OpenSSL denial of service. Confirmed by multiple tools.", "references": [], "evidence": "Detected by: nikto, nuclei. nikto: OpenSSL/1.1.1j vulnerable. nuclei: [CVE-2021-3449] [high] confirmed", "poc_steps": ["1. Run nikto against target on port 443", "2. Run nuclei with CVE-2021-3449 template", "3. Both tools confirm vulnerability in OpenSSL 1.1.1j"], "poc_payload": "nuclei -u https://10.0.0.1 -t CVE-2021-3449.yaml", "tool_source": "nikto,nuclei"}],
     "endpoints": [{"url": "/admin", "method": "GET", "status_code": 200, "content_length": 5432, "redirect": ""}],
     "subdomains": ["mail.target.com", "dev.target.com"],
     "technologies": [],
diff --git a/blhackbox/prompts/claude_playbook.md b/blhackbox/prompts/claude_playbook.md
index a22f5b4..cde6be6 100644
--- a/blhackbox/prompts/claude_playbook.md
+++ b/blhackbox/prompts/claude_playbook.md
@@ -68,10 +68,11 @@ Append every raw output to the same `raw_outputs` dict.
 
 ---
 
-## Phase 3 -- Enumeration
+## Phase 3 -- Enumeration & Exploitation
 
 **Objective:** Deep-dive into web services, directories, technologies, and
-application-layer weaknesses.
+application-layer weaknesses. Validate every finding with a concrete Proof of
+Concept (PoC).
 
 | Task |
 |------|
@@ -84,6 +85,12 @@ application-layer weaknesses.
 | Exploit validation |
 | Credential extraction from traffic |
 | Web application reconnaissance |
+| **PoC development for every confirmed finding** |
+| **Screenshot evidence capture for visual proof** |
+
+For every vulnerability or finding discovered, you **MUST** produce a PoC before
+moving to Phase 4. A finding without a PoC is not a valid finding. See the
+[PoC Requirements](#poc-requirements) section below.
 
 Append every raw output to `raw_outputs`.
 
@@ -105,6 +112,8 @@ Append every raw output to `raw_outputs`.
    - **Deduplicate** findings across tools (same CVE from nikto + nuclei → one entry)
    - **Correlate** cross-tool evidence (nmap version + nikto CVE → higher confidence)
    - **Assess severity** using pentesting rules (RCE = critical, XSS = medium, etc.)
+   - **Attach PoC data** to every vulnerability — populate `evidence`,
+     `poc_steps`, and `poc_payload` fields (see [PoC Requirements](#poc-requirements))
    - **Extract errors** (timeouts, WAF blocks, rate limits) into `error_log`
      with `security_relevance` ratings
    - **Generate executive summary** with risk level, top findings, and attack chains
@@ -153,10 +162,19 @@ For each finding include:
 - Title / CVE (if available)
 - Affected host(s) and port(s)
 - CVSS score (if available)
-- Description of the vulnerability
-- Evidence / proof of concept
+- Description of the vulnerability (root cause, not just the symptom)
+- **Proof of Concept (MANDATORY)** — see [PoC Requirements](#poc-requirements)
+  - Numbered steps to reproduce
+  - Exact command, payload, or request used
+  - Tool output or HTTP response proving exploitation
+  - Screenshot evidence (where applicable)
+  - Impact demonstration (what the attacker gained)
 - References
 
+> **A finding without a PoC is not a valid finding.** If you cannot produce a
+> reproducible PoC, downgrade the finding to "info" severity and note that
+> exploitation could not be confirmed.
+
 ### 4. Anomalies & Scan Artifacts
 
 Pull entries from `payload.error_log` where `security_relevance` is `medium` or
@@ -191,6 +209,63 @@ Provide prioritized, actionable remediation guidance:
 
 ---
 
+## PoC Requirements
+
+**Every vulnerability and finding MUST include a Proof of Concept (PoC).** A
+report with findings that only describe a vulnerability without demonstrating
+it is not valid. An administrator who was not present during the test must be
+able to independently reproduce and confirm each finding using only the PoC.
+
+### Required PoC Elements
+
+For **every** finding (critical through low severity), provide:
+
+| Element | Description |
+|---------|-------------|
+| **Reproduction steps** | Numbered, chronological steps to replicate the finding |
+| **Exact command/payload** | Copy-pasteable tool commands, HTTP requests, or exploit payloads |
+| **Raw output/response** | Terminal output, HTTP response body, or tool output proving the exploit worked |
+| **Impact demonstration** | What the attacker gained — not theoretical, but shown (e.g., data returned, shell obtained, privilege escalated) |
+| **Screenshot evidence** | Visual proof via `take_screenshot` / `take_element_screenshot` where applicable |
+
+### PoC by Vulnerability Class
+
+| Vulnerability Class | Minimum PoC Requirement |
+|---------------------|-------------------------|
+| SQL Injection | Injection payload, DBMS response, extracted sample data (max 5 rows) |
+| XSS (Reflected/Stored) | Payload, reflected/stored output in response body, screenshot of rendered payload |
+| RCE / Command Injection | Payload, command output (e.g., `id`, `whoami`), proof of execution |
+| LFI / Path Traversal | Traversal payload, file contents returned (e.g., `/etc/passwd`) |
+| SSRF | Request to internal endpoint, response proving internal access |
+| Authentication Bypass | Steps showing unauthenticated access to protected resource |
+| IDOR | Two requests showing access to another user's data via ID manipulation |
+| Default/Weak Credentials | Service, username:password pair, screenshot of authenticated session |
+| Missing Security Headers | HTTP response headers dump, list of missing headers with risk explanation |
+| SSL/TLS Issues | SSL scan output showing weak ciphers, expired certs, or outdated protocols |
+| Information Disclosure | Exact endpoint and response body containing sensitive data |
+
+### Storing PoC Data in AggregatedPayload
+
+When building the `AggregatedPayload`, populate these `VulnerabilityEntry` fields:
+
+- `evidence`: Raw tool output, HTTP response, or terminal output proving the finding
+- `poc_steps`: Ordered list of reproduction steps (e.g., `["1. Navigate to /login", "2. Enter payload ' OR 1=1-- in username field", "3. Observe 302 redirect to /admin"]`)
+- `poc_payload`: The exact payload, command, or request used (e.g., `"sqlmap -u 'http://target/page?id=1' --dbs --batch"` or the raw HTTP request)
+
+### PoC Validation Checklist
+
+Before including a finding in the report, verify:
+
+- [ ] Can someone reproduce this with only the PoC steps provided?
+- [ ] Is the exact payload/command included and copy-pasteable?
+- [ ] Does the evidence (output/response) clearly prove the vulnerability exists?
+- [ ] Is the impact demonstrated, not just described?
+- [ ] Are screenshots captured for visual findings (XSS, exposed panels, error pages)?
+
+If any check fails, the PoC is incomplete — go back and gather the missing evidence.
+
+---
+
 ## Notes
 
 - If any tool call fails, log the error and continue with remaining tools.
diff --git a/blhackbox/prompts/templates/api-security.md b/blhackbox/prompts/templates/api-security.md
index 542927d..bba9bf1 100644
--- a/blhackbox/prompts/templates/api-security.md
+++ b/blhackbox/prompts/templates/api-security.md
@@ -128,17 +128,30 @@ Look for:
 
 ### Step 9: API Security Report
 
-Using the `AggregatedPayload`, produce a detailed report:
+Using the `AggregatedPayload`, produce a detailed report.
+
+> **Every finding MUST include a Proof of Concept.** A finding that only
+> describes a vulnerability without demonstrating it is not valid.
+
+For each finding, include a complete PoC:
+- Numbered reproduction steps (independently reproducible)
+- Exact API request (method, URL, headers, body — copy-pasteable as cURL)
+- Raw API response proving exploitation
+- Impact demonstration (what data was accessed, what action was performed — shown, not described)
+
+Findings without PoC must be downgraded to "info" severity.
+
+Report sections:
 
 1. **Executive Summary** — API security posture overview
 2. **API Inventory** — all discovered endpoints with methods and response codes
-3. **Authentication Assessment** — auth mechanism analysis and weaknesses
-4. **Authorization Issues** — BOLA, IDOR, privilege escalation findings
-5. **Injection Vulnerabilities** — SQL, NoSQL, command injection findings
+3. **Authentication Assessment** — auth mechanism analysis and weaknesses, with PoC
+4. **Authorization Issues** — BOLA, IDOR, privilege escalation with PoC for each
+5. **Injection Vulnerabilities** — SQL, NoSQL, command injection with PoC for each
 6. **OWASP API Top 10 Mapping** — findings mapped to API-specific risks
 7. **Traffic Analysis** — API traffic insights, credential findings, data flow analysis
 8. **Data Exposure** — sensitive data leaks, verbose errors, missing protections
-9. **Configuration Issues** — CORS, rate limiting, security headers
+9. **Configuration Issues** — CORS, rate limiting, security headers with evidence
 10. **Attack Chains** — combined API vulnerability paths
 11. **Remediation Priorities** — ordered by severity and exploitability
 
@@ -151,3 +164,6 @@ Using the `AggregatedPayload`, produce a detailed report:
 - Check both authenticated and unauthenticated access
 - Record every tool output for post-processing
 - Map findings to OWASP API Security Top 10
+- **Every finding MUST have a PoC** — reproduction steps, exact request, raw response, and impact proof
+- Findings without PoC are not valid and must be downgraded to "info" severity
+- Populate `poc_steps`, `poc_payload`, and `evidence` fields in every `VulnerabilityEntry`
diff --git a/blhackbox/prompts/templates/bug-bounty.md b/blhackbox/prompts/templates/bug-bounty.md
index 7021227..9ceb8a1 100644
--- a/blhackbox/prompts/templates/bug-bounty.md
+++ b/blhackbox/prompts/templates/bug-bounty.md
@@ -151,16 +151,29 @@ Using the `AggregatedPayload`, produce findings in bug bounty format:
 
 For EACH vulnerability, provide:
 
+> **A finding without a PoC will be rejected by any bug bounty program.**
+> The PoC must be complete enough that the program's security team can
+> reproduce the issue without asking for clarification.
+
 1. **Title** — clear, descriptive vulnerability title
 2. **Severity** — Critical / High / Medium / Low (using CVSS if applicable)
-3. **Summary** — one-paragraph description of the vulnerability
-4. **Steps to Reproduce** — numbered, exact steps to reproduce
-5. **Impact** — what an attacker can achieve (data access, account takeover, RCE, etc.)
-6. **Proof of Concept** — tool output, request/response pairs, traffic captures, annotated screenshots
-7. **Affected Endpoint** — exact URL, parameter, and method
-8. **Remediation** — how to fix the vulnerability
+3. **Summary** — one-paragraph description of the root cause (not just the symptom)
+4. **Steps to Reproduce (MANDATORY)** — numbered, exact steps to reproduce from scratch.
+   A program triager must be able to follow these steps and confirm the vulnerability.
+5. **Proof of Concept (MANDATORY):**
+   - Exact payload, command, or cURL request (copy-pasteable)
+   - Raw HTTP request and response showing the exploit
+   - Tool output proving exploitation succeeded
+   - Annotated screenshots showing the vulnerability in the browser/response
+   - For chained bugs: PoC for each step in the chain
+6. **Impact** — what an attacker can achieve, **demonstrated not described**
+   (e.g., "extracted user PII" with sample data, not "could potentially access data")
+7. **Affected Endpoint** — exact URL, parameter, HTTP method
+8. **Remediation** — how to fix the vulnerability (specific, not generic)
 9. **References** — CVEs, CWEs, OWASP categories
 
+Populate `poc_steps`, `poc_payload`, and `evidence` in every `VulnerabilityEntry`.
+
 Sort findings by severity (critical first) and potential bounty value.
 
 ---
@@ -173,3 +186,6 @@ Sort findings by severity (critical first) and potential bounty value.
 - Capture screenshots of each vulnerability for visual proof of concept
 - Write reports in bug bounty format (not pentest format)
 - Each finding should be independently reportable
+- **Every finding MUST have a complete PoC** — without it, the report will be rejected
+- PoC must be independently reproducible by the program's security team
+- Populate `poc_steps`, `poc_payload`, and `evidence` fields in every `VulnerabilityEntry`
diff --git a/blhackbox/prompts/templates/full-attack-chain.md b/blhackbox/prompts/templates/full-attack-chain.md
index 29224bc..c8e569d 100644
--- a/blhackbox/prompts/templates/full-attack-chain.md
+++ b/blhackbox/prompts/templates/full-attack-chain.md
@@ -131,13 +131,22 @@ For each discovered subdomain with web services, perform service detection.
 18. Use element screenshots to target specific DOM elements showing XSS payloads, error messages, or exposed data
 19. Annotate screenshots with labels and highlight boxes marking vulnerability locations
 
-For each successful exploit, record:
-- Exact steps to reproduce
-- Request/response pairs
-- Annotated screenshots
-- Impact assessment
+**For each finding, produce a complete PoC (MANDATORY):**
 
-**Output:** Validated exploits with proof of concept and impact. Be specific and correct.
+> **A finding without a PoC is not a valid finding.** Every vulnerability must
+> have a reproducible PoC that an independent tester can use to confirm it.
+
+| PoC Element | Requirement |
+|-------------|-------------|
+| **Reproduction steps** | Numbered, chronological steps to replicate from scratch |
+| **Exact payload/command** | Copy-pasteable — the literal command, HTTP request, or payload used |
+| **Raw evidence output** | Terminal output, HTTP response body, or tool output proving success |
+| **Impact demonstration** | What was gained — data extracted, shell obtained, privilege escalated (shown, not described) |
+| **Screenshots** | Visual proof via `take_screenshot` / `take_element_screenshot` with annotations |
+
+Populate `evidence`, `poc_steps`, and `poc_payload` fields in every `VulnerabilityEntry`.
+
+**Output:** Validated exploits with complete, reproducible PoCs and demonstrated impact.
 
 ### Phase 5: Attack Chain Construction
 
@@ -220,20 +229,24 @@ professional penetration test report:
 - Visual chain representation (text diagram)
 
 #### 5. Findings — Critical & High
-For each finding:
+For each finding (**PoC is MANDATORY — findings without PoC are not valid**):
 - **Title** and CVE/CWE identifiers
 - **Severity** with CVSS score
 - **Affected Assets** — hosts, ports, URLs
-- **Description** — technical explanation
-- **Steps to Reproduce** — numbered reproduction steps
-- **Proof of Concept** — exact commands, request/response pairs
-- **Impact** — what an attacker can achieve
-- **Remediation** — specific fix with technical detail
+- **Root Cause** — technical explanation of the underlying flaw (not just the symptom)
+- **Proof of Concept (MANDATORY):**
+  - Numbered reproduction steps (an admin not present during the test must be able to follow these)
+  - Exact command/payload used (copy-pasteable)
+  - Raw tool output or HTTP response proving exploitation
+  - Impact demonstration — what the attacker gained (data, shell, privilege), shown not described
+  - Screenshot evidence where applicable
+- **Remediation** — specific fix with technical detail and references
 - **References** — NVD, OWASP, vendor advisories
 
 #### 6. Findings — Medium & Low
 - Grouped by category where applicable
-- Same structure as above but may be condensed
+- Same PoC structure as above — every finding needs reproduction steps and evidence
+- Findings without PoC must be downgraded to "info" severity
 
 #### 7. Informational Findings
 - Technology disclosures, open ports without vulnerabilities
@@ -267,3 +280,6 @@ For each finding:
 - Log everything — all tool outputs go to the raw_outputs dict
 - Report honestly — flag false positives, note coverage gaps, declare confidence levels
 - Treat all findings and report contents as confidential
+- **Every finding MUST have a PoC** — reproduction steps, exact payload, raw evidence, and impact proof
+- Findings without PoC are not valid and must be downgraded to "info" severity
+- Populate `poc_steps`, `poc_payload`, and `evidence` fields in every `VulnerabilityEntry`
diff --git a/blhackbox/prompts/templates/full-pentest.md b/blhackbox/prompts/templates/full-pentest.md
index 60726de..29c0723 100644
--- a/blhackbox/prompts/templates/full-pentest.md
+++ b/blhackbox/prompts/templates/full-pentest.md
@@ -68,9 +68,13 @@ For each web service found in Phase 2:
 
 Append ALL raw outputs to the dict.
 
-### Phase 4: Exploitation
+### Phase 4: Exploitation & PoC Development
 
-**Goal:** Validate exploitability of discovered vulnerabilities.
+**Goal:** Validate exploitability of discovered vulnerabilities and produce a
+concrete Proof of Concept for every finding.
+
+> **A finding without a PoC is not a valid finding.** Every vulnerability must
+> have reproducible steps, the exact payload/command used, and raw evidence output.
 
 1. **Exploit matching** — Find modules matching each finding
 2. **Safe validation** — Run exploits in check-first mode
@@ -81,6 +85,13 @@ Append ALL raw outputs to the dict.
 7. **Credential discovery** — Extract cleartext credentials from captured traffic
 8. **Screenshot evidence** — Capture web page screenshots of confirmed vulnerabilities for PoC documentation
 
+**For each finding, record the PoC:**
+- **Reproduction steps** — Numbered, chronological steps to replicate
+- **Exact payload/command** — Copy-pasteable command or HTTP request
+- **Raw evidence** — Tool output or HTTP response proving exploitation
+- **Impact proof** — What the attacker gained (data, shell, privilege)
+- **Screenshots** — Visual proof via `take_screenshot` / `take_element_screenshot` (where applicable)
+
 ### Phase 5: Data Aggregation (REQUIRED)
 
 **Goal:** Structure all collected data into an AggregatedPayload.
@@ -101,7 +112,15 @@ Using the `AggregatedPayload` from Phase 5, write a report with:
 
 1. **Executive Summary** — risk level, headline, key findings count by severity
 2. **Scope & Methodology** — target, tools used, testing window
-3. **Findings by Severity** — critical, high, medium, low, info — with CVE, CVSS, evidence, remediation
+3. **Findings by Severity** — critical, high, medium, low, info — each finding rated low or above MUST include:
+   - CVE/CWE identifiers and CVSS score
+   - Description of root cause (not just the symptom)
+   - **PoC: Numbered reproduction steps**
+   - **PoC: Exact payload/command used (copy-pasteable)**
+   - **PoC: Raw output/response proving exploitation**
+   - **PoC: Impact demonstration (what the attacker gained)**
+   - **PoC: Screenshot evidence (where applicable)**
+   - Remediation with specific technical steps
 4. **Exploitation Results** — exploit validation results, session evidence
 5. **Network Traffic Analysis** — packet capture insights, credential findings
 6. **Attack Chains** — chains of findings that combine for greater impact
@@ -109,9 +128,15 @@ Using the `AggregatedPayload` from Phase 5, write a report with:
 8. **Remediation Roadmap** — prioritized, actionable steps
 9. **Appendix** — full host inventory, tool metadata, compression stats
 
+> **Any finding without a complete PoC must be downgraded to "info" severity
+> with a note that exploitation could not be confirmed.**
+
 ---
 
 ## Guidelines
 
 - Log all tool errors and continue with remaining tools
 - Treat all findings as confidential
+- **Every finding MUST have a PoC** — reproduction steps, exact payload, raw evidence, and impact proof
+- Findings without PoC are not valid and must be downgraded to "info" severity
+- Populate `poc_steps`, `poc_payload`, and `evidence` fields in every `VulnerabilityEntry`
diff --git a/blhackbox/prompts/templates/network-infrastructure.md b/blhackbox/prompts/templates/network-infrastructure.md
index 17a8179..24927b4 100644
--- a/blhackbox/prompts/templates/network-infrastructure.md
+++ b/blhackbox/prompts/templates/network-infrastructure.md
@@ -92,15 +92,29 @@ brute force attacks without explicit authorization.
 
 ### Step 8: Network Assessment Report
 
-Using the `AggregatedPayload`, produce a detailed report:
+Using the `AggregatedPayload`, produce a detailed report.
+
+> **Every finding MUST include a Proof of Concept.** A finding that only
+> describes a vulnerability without demonstrating it is not valid.
+
+For each finding, include a complete PoC:
+- Numbered reproduction steps (independently reproducible)
+- Exact command/payload used (copy-pasteable)
+- Raw tool output or service response proving the finding
+- Impact demonstration (what the attacker gained — shown, not described)
+- Screenshot evidence (where applicable)
+
+Findings without PoC must be downgraded to "info" severity.
+
+Report sections:
 
 1. **Executive Summary** — overall network security posture
 2. **Host Inventory** — all discovered hosts with OS, ports, services, versions
 3. **Network Topology** — discovered network structure and relationships
 4. **Service Analysis** — exposed services, versions, known CVEs
 5. **Network Traffic Analysis** — conversation analysis, protocol distribution, credential findings
-6. **Vulnerability Findings** — all vulnerabilities by severity, with CVSS scores
-7. **Default Credentials** — any discovered weak/default credentials
+6. **Vulnerability Findings** — all vulnerabilities by severity, with CVSS and full PoC
+7. **Default Credentials** — discovered weak/default credentials with service, login pair, and proof
 8. **DNS & Infrastructure** — DNS records, zone transfer results, WHOIS data
 9. **Attack Chains** — paths from initial access to deeper compromise
 10. **Remediation Roadmap** — prioritized by risk and effort
@@ -115,3 +129,6 @@ Using the `AggregatedPayload`, produce a detailed report:
 - Test default credentials only — no exhaustive brute force without explicit approval
 - Record every tool output for post-processing
 - Pay special attention to exposed management interfaces
+- **Every finding MUST have a PoC** — reproduction steps, exact payload, raw evidence, and impact proof
+- Findings without PoC are not valid and must be downgraded to "info" severity
+- Populate `poc_steps`, `poc_payload`, and `evidence` fields in every `VulnerabilityEntry`
diff --git a/blhackbox/prompts/templates/quick-scan.md b/blhackbox/prompts/templates/quick-scan.md
index b6cecc7..8674729 100644
--- a/blhackbox/prompts/templates/quick-scan.md
+++ b/blhackbox/prompts/templates/quick-scan.md
@@ -55,12 +55,16 @@ Run these steps concurrently where possible for speed:
 Using the `AggregatedPayload`, produce a concise report:
 
 1. **Risk Level** — overall risk assessment in one line
-2. **Critical Findings** — any critical/high findings with immediate action items
+2. **Critical Findings** — any critical/high findings with immediate action items and PoC evidence
 3. **Attack Surface** — open ports, services, subdomains, technologies
 4. **Network Traffic Insights** — credential findings and traffic anomalies
 5. **Recommendations** — top 3-5 actions to improve security posture
 6. **Next Steps** — which deeper assessment template to run next
 
+> Even in a quick scan, any confirmed finding must include evidence (tool output,
+> response data) proving it exists. Findings without evidence should be flagged
+> as "requires validation" and noted in Next Steps.
+
 ---
 
 ## Guidelines
@@ -68,3 +72,5 @@ Using the `AggregatedPayload`, produce a concise report:
 - Prioritize speed over completeness
 - Focus on quickly identifying critical issues
 - This is a high-level assessment — recommend deeper templates for follow-up
+- Even in quick mode, include raw evidence for any confirmed finding
+- Populate the `evidence` field of every `VulnerabilityEntry`; if no evidence was captured, flag the finding as "requires validation" and list it in Next Steps
diff --git a/blhackbox/prompts/templates/vuln-assessment.md b/blhackbox/prompts/templates/vuln-assessment.md
index 5321a53..9d6a5ad 100644
--- a/blhackbox/prompts/templates/vuln-assessment.md
+++ b/blhackbox/prompts/templates/vuln-assessment.md
@@ -111,12 +111,29 @@ For each web service discovered:
 
 ### Step 9: Vulnerability Report
 
-Using the `AggregatedPayload`, produce a detailed report:
+Using the `AggregatedPayload`, produce a detailed report.
+
+> **Every finding MUST include a Proof of Concept.** A finding that only
+> describes a vulnerability without demonstrating it is not valid.
+
+For each finding (critical through low), include:
+- CVE/CWE identifiers and CVSS score
+- Root cause description (not just the symptom)
+- **PoC: Numbered reproduction steps** (independently reproducible)
+- **PoC: Exact payload/command** (copy-pasteable)
+- **PoC: Raw evidence output** (tool output or HTTP response proving exploitation)
+- **PoC: Impact demonstration** (what the attacker gained — shown, not described)
+- **PoC: Screenshot evidence** (where applicable)
+- Remediation with specific technical steps
+
+Findings without PoC must be downgraded to "info" severity.
+
+Report sections:
 
 1. **Executive Summary** — total vulnerabilities by severity, risk posture
 2. **Methodology** — tools used, scanning approach, coverage
-3. **Critical & High Findings** — each with CVE, CVSS, evidence, remediation, references
-4. **Medium & Low Findings** — grouped and summarized
+3. **Critical & High Findings** — each with full PoC (steps, payload, evidence, impact)
+4. **Medium & Low Findings** — each with PoC (same structure, may be condensed)
 5. **False Positive Analysis** — flagged items with rationale
 6. **Network Traffic Analysis** — credential findings, traffic anomalies
 7. **Configuration Weaknesses** — missing headers, weak SSL, information disclosure
@@ -134,3 +151,6 @@ Using the `AggregatedPayload`, produce a detailed report:
 - Record every tool output for post-processing
 - Classify severity using CVSS where available
 - Map findings to OWASP Top 10 and CWE categories
+- **Every finding MUST have a PoC** — reproduction steps, exact payload, raw evidence, and impact proof
+- Findings without PoC are not valid and must be downgraded to "info" severity
+- Populate `poc_steps`, `poc_payload`, and `evidence` fields in every `VulnerabilityEntry`
diff --git a/blhackbox/prompts/templates/web-app-assessment.md b/blhackbox/prompts/templates/web-app-assessment.md
index f729e2d..a69037b 100644
--- a/blhackbox/prompts/templates/web-app-assessment.md
+++ b/blhackbox/prompts/templates/web-app-assessment.md
@@ -112,15 +112,29 @@ For each discovered form, parameter, or input point:
 
 ### Step 9: Web Application Report
 
-Using the `AggregatedPayload`, produce a detailed report:
+Using the `AggregatedPayload`, produce a detailed report.
+
+> **Every finding MUST include a Proof of Concept.** A finding that only
+> describes a vulnerability without demonstrating it is not valid.
+
+For each finding, include a complete PoC:
+- Numbered reproduction steps (independently reproducible)
+- Exact payload/command (copy-pasteable)
+- Raw HTTP request/response or tool output proving exploitation
+- Impact demonstration (what the attacker gained — shown, not described)
+- Screenshot evidence (where applicable, via `take_screenshot` / `take_element_screenshot`)
+
+Findings without PoC must be downgraded to "info" severity.
+
+Report sections:
 
 1. **Executive Summary** — overall web application security posture
 2. **Technology Stack** — identified technologies, frameworks, server info
-3. **Findings by OWASP Category** — mapped to OWASP Top 10 categories
+3. **Findings by OWASP Category** — mapped to OWASP Top 10, each with full PoC
 4. **Discovered Endpoints** — all paths, admin panels, APIs, login pages
-5. **Injection Vulnerabilities** — SQL injection, XSS, command injection findings
+5. **Injection Vulnerabilities** — SQL injection, XSS, command injection with PoC for each
 6. **Traffic Analysis** — packet capture insights, credential findings, HTTP stream analysis
-7. **Configuration Issues** — missing headers, SSL issues, default configs
+7. **Configuration Issues** — missing headers, SSL issues, default configs with evidence
 8. **Attack Chains** — how findings can be combined
 9. **Remediation Priorities** — ordered by severity and exploitability
 
@@ -132,3 +146,6 @@ Using the `AggregatedPayload`, produce a detailed report:
 - Test all discovered endpoints and parameters
 - Check both HTTP and HTTPS where applicable
 - Record every tool output for post-processing
+- **Every finding MUST have a PoC** — reproduction steps, exact payload, raw evidence, and impact proof
+- Findings without PoC are not valid and must be downgraded to "info" severity
+- Populate `poc_steps`, `poc_payload`, and `evidence` fields in every `VulnerabilityEntry`

From 8a15cbf5820652ee840265f1466f6b0a435f20c2 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Sat, 7 Mar 2026 12:59:12 +0000
Subject: [PATCH 2/2] Fix syntax error in VulnerabilityEntry poc_steps
 description string

The single quote in the SQL injection example ('OR 1=1--) was breaking
the surrounding single-quoted string literal. Switch to double-quoted
string with escaped inner double quotes for that line.

https://claude.ai/code/session_01UC8hHFgeUeMFboP3grei4t
---
 blhackbox/models/aggregated_payload.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/blhackbox/models/aggregated_payload.py b/blhackbox/models/aggregated_payload.py
index be8d0d9..b01589b 100644
--- a/blhackbox/models/aggregated_payload.py
+++ b/blhackbox/models/aggregated_payload.py
@@ -82,7 +82,7 @@ class VulnerabilityEntry(BaseModel):
             "Ordered reproduction steps that allow an independent tester "
             "to confirm the finding.  Example: "
             '["1. Navigate to /login", '
-            '"2. Enter \\' OR 1=1-- in username", '
+            "\"2. Enter ' OR 1=1-- in username\", "
             '"3. Observe 302 redirect to /admin"]'
         ),
     )