Skip to content

Commit 2f00edf

Browse files
dgerogclaude
and committed
Add adapters, --import flag, firewall docs update (v0.9.1)
- Move PromptFoo/PyRIT adapters from fw lib to CLI - Add --import flag with auto-format detection - Document importing in firewall integration docs - Bump to 0.9.1 Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 6e78f30 commit 2f00edf

6 files changed

Lines changed: 204 additions & 6 deletions

File tree

docs/docs/integrations/firewall.md

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -387,14 +387,40 @@ hb firewall train --model detectors/setfit_classifier.py
387387
| `--until DATE` | Filter experiments until this date. |
388388
| `--min-samples N` | Minimum conversations required (default: 30). |
389389
| `--output PATH` | Output .hbfw file path (default: `firewall_<project>.hbfw`). |
390+
| `--import FILE` | Import external logs (repeatable). Auto-detects format. |
390391

391392
The command:
392393

393394
1. Fetches your adversarial and QA experiment logs
394-
2. Curates attack data (failed adversarial turns, stratified by fail category)
395-
3. Curates benign data (passed QA turns, stratified by user persona)
396-
4. Trains your AgentClassifier
397-
5. Saves the model as a `.hbfw` file
395+
2. Imports external logs if `--import` provided (PromptFoo, PyRIT)
396+
3. Curates attack data (failed adversarial turns, stratified by fail category)
397+
4. Curates benign data (passed QA turns, stratified by user persona)
398+
5. Trains your AgentClassifier
399+
6. Saves the model as a `.hbfw` file
400+
401+
### Importing External Logs
402+
403+
Combine data from other red-teaming frameworks with your Humanbound test data:
404+
405+
```bash
406+
# Auto-detect format from file structure
407+
hb firewall train --import pyrit_results.json
408+
409+
# Explicit format
410+
hb firewall train --import results.json:promptfoo
411+
412+
# Multiple sources
413+
hb firewall train --import pyrit.json --import promptfoo.json
414+
```
415+
416+
Supported frameworks:
417+
418+
| Framework | Format | Auto-detected by |
419+
|-----------|--------|-----------------|
420+
| [PyRIT](https://github.com/Azure/PyRIT) (Microsoft) | JSON scan output | `redteaming_data` key |
421+
| [PromptFoo](https://github.com/promptfoo/promptfoo) | JSON eval export | `evalId` + `results` keys |
422+
423+
Imported logs are merged with Humanbound logs before training. More data sources → better Tier 2 coverage.
398424

399425
### Show
400426

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
"""Log adapters — convert external framework results to hb-firewall format.
2+
3+
Auto-detects format from file structure. Add new adapters by creating a module
4+
with SIGNATURES (list of keys to match) and convert(data) → list[dict].
5+
"""
6+
7+
import json
8+
from pathlib import Path
9+
10+
from . import promptfoo, pyrit
11+
12+
_ADAPTERS = {
13+
"promptfoo": promptfoo,
14+
"pyrit": pyrit,
15+
}
16+
17+
18+
def detect_format(data: dict) -> str:
    """Return the name of the first adapter whose signature matches *data*.

    An adapter matches when it declares a non-empty SIGNATURES list and every
    listed key is present at the top level of the parsed file. Returns ""
    when no registered adapter matches.
    """
    return next(
        (
            tag
            for tag, module in _ADAPTERS.items()
            if (keys := getattr(module, "SIGNATURES", []))
            and all(key in data for key in keys)
        ),
        "",
    )
25+
26+
27+
def convert_file(file_path: str, format_tag: str = "") -> list[dict]:
    """Convert an external log file to hb-firewall standard format.

    Args:
        file_path: path to a JSON or JSONL file
        format_tag: explicit format (e.g. "promptfoo", "pyrit"). Auto-detects
            from the file's top-level keys if empty.

    Returns:
        list of logs in standard format:
        [{"conversation": [...], "result": "pass"|"fail", "test_category": "...",
          "fail_category": "...", "severity": float, "confidence": float}, ...]

    Raises:
        ValueError: if the format cannot be auto-detected, or the explicit
            format_tag is not a registered adapter.
    """
    path = Path(file_path)

    # Explicit UTF-8: JSON is UTF-8 by spec, and relying on the locale's
    # preferred encoding breaks on non-UTF-8 systems (see PEP 597).
    if path.suffix == ".jsonl":
        with open(path, encoding="utf-8") as f:
            lines = [json.loads(line) for line in f if line.strip()]
        # JSONL: treat as list of individual results. NOTE: auto-detection
        # inspects top-level keys, and no adapter signature matches
        # "_jsonl_entries", so JSONL input needs an explicit format_tag.
        data = {"_jsonl_entries": lines}
    else:
        with open(path, encoding="utf-8") as f:
            data = json.load(f)

    tag = format_tag or detect_format(data)
    available = ", ".join(_ADAPTERS)
    if not tag:
        raise ValueError(
            f"Unrecognized format in '{file_path}'. "
            f"Specify format: --import {file_path}:<format> "
            f"(available: {available})")

    if tag not in _ADAPTERS:
        raise ValueError(f"Unknown format '{tag}'. Available: {available}")

    return _ADAPTERS[tag].convert(data)
63+
64+
65+
def list_formats() -> list[str]:
    """Return the registered adapter format tags, in registration order."""
    return [tag for tag in _ADAPTERS]
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
"""PromptFoo adapter — converts evaluation results to hb-firewall format.
2+
3+
Handles the export format from: promptfoo export eval <id> -o results.json
4+
"""
5+
6+
SIGNATURES = ["results", "evalId"]
7+
8+
9+
def convert(data: dict) -> list[dict]:
    """Convert a PromptFoo eval export to hb-firewall standard log format.

    Args:
        data: parsed JSON of a PromptFoo export — expects
            data["results"]["results"] to be a list of test-case entries.

    Returns:
        list of standard-format logs; entries with no prompt text are skipped.
    """
    logs = []
    results = data.get("results") or {}

    for entry in results.get("results", []):
        # Extract prompt text. The prompt may be a dict ({"raw": ..., "label": ...})
        # or a bare string depending on promptfoo version; an explicit JSON null
        # must not become the literal string "None".
        prompt_obj = entry.get("prompt") or {}
        if isinstance(prompt_obj, dict):
            prompt = prompt_obj.get("raw", prompt_obj.get("label", ""))
        else:
            prompt = str(prompt_obj)

        # Extract response text, guarding against an explicit null response
        # or a null "output" field.
        response_obj = entry.get("response") or {}
        if isinstance(response_obj, dict):
            response = response_obj.get("output", "") or ""
        else:
            response = str(response_obj)

        if not prompt:
            continue

        success = entry.get("success", True)
        score = entry.get("score", 1.0 if success else 0.0)

        # Extract fail reason from grading; "reason" may be null in exports,
        # so coalesce before slicing to the 80-char category limit.
        fail_category = ""
        grading = entry.get("gradingResult") or {}
        if grading and not success:
            fail_category = (grading.get("reason") or "")[:80]

        # Map score to severity (0 = benign, 100 = severe), clamped so
        # out-of-range scores (e.g. negative) stay within the scale.
        if isinstance(score, (int, float)) and score <= 1.0:
            severity = min(100.0, max(0.0, float((1.0 - score) * 100)))
        else:
            severity = 50.0

        logs.append({
            "conversation": [{"u": prompt, "a": response}],
            "result": "pass" if success else "fail",
            "test_category": "adversarial",
            "fail_category": fail_category,
            "severity": severity,
            "confidence": 80.0,
            "gen_category": "promptfoo",
        })

    return logs

humanbound_cli/adapters/pyrit.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
"""PyRIT (Microsoft) adapter — converts red teaming scan results to hb-firewall format."""
2+
3+
SIGNATURES = ["redteaming_data"]
4+
5+
SEVERITY_MAP = {
6+
"very low": 10,
7+
"low": 30,
8+
"medium": 50,
9+
"high": 75,
10+
"very high": 95,
11+
}
12+
13+
14+
def convert(data: dict) -> list[dict]:
15+
logs = []
16+
for entry in data.get("redteaming_data", []):
17+
conv = []
18+
for msg in entry.get("conversation", []):
19+
role = msg.get("role", "")
20+
content = msg.get("content", "")
21+
if role == "user":
22+
conv.append({"u": content, "a": ""})
23+
elif role == "assistant" and conv:
24+
conv[-1]["a"] = content
25+
26+
if not conv:
27+
continue
28+
29+
attack_success = entry.get("attack_success", False)
30+
risk_cat = entry.get("risk_category", "")
31+
32+
# Extract max severity from risk_assessment
33+
severity = 0
34+
risk_assessment = entry.get("risk_assessment", {})
35+
for cat_info in risk_assessment.values():
36+
if isinstance(cat_info, dict):
37+
label = cat_info.get("severity_label", "").lower()
38+
severity = max(severity, SEVERITY_MAP.get(label, 0))
39+
40+
logs.append({
41+
"conversation": conv,
42+
"result": "fail" if attack_success else "pass",
43+
"test_category": "adversarial",
44+
"fail_category": risk_cat if attack_success else "",
45+
"severity": float(severity),
46+
"confidence": 90.0,
47+
"gen_category": entry.get("attack_technique", ""),
48+
})
49+
50+
return logs

humanbound_cli/commands/firewall.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ def train_command(model_path, last_n, from_date, until_date, min_samples,
101101

102102
# Import external logs if provided
103103
if import_files:
104-
from hb_firewall.adapters import convert_file
104+
from humanbound_cli.adapters import convert_file
105105
for import_arg in import_files:
106106
# Parse file:format syntax
107107
if ":" in import_arg and not import_arg.startswith("/"):

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "humanbound-cli"
7-
version = "0.9.0"
7+
version = "0.9.1"
88
authors = [
99
{ name="Kostas Siabanis", email="hello@humanbound.ai" },
1010
{ name="Demetris Gerogiannis", email="hello@humanbound.ai" },

0 commit comments

Comments
 (0)