fix(security): harden justification scoring against prompt injection (v3.2.1)

jovanSAPFIONEER · jovanSAPFIONEER · commit fa35ba4b1756 · 2026-02-17T14:43:21.000+01:00
- Replace simplistic keyword matching in score_justification() with
  multi-layered defense: injection pattern detection (16 patterns),
  keyword-stuffing detection, repetition/padding detection, length cap,
  minimum word count, and structural coherence scoring
- Add detect_injection() function to catch prompt-injection attempts
- Fix test-security.ts gateway audit integrity test by isolating log files
- All 315 tests pass cleanly (0 failures)
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,6 +5,21 @@ All notable changes to Network-AI will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [3.2.1] - 2026-02-17
+
+### Security
+- **Hardened `score_justification()` in `check_permission.py`** -- Fixed prompt-injection bypass vulnerability flagged by ClawHub scanner; simplistic keyword matching replaced with multi-layered defense
+- **Added `detect_injection()` function** -- 16 regex patterns detect prompt-injection attempts (ignore previous, override policy, bypass security, admin mode, sudo, jailbreak, etc.)
+- **Keyword-stuffing detection** -- Penalizes justifications where >50% of words are scoring keywords
+- **Repetition/padding detection** -- Rejects justifications with <40% unique words
+- **Maximum length cap (500 chars)** -- Prevents obfuscation in excessively long justifications
+- **Minimum word count (3)** -- Rejects trivially short justifications
+- **Structural coherence scoring** -- Requires verb + noun-object structure for full score; prevents keyword-only strings from scoring high
+
+### Fixed
+- **Security test isolation** -- Gateway audit integrity test (Test 7) now uses an isolated log file, preventing cross-run HMAC signature mismatches that caused false failures
+- **All 315 tests now pass cleanly** -- 0 failures across all 4 suites
+
 ## [3.2.0] - 2026-02-17
 
 ### Added -- Phase 3: Priority & Preemption
diff --git a/README.md b/README.md
@@ -2,7 +2,7 @@
 
 **The plug-and-play AI agent orchestrator for TypeScript/Node.js -- connect 12 agent frameworks with zero glue code**
 
-[![Release](https://img.shields.io/badge/release-v3.2.0-blue.svg)](https://github.com/jovanSAPFIONEER/Network-AI/releases)
+[![Release](https://img.shields.io/badge/release-v3.2.1-blue.svg)](https://github.com/jovanSAPFIONEER/Network-AI/releases)
 [![ClawHub](https://img.shields.io/badge/ClawHub-network--ai-orange.svg)](https://clawhub.ai/skills/network-ai)
 [![Node.js](https://img.shields.io/badge/node-%3E%3D18.0.0-brightgreen.svg)](https://nodejs.org)
 [![TypeScript](https://img.shields.io/badge/TypeScript-5.x-3178C6.svg)](https://typescriptlang.org)
diff --git a/package-lock.json b/package-lock.json
diff --git a/package.json b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "network-ai",
-  "version": "3.2.0",
+  "version": "3.2.1",
   "description": "AI agent orchestration framework for TypeScript/Node.js - plug-and-play multi-agent coordination with 12 frameworks (LangChain, AutoGen, CrewAI, OpenAI Assistants, LlamaIndex, Semantic Kernel, Haystack, DSPy, Agno, MCP, OpenClaw). Built-in security, swarm intelligence, and agentic workflow patterns.",
   "main": "dist/index.js",
   "types": "dist/index.d.ts",
diff --git a/scripts/check_permission.py b/scripts/check_permission.py
@@ -61,31 +61,135 @@ def ensure_data_dir():
     return data_dir
 
 
+def detect_injection(justification: str) -> bool:
+    """
+    Detect prompt-injection and manipulation patterns in justifications.
+
+    Returns True if the justification looks like a prompt-injection attempt.
+    """
+    injection_patterns = [
+        r'ignore\s+(previous|above|prior|all)',
+        r'override\s+(policy|restriction|rule|permission|security)',
+        r'system\s*prompt',
+        r'you\s+are\s+(now|a)',
+        r'act\s+as\s+(if|a|an)',
+        r'pretend\s+(to|that|you)',
+        r'bypass\s+(security|check|restriction|auth)',
+        r'grant\s+(me|access|permission)\s+(anyway|regardless|now)',
+        r'disregard\s+(policy|rule|restriction|previous)',
+        r'admin\s+(mode|access|override)',
+        r'sudo\b',
+        r'jailbreak',
+        r'do\s+not\s+(check|verify|validate|restrict)',
+        r'skip\s+(validation|verification|check)',
+        r'trust\s+level\s*[:=]',
+        r'score\s*[:=]+\s*[\d.]',
+    ]
+    text = justification.lower()
+    for pattern in injection_patterns:
+        if re.search(pattern, text):
+            return True
+    return False
+
+
 def score_justification(justification: str) -> float:
     """
-    Score the quality of a justification.
-    
-    Criteria:
-    - Length (more detail = better)
-    - Contains task-related keywords
-    - Contains specificity keywords
-    - Doesn't contain test/debug keywords
+    Score the quality of a justification with hardened validation.
+
+    Defenses against prompt injection and keyword stuffing:
+    - Injection pattern detection (immediate reject)
+    - Maximum length cap (prevents obfuscation in long text)
+    - Keyword-stuffing detection (penalises unnatural keyword density)
+    - Unique-word ratio check (catches copy-paste padding)
+    - Structural coherence (requires natural sentence structure)
+
+    Criteria (after safety checks):
+    - Length (more detail = better, but capped)
+    - Contains task-related keywords (capped contribution)
+    - Contains specificity keywords (capped contribution)
+    - No test/debug keywords
+    - Structural coherence bonus
     """
+    # ----- Hard reject: injection patterns -----
+    if detect_injection(justification):
+        return 0.0
+
+    # ----- Hard reject: empty or whitespace-only -----
+    stripped = justification.strip()
+    if not stripped:
+        return 0.0
+
+    # ----- Hard cap: excessively long justifications are suspicious -----
+    MAX_JUSTIFICATION_LENGTH = 500
+    if len(stripped) > MAX_JUSTIFICATION_LENGTH:
+        return 0.1  # Suspiciously long — allow re-submission with concise text
+
+    words = stripped.split()
+    word_count = len(words)
+
+    # ----- Near-zero score (0.1): too few words to be meaningful -----
+    if word_count < 3:
+        return 0.1
+
+    # ----- Repetition / padding detection -----
+    unique_words = set(w.lower() for w in words)
+    unique_ratio = len(unique_words) / word_count if word_count > 0 else 0
+    if unique_ratio < 0.4:
+        return 0.1  # More than 60% repeated words — likely padding
+
+    # ----- Keyword-stuffing detection -----
+    task_keywords = re.findall(
+        r'\b(task|purpose|need|require|generate|analyze|create|process)\b',
+        stripped, re.IGNORECASE,
+    )
+    specificity_keywords = re.findall(
+        r'\b(specific|particular|exact|quarterly|annual|report|summary)\b',
+        stripped, re.IGNORECASE,
+    )
+    total_matched = len(task_keywords) + len(specificity_keywords)
+    keyword_density = total_matched / word_count if word_count > 0 else 0
+    if keyword_density > 0.5:
+        return 0.1  # More than half the words are scoring keywords — stuffing
+
+    # ----- Scoring (defensive caps per category) -----
     score = 0.0
-    
-    if len(justification) > 20:
-        score += 0.2
-    if len(justification) > 50:
-        score += 0.2
-    if re.search(r'\b(task|purpose|need|require|generate|analyze|create|process)\b', 
-                 justification, re.IGNORECASE):
-        score += 0.2
-    if re.search(r'\b(specific|particular|exact|quarterly|annual|report|summary)\b',
-                 justification, re.IGNORECASE):
-        score += 0.2
-    if not re.search(r'\b(test|debug|try|experiment)\b', justification, re.IGNORECASE):
-        score += 0.2
-    
+
+    # Length contribution (max 0.25)
+    if len(stripped) > 20:
+        score += 0.15
+    if len(stripped) > 50:
+        score += 0.10
+
+    # Task keyword presence (max 0.20, but only first match counts)
+    if task_keywords:
+        score += 0.20
+
+    # Specificity keyword presence (max 0.20, but only first match counts)
+    if specificity_keywords:
+        score += 0.20
+
+    # No test/debug markers (max 0.15)
+    if not re.search(r'\b(test|debug|try|experiment)\b', stripped, re.IGNORECASE):
+        score += 0.15
+
+    # Structural coherence: sentence-like structure (max 0.20)
+    # Must contain at least one verb-like word and at least one noun-object word
+    has_verb = bool(re.search(
+        r'\b(is|are|was|were|need|needs|require|requires|must|should|will|'
+        r'generate|generating|analyze|analyzing|create|creating|process|processing|'
+        r'prepare|preparing|compile|compiling|review|reviewing|access|accessing|'
+        r'retrieve|retrieving|export|exporting|send|sending|run|running)\b',
+        stripped, re.IGNORECASE,
+    ))
+    has_noun_object = bool(re.search(
+        r'\b(data|report|records|invoices?|orders?|customers?|accounts?|'
+        r'transactions?|files?|emails?|results?|metrics?|statistics?|'
+        r'analysis|documents?|exports?|payments?|entries|logs?|summaries)\b',
+        stripped, re.IGNORECASE,
+    ))
+    if has_verb and has_noun_object:
+        score += 0.20
+
     return min(score, 1.0)
 
 
diff --git a/swarm-blackboard.md b/swarm-blackboard.md
@@ -1,5 +1,5 @@
 # Swarm Blackboard
-Last Updated: 2026-02-16T12:02:21.543Z
+Last Updated: 2026-02-17T13:41:42.467Z
 
 ## Active Tasks
 | TaskID | Agent | Status | Started | Description |
@@ -18,7 +18,7 @@ Last Updated: 2026-02-16T12:02:21.543Z
     "status": "complete"
   },
   "sourceAgent": "code_writer",
-  "timestamp": "2026-02-16T12:02:21.512Z",
+  "timestamp": "2026-02-17T13:41:42.457Z",
   "ttl": null
 }
 
@@ -34,7 +34,7 @@ Last Updated: 2026-02-16T12:02:21.543Z
     "reviewer": "code_reviewer"
   },
   "sourceAgent": "code_reviewer",
-  "timestamp": "2026-02-16T12:02:21.517Z",
+  "timestamp": "2026-02-17T13:41:42.461Z",
   "ttl": null
 }
 
@@ -49,7 +49,7 @@ Last Updated: 2026-02-16T12:02:21.543Z
     "duration": 3200
   },
   "sourceAgent": "test_runner",
-  "timestamp": "2026-02-16T12:02:21.523Z",
+  "timestamp": "2026-02-17T13:41:42.462Z",
   "ttl": null
 }
 
@@ -60,7 +60,7 @@ Last Updated: 2026-02-16T12:02:21.543Z
     "replicas": 3
   },
   "sourceAgent": "devops_agent",
-  "timestamp": "2026-02-16T12:02:21.543Z",
+  "timestamp": "2026-02-17T13:41:42.467Z",
   "ttl": null
 }
 
diff --git a/test-security.ts b/test-security.ts
@@ -424,8 +424,13 @@ async function testPermissionHardening() {
 async function testSecureAuditLogger() {
   header('TEST 6: Secure Audit Logger');
   
+  // Clean stale log to avoid cross-run integrity mismatches
+  const auditLogPath = './test-security-audit.log';
+  if (require('fs').existsSync(auditLogPath)) {
+    require('fs').unlinkSync(auditLogPath);
+  }
   const auditLogger = new SecureAuditLogger({
-    auditLogPath: './test-security-audit.log',
+    auditLogPath,
     signAuditLogs: true,
     tokenSecret: 'audit-secret-key',
   });
@@ -464,10 +469,17 @@ async function testSecureAuditLogger() {
 async function testSecureSwarmGateway() {
   header('TEST 7: Secure Swarm Gateway (Integration)');
   
+  // Use isolated audit log so stale entries from prior runs don't
+  // break integrity verification (different tokenSecret = different HMAC).
+  const gatewayLogPath = './test-gateway-audit.log';
+  if (require('fs').existsSync(gatewayLogPath)) {
+    require('fs').unlinkSync(gatewayLogPath);
+  }
   const gateway = new SecureSwarmGateway({
     maxRequestsPerMinute: 10,
     maxFailedAuthAttempts: 3,
     tokenSecret: 'gateway-test-secret',
+    auditLogPath: gatewayLogPath,
   });
   
   // Test: Valid request processing

Original file line number	Diff line number	Diff line change
`@@ -1,6 +1,6 @@`
`1`	`1`	`{`
`2`	`2`	`"name": "network-ai",`
`3`		`- "version": "3.2.0",`
	`3`	`+ "version": "3.2.1",`
`4`	`4`	`"description": "AI agent orchestration framework for TypeScript/Node.js - plug-and-play multi-agent coordination with 12 frameworks (LangChain, AutoGen, CrewAI, OpenAI Assistants, LlamaIndex, Semantic Kernel, Haystack, DSPy, Agno, MCP, OpenClaw). Built-in security, swarm intelligence, and agentic workflow patterns.",`
`5`	`5`	`"main": "dist/index.js",`
`6`	`6`	`"types": "dist/index.d.ts",`