From b394ee360a49d8054b5e2e4925acc2e3c2b50685 Mon Sep 17 00:00:00 2001
From: Mykhailo Chalyi <mike@chaliy.name>
Date: Wed, 1 Apr 2026 23:02:32 +0000
Subject: [PATCH] test(security): add adversarial tests for sparse arrays,
 extreme indices, expansion bombs

Add 5 targeted security tests inspired by zapcode's adversarial test suite:
- Sparse array huge index (TM-DOS-060): verify no mass allocation for arr[999999999]
- Extreme negative array index (TM-DOS-060): verify no panic/crash
- Array entry exhaustion under load (TM-DOS-060): verify max_array_entries enforcement
- Brace expansion bomb via printf (TM-DOS-041): verify {1..999999999} is capped
- Parameter expansion replacement bomb (TM-DOS-059): verify multiplicative amplification is bounded

Updates threat model (specs/006, docs/threat-model.md) with TM-DOS-059 and TM-DOS-060.

Closes #934
---
 crates/bashkit/docs/threat-model.md        |   2 +
 crates/bashkit/tests/threat_model_tests.rs | 177 +++++++++++++++++++++
 specs/006-threat-model.md                  |   2 +
 3 files changed, 181 insertions(+)

diff --git a/crates/bashkit/docs/threat-model.md b/crates/bashkit/docs/threat-model.md
index 98923761..f23fc267 100644
--- a/crates/bashkit/docs/threat-model.md
+++ b/crates/bashkit/docs/threat-model.md
@@ -107,6 +107,8 @@ through configurable limits.
 | source self-recursion (TM-DOS-056) | Script that sources itself | Track source depth | **OPEN** |
 | sleep bypasses timeout (TM-DOS-057) | `sleep N` ignores `ExecutionLimits::timeout` | Implement tokio timeout wrapper | **OPEN** |
 | Unbounded builtin output (TM-DOS-058) | `seq 1 1000000` produces 1M lines | Add `max_stdout_bytes` limit | **OPEN** |
+| Param expansion bomb (TM-DOS-059) | `${x//a/bigstring}` multiplicative amplification | `max_total_variable_bytes` + `max_stdout_bytes` | MITIGATED |
+| Sparse array huge-index (TM-DOS-060) | `arr[999999999]=x` | HashMap storage; `max_array_entries` | MITIGATED |
 
 **Configuration:**
 ```rust
diff --git a/crates/bashkit/tests/threat_model_tests.rs b/crates/bashkit/tests/threat_model_tests.rs
index 7c1f985d..e36512c7 100644
--- a/crates/bashkit/tests/threat_model_tests.rs
+++ b/crates/bashkit/tests/threat_model_tests.rs
@@ -3841,6 +3841,7 @@ mod trace_events {
     }
 }
 
+// =============================================================================
 // =============================================================================
 // TYPESCRIPT / ZAPCODE SECURITY (TM-TS)
 //
@@ -3967,3 +3968,179 @@ mod typescript_security {
         );
     }
 }
+
+// =============================================================================
+// ADVERSARIAL TESTS — SPARSE ARRAYS, EXTREME INDICES, EXPANSION BOMBS
+// Inspired by zapcode's adversarial test suite (issue #934)
+// =============================================================================
+
+mod zapcode_inspired_adversarial {
+    use super::*;
+
+    /// TM-DOS-060: Huge sparse index allocation.
+    /// Assigning to index 999999999 must not allocate ~1B empty slots.
+    /// bashkit uses HashMap internally, so the storage is O(1) per entry,
+    /// but the max_array_entries limit should still cap total entries.
+    #[tokio::test]
+    async fn sparse_array_huge_index() {
+        let mem = MemoryLimits::new().max_array_entries(100);
+        let limits = ExecutionLimits::new().max_commands(1_000);
+        let mut bash = Bash::builder()
+            .limits(limits)
+            .memory_limits(mem)
+            .session_limits(SessionLimits::unlimited())
+            .build();
+
+        let result = bash
+            .exec("declare -a arr; arr[999999999]=x; echo ${#arr[@]}")
+            .await
+            .unwrap();
+        // Should succeed (HashMap-based, only 1 entry) or be capped — no OOM
+        assert_eq!(result.exit_code, 0);
+        // The array has at most 1 entry — certainly not ~1B
+        let count: usize = result.stdout.trim().parse().unwrap_or(0);
+        assert!(
+            count <= 100,
+            "Sparse index must not cause mass allocation, got count={}",
+            count
+        );
+    }
+
+    /// TM-DOS-060: Extreme negative array index.
+    /// Must not panic, no OOB memory access, no memory corruption.
+    /// The key security property is: no panic, no crash, no unbounded allocation.
+    /// Bash wraps negative indices modulo array length, so some element may be returned.
+    #[tokio::test]
+    async fn sparse_array_extreme_negative_index() {
+        let limits = ExecutionLimits::new().max_commands(1_000);
+        let mut bash = Bash::builder().limits(limits).build();
+
+        let result = bash
+            .exec("declare -a arr=(a b c); echo \"${arr[-999999999]}\"")
+            .await
+            .unwrap();
+        // Security property: no panic, no crash, graceful completion
+        assert_eq!(result.exit_code, 0);
+        // Output should be either empty or one of the valid elements (wrapping is acceptable)
+        let out = result.stdout.trim();
+        assert!(
+            out.is_empty() || ["a", "b", "c"].contains(&out),
+            "Extreme negative index should return empty or valid element, got: {:?}",
+            out
+        );
+    }
+
+    /// TM-DOS-060: Array entry exhaustion under load.
+    /// Populating 200K entries via loop must be stopped by max_array_entries (100K default)
+    /// or by the loop iteration limit — whichever fires first.
+    #[tokio::test]
+    async fn array_entry_exhaustion_under_load() {
+        let mem = MemoryLimits::new().max_array_entries(100);
+        let limits = ExecutionLimits::new()
+            .max_commands(500_000)
+            .max_loop_iterations(500_000)
+            .max_total_loop_iterations(500_000);
+        let mut bash = Bash::builder()
+            .limits(limits)
+            .memory_limits(mem)
+            .session_limits(SessionLimits::unlimited())
+            .build();
+
+        let script = r#"
+declare -a arr
+i=0
+while [ $i -lt 200 ]; do
+    arr[$i]=x
+    i=$((i+1))
+done
+echo ${#arr[@]}
+"#;
+        let result = bash.exec(script).await.unwrap();
+        assert_eq!(result.exit_code, 0);
+        let count: usize = result.stdout.trim().parse().unwrap_or(0);
+        // max_array_entries=100 means at most 100 entries created
+        assert!(
+            count <= 100,
+            "Array entries should be capped at max_array_entries, got {}",
+            count
+        );
+    }
+
+    /// TM-DOS-041: Printf format repeat via brace expansion.
+    /// `{1..999999999}` must be rejected by the brace expansion cap before
+    /// printf ever runs. Without the cap, this would generate ~1B arguments.
+    #[tokio::test]
+    async fn brace_expansion_bomb_printf() {
+        let limits = ExecutionLimits::new()
+            .max_commands(1_000)
+            .max_stdout_bytes(1_000_000);
+        let mut bash = Bash::builder().limits(limits).build();
+
+        let result = bash.exec("printf '%0.s-' {1..999999999}").await;
+        // Either the brace expansion is capped (producing truncated output)
+        // or the parser rejects it statically. Either way: no OOM, no hang.
+        match result {
+            Ok(r) => {
+                // If it succeeded, the output must be bounded (cap at 100K expansions or stdout limit)
+                assert!(
+                    r.stdout.len() <= 1_000_000,
+                    "Brace expansion bomb produced {} bytes — should be capped",
+                    r.stdout.len()
+                );
+            }
+            Err(e) => {
+                // Static rejection by parser budget is also acceptable
+                let msg = e.to_string();
+                assert!(
+                    msg.contains("brace")
+                        || msg.contains("range")
+                        || msg.contains("too large")
+                        || msg.contains("exceeded")
+                        || msg.contains("budget"),
+                    "Expected brace/range limit error, got: {}",
+                    msg
+                );
+            }
+        }
+    }
+
+    /// TM-DOS-059: Parameter expansion replacement bomb.
+    /// `${x//a/$(echo bbbbbbbb)}` replaces each 'a' with 'bbbbbbbb'.
+    /// At scale (10K 'a's × 1K 'b's = 10MB), this must be caught by
+    /// max_total_variable_bytes or max_stdout_bytes.
+    #[tokio::test]
+    async fn parameter_expansion_replacement_bomb() {
+        let mem = MemoryLimits::new().max_total_variable_bytes(100_000);
+        let limits = ExecutionLimits::new()
+            .max_commands(50_000)
+            .max_loop_iterations(50_000)
+            .max_total_loop_iterations(50_000)
+            .max_stdout_bytes(1_000_000);
+        let mut bash = Bash::builder()
+            .limits(limits)
+            .memory_limits(mem)
+            .session_limits(SessionLimits::unlimited())
+            .build();
+
+        // Create a string of 10K 'a' chars, then replace each with 1K 'b' chars
+        // This attempts 10K × 1K = 10MB output
+        let script = r#"
+x=$(printf 'a%.0s' {1..10000})
+echo "${x//a/$(printf 'b%.0s' {1..1000})}"
+"#;
+        let result = bash.exec(script).await;
+        match result {
+            Ok(r) => {
+                // If it completes, output must be bounded by limits
+                assert!(
+                    r.stdout.len() <= 1_000_000,
+                    "Expansion bomb produced {} bytes of stdout — should be capped",
+                    r.stdout.len()
+                );
+            }
+            Err(_) => {
+                // Limit enforcement error is also acceptable
+            }
+        }
+    }
+}
diff --git a/specs/006-threat-model.md b/specs/006-threat-model.md
index 7a270a91..ea8a1fee 100644
--- a/specs/006-threat-model.md
+++ b/specs/006-threat-model.md
@@ -268,6 +268,8 @@ max_ast_depth: 100,           // Parser recursion (TM-DOS-022)
 | TM-DOS-056 | `source` self-recursion stack overflow | Script that sources itself causes unbounded recursion; function depth limit doesn't apply to `source` | — | **OPEN** |
 | TM-DOS-057 | `sleep` bypasses execution timeout | `sleep`, `(sleep N)`, `echo x \| sleep N`, `sleep N & wait`, `timeout N sleep N` all ignore `ExecutionLimits::timeout` | — | **OPEN** |
 | TM-DOS-058 | Single-builtin unbounded output | `seq 1 1000000` produces 1M lines despite command limit; single builtin call generates unbounded output (see also #648) | — | **OPEN** |
+| TM-DOS-059 | Parameter expansion replacement bomb | `${x//a/$(printf 'b%.0s' {1..1000})}` on large `x` amplifies output multiplicatively (10K × 1K = 10MB) | `max_total_variable_bytes` + `max_stdout_bytes` | **MITIGATED** |
+| TM-DOS-060 | Sparse array huge-index allocation | `arr[999999999]=x` could allocate ~1B empty slots if arrays are Vec-backed; negative indices could cause OOB | HashMap-based arrays; `max_array_entries` caps total entries | **MITIGATED** |
 
 **TM-DOS-051**: `builtins/yaml.rs` — `parse_yaml_block`, `parse_yaml_map`, `parse_yaml_list` recurse
 on nested YAML structures with no depth counter. Crafted YAML with 1000+ nesting levels causes stack