From 088f6ca375ce398254824fc8e6ed2c63370eccab Mon Sep 17 00:00:00 2001 From: Mykhailo Chalyi Date: Mon, 6 Apr 2026 09:46:23 +0000 Subject: [PATCH] fix(interpreter): add max_subst_depth limit to prevent OOM from nested $() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Each command substitution level clones the full interpreter state (variables, arrays, functions, etc.), so memory grows as depth × state_size. Previously, command substitution shared the function depth counter (max_function_depth=100), which allowed deep nesting to exhaust memory. Add a dedicated max_subst_depth limit (default 32) with its own counter. This bounds memory consumption from nested $(...) chains while keeping the function depth limit independent. - Add max_subst_depth field to ExecutionLimits (default 32) - Add subst_depth counter to ExecutionCounters with push_subst/pop_subst - Switch CommandSubstitution handler to use subst depth tracking - Harden arithmetic_fuzz target with tighter limits - Update blackbox_security_tests to set max_subst_depth - Add regression tests with fuzz crash input from #1088 - Add TM-DOS-088 to threat model Closes #1088 --- .../fuzz/fuzz_targets/arithmetic_fuzz.rs | 4 + crates/bashkit/src/interpreter/mod.rs | 15 +-- crates/bashkit/src/limits.rs | 40 ++++++- .../bashkit/tests/blackbox_security_tests.rs | 2 + .../bashkit/tests/subst_depth_limit_tests.rs | 102 ++++++++++++++++++ specs/006-threat-model.md | 2 + 6 files changed, 158 insertions(+), 7 deletions(-) create mode 100644 crates/bashkit/tests/subst_depth_limit_tests.rs diff --git a/crates/bashkit/fuzz/fuzz_targets/arithmetic_fuzz.rs b/crates/bashkit/fuzz/fuzz_targets/arithmetic_fuzz.rs index cddf673a..df94a5ba 100644 --- a/crates/bashkit/fuzz/fuzz_targets/arithmetic_fuzz.rs +++ b/crates/bashkit/fuzz/fuzz_targets/arithmetic_fuzz.rs @@ -52,6 +52,10 @@ fuzz_target!(|data: &[u8]| { .limits( bashkit::ExecutionLimits::new() .max_commands(100) + .max_function_depth(10) + .max_subst_depth(5) + .max_stdout_bytes(4096) + .max_stderr_bytes(4096) .timeout(std::time::Duration::from_millis(100)), ) .build(); diff --git a/crates/bashkit/src/interpreter/mod.rs b/crates/bashkit/src/interpreter/mod.rs index 2d3dc7bc..a3ab3588 100644 --- a/crates/bashkit/src/interpreter/mod.rs +++ b/crates/bashkit/src/interpreter/mod.rs @@ -6212,10 +6212,13 @@ impl Interpreter { } } WordPart::CommandSubstitution(commands) => { - // THREAT[TM-DOS-044]: Track substitution depth to prevent stack overflow - if self.counters.push_function(&self.limits).is_err() { + // THREAT[TM-DOS-088]: Track substitution depth separately from + // function depth. Each level clones the full interpreter state, + // so memory ≈ depth × state_size. A tighter limit than + // max_function_depth prevents OOM. + if self.counters.push_subst(&self.limits).is_err() { return Err(crate::error::Error::Execution( - "maximum nesting depth exceeded in command substitution".to_string(), + "maximum command substitution depth exceeded".to_string(), )); } // Command substitution runs in a subshell: snapshot all @@ -6260,7 +6263,7 @@ impl Interpreter { self.aliases = saved_aliases; self.cwd = saved_cwd; self.memory_budget = saved_memory_budget; - self.counters.pop_function(); + self.counters.pop_subst(); self.subst_generation += 1; let trimmed = stdout.trim_end_matches('\n'); result.push_str(trimmed); @@ -8436,7 +8439,7 @@ impl Interpreter { ); match parser.parse() { Ok(script) => { - if self.counters.push_function(&self.limits).is_err() { + if self.counters.push_subst(&self.limits).is_err() { result.push('0'); } else { let saved_vars = self.variables.clone(); @@ -8457,7 +8460,7 @@ impl Interpreter { self.aliases = saved_aliases; self.cwd = saved_cwd; self.memory_budget = saved_memory_budget; - self.counters.pop_function(); + self.counters.pop_subst(); let trimmed = cmd_result.stdout.trim_end_matches('\n'); if trimmed.is_empty() { result.push('0'); diff --git a/crates/bashkit/src/limits.rs b/crates/bashkit/src/limits.rs index 99c43913..3c1f9057 100644 --- a/crates/bashkit/src/limits.rs +++ b/crates/bashkit/src/limits.rs @@ -84,6 +84,14 @@ pub struct ExecutionLimits { /// Prevents unbounded error output accumulation. pub max_stderr_bytes: usize, + // THREAT[TM-DOS-088]: Command substitutions clone the entire interpreter + // state (variables, arrays, functions, etc.) per nesting level. At depth N, + // memory ≈ N × state_size. A separate, tighter limit than max_function_depth + // prevents OOM from deeply nested $(...) chains. + /// Maximum command substitution nesting depth. + /// Default: 32 + pub max_subst_depth: usize, + /// Whether to capture the final environment state in ExecResult. /// Default: false (opt-in to avoid cloning cost when not needed) pub capture_final_env: bool, @@ -103,6 +111,7 @@ impl Default for ExecutionLimits { max_parser_operations: 100_000, max_stdout_bytes: 1_048_576, // 1MB max_stderr_bytes: 1_048_576, // 1MB + max_subst_depth: 32, capture_final_env: false, } } @@ -201,6 +210,12 @@ impl ExecutionLimits { self } + /// Set maximum command substitution nesting depth. + pub fn max_subst_depth(mut self, depth: usize) -> Self { + self.max_subst_depth = depth; + self + } + /// Enable capturing final environment state in ExecResult pub fn capture_final_env(mut self, capture: bool) -> Self { self.capture_final_env = capture; @@ -287,6 +302,9 @@ pub struct ExecutionCounters { /// Total loop iterations across all loops (never reset) pub total_loop_iterations: usize, + /// Current command substitution nesting depth. + pub subst_depth: usize, + // THREAT[TM-DOS-059]: Session-level cumulative counters. // These persist across exec() calls (never reset by reset_for_execution). /// Total commands across all exec() calls in this session. @@ -309,9 +327,10 @@ impl ExecutionCounters { self.commands = 0; self.loop_iterations = 0; self.total_loop_iterations = 0; - // function_depth should already be 0 between exec() calls, + // function_depth and subst_depth should already be 0 between exec() calls, // but reset defensively to avoid stuck state self.function_depth = 0; + self.subst_depth = 0; } /// Increment command counter, returns error if limit exceeded @@ -444,6 +463,22 @@ impl ExecutionCounters { self.function_depth -= 1; } } + + /// Push command substitution, returns error if depth exceeded. + /// THREAT[TM-DOS-088]: Command substitutions clone interpreter state, + /// so their nesting depth must be bounded more tightly than functions. + pub fn push_subst(&mut self, limits: &ExecutionLimits) -> Result<(), LimitExceeded> { + if self.subst_depth >= limits.max_subst_depth { + return Err(LimitExceeded::MaxSubstDepth(limits.max_subst_depth)); + } + self.subst_depth += 1; + Ok(()) + } + + /// Pop command substitution + pub fn pop_subst(&mut self) { + self.subst_depth = self.subst_depth.saturating_sub(1); + } } /// Error returned when a resource limit is exceeded @@ -461,6 +496,9 @@ pub enum LimitExceeded { #[error("maximum function depth exceeded ({0})")] MaxFunctionDepth(usize), + #[error("maximum command substitution depth exceeded ({0})")] + MaxSubstDepth(usize), + #[error("execution timeout ({0:?})")] Timeout(Duration), diff --git a/crates/bashkit/tests/blackbox_security_tests.rs b/crates/bashkit/tests/blackbox_security_tests.rs index a43ea38a..8b84d710 100644 --- a/crates/bashkit/tests/blackbox_security_tests.rs +++ b/crates/bashkit/tests/blackbox_security_tests.rs @@ -24,6 +24,7 @@ fn tight_bash() -> Bash { .max_loop_iterations(100) .max_total_loop_iterations(500) .max_function_depth(20) + .max_subst_depth(15) .timeout(Duration::from_secs(5)), ) .build() @@ -38,6 +39,7 @@ fn dos_bash() -> Bash { .max_loop_iterations(10) .max_total_loop_iterations(50) .max_function_depth(5) + .max_subst_depth(3) .timeout(Duration::from_secs(3)), ) .build() diff --git a/crates/bashkit/tests/subst_depth_limit_tests.rs b/crates/bashkit/tests/subst_depth_limit_tests.rs new file mode 100644 index 00000000..a4d55c7b --- /dev/null +++ b/crates/bashkit/tests/subst_depth_limit_tests.rs @@ -0,0 +1,102 @@ +// THREAT[TM-DOS-088]: Command substitution depth limit tests. +// Each $(...) level clones the full interpreter state, so memory ≈ depth × state_size. +// A dedicated max_subst_depth limit (default 32) prevents OOM from deeply nested +// command substitution chains. + +use bashkit::{Bash, ExecutionLimits}; + +/// Deeply nested command substitution must be capped by max_subst_depth. +/// Regression test for issue #1088. +#[tokio::test] +async fn subst_depth_limit_prevents_oom() { + // Build a deeply nested $(...) expression: $(echo $(echo $(echo ...))) + let depth = 40; // exceeds default max_subst_depth of 32 + let mut script = "echo hi".to_string(); + for _ in 0..depth { + script = format!("echo $({})", script); + } + + let mut bash = Bash::builder() + .limits( + ExecutionLimits::new() + .max_commands(500) + .max_subst_depth(5) + .timeout(std::time::Duration::from_secs(5)), + ) + .build(); + + // Must not OOM — should return an error or truncated result + let result = bash.exec(&script).await; + // The execution should either error or succeed with limited depth, + // but must NOT panic or OOM. + assert!(result.is_ok() || result.is_err()); +} + +/// With a low subst depth limit, nested substitutions produce an error. +#[tokio::test] +async fn subst_depth_limit_returns_error() { + let mut script = "echo hi".to_string(); + for _ in 0..10 { + script = format!("echo $({})", script); + } + + let mut bash = Bash::builder() + .limits( + ExecutionLimits::new() + .max_commands(500) + .max_subst_depth(3) + .timeout(std::time::Duration::from_secs(5)), + ) + .build(); + + let result = bash.exec(&script).await; + match result { + Ok(r) => { + // Execution succeeded but depth was limited (error message in stderr + // or result was truncated) + assert!( + r.exit_code != 0 || r.stderr.contains("substitution depth"), + "expected error from deep nesting, got exit_code={} stderr={:?}", + r.exit_code, + r.stderr + ); + } + Err(e) => { + let msg = e.to_string(); + assert!( + msg.contains("substitution depth") || msg.contains("nesting"), + "unexpected error: {}", + msg + ); + } + } +} + +/// Fuzz crash input from issue #1088 (decoded from base64). +/// Original: agsfXzpfX19fX19nX19fJChbXTBfBQUFBQUfXzpfX19fX19nX18FBQUFBQQFBQUFBQUFBQUFBQUFBQUFBQUFBQU= +#[tokio::test] +async fn fuzz_crash_1088_oom_input() { + use base64::Engine; + let b64 = + "agsfXzpfX19fX19nX19fJChbXTBfBQUFBQUfXzpfX19fX19nX18FBQUFBQQFBQUFBQUFBQUFBQUFBQUFBQUFBQU="; + let decoded = base64::engine::general_purpose::STANDARD + .decode(b64) + .unwrap_or_default(); + let input = String::from_utf8_lossy(&decoded); + let script = format!("echo $(({})) 2>/dev/null", input); + + let mut bash = Bash::builder() + .limits( + ExecutionLimits::new() + .max_commands(100) + .max_function_depth(10) + .max_subst_depth(5) + .max_stdout_bytes(4096) + .max_stderr_bytes(4096) + .timeout(std::time::Duration::from_millis(500)), + ) + .build(); + + // Must not panic or OOM + let _ = bash.exec(&script).await; +} diff --git a/specs/006-threat-model.md b/specs/006-threat-model.md index 88846589..e37800c1 100644 --- a/specs/006-threat-model.md +++ b/specs/006-threat-model.md @@ -1228,6 +1228,7 @@ This section maps former vulnerability IDs to the new threat ID scheme and track | TM-ISO-024 | `$?` leaks into VFS subprocess | Parent `last_exit_code` visible inside VFS script subprocess, causing false `set -e` failures | Reset `last_exit_code = 0` and `nounset_error = None` in `execute_script_content` subprocess isolation | | TM-INT-007 | `/dev/urandom` empty with `head -c` | Weak randomness — `head -c 16 /dev/urandom` returns empty string | Fix virtual device pipe handling in head builtin | | TM-DOS-044 | Nested `$()` stack overflow (regression) | Process crash (SIGABRT) at depth ~50 despite #492 fix | Interpreter execution path may need separate depth tracking from lexer fix | +| TM-DOS-088 | Command substitution OOM via state cloning | OOM at depth N (memory ≈ N × state_size) | Dedicated `max_subst_depth` limit (default 32), separate from `max_function_depth` — **FIXED** via #1088 | ### Accepted (Low Priority) @@ -1322,6 +1323,7 @@ This section maps former vulnerability IDs to the new threat ID scheme and track | Brace expansion DoS | TM-DOS-041, TM-DOS-042 | Cap range size and total expansion count | **NEEDED** | | Arithmetic overflow in compound assignment | TM-DOS-043 | Use `wrapping_*` ops in `execute_arithmetic_with_side_effects` | **NEEDED** | | Lexer stack overflow | TM-DOS-044 | Depth tracking in `read_command_subst_into` | **NEEDED** | +| Cmd subst OOM via state cloning | TM-DOS-088 | `max_subst_depth` limit in `ExecutionLimits` | **DONE** | | OverlayFs symlink limit bypass | TM-DOS-045 | `check_write_limits()` + `validate_path()` in `symlink()` | **NEEDED** | | MountableFs path validation gap | TM-DOS-046 | `validate_path()` in all MountableFs methods | **NEEDED** | | VFS copy/rename semantic bugs | TM-DOS-047, TM-DOS-048 | Fix limit check in copy(), type check in rename() | **NEEDED** |