From 4bf8e202fb11c851b76ba292fa4c79dd765330b3 Mon Sep 17 00:00:00 2001 From: Mykhailo Chalyi Date: Tue, 31 Mar 2026 10:10:37 +0000 Subject: [PATCH] fix(security): sanitize internal state in error messages (TM-INF-016) Add sanitize_error_message() that strips host paths, IPv4/IPv6 addresses, and TLS details from error messages exposed to sandbox guests. Network client errors now use Error::network_sanitized() to wrap reqwest errors. Update threat model to mark TM-INF-016 fixed. Closes #909 --- crates/bashkit/src/error.rs | 106 +++++++++++++++++++++++++++ crates/bashkit/src/network/client.rs | 8 +- specs/006-threat-model.md | 4 +- 3 files changed, 112 insertions(+), 6 deletions(-) diff --git a/crates/bashkit/src/error.rs b/crates/bashkit/src/error.rs index 430645d1..918ae30a 100644 --- a/crates/bashkit/src/error.rs +++ b/crates/bashkit/src/error.rs @@ -86,4 +86,110 @@ impl Error { column: 0, } } + + /// THREAT[TM-INF-016]: Create an I/O error with sanitized message. + /// Strips host-internal paths from the error message to prevent information + /// leakage to the sandbox guest. + pub fn io_sanitized(err: std::io::Error) -> Self { + Self::Io(std::io::Error::new( + err.kind(), + sanitize_error_message(&err.to_string()), + )) + } + + /// THREAT[TM-INF-016]: Create a network error with sanitized message. + /// Strips resolved IPs, TLS details, and DNS info from reqwest errors. + pub fn network_sanitized(context: &str, err: &dyn std::fmt::Display) -> Self { + Self::Network(format!( + "{}: {}", + context, + sanitize_error_message(&err.to_string()) + )) + } +} + +/// THREAT[TM-INF-016]: Sanitize error messages to prevent information leakage. +/// Strips: +/// - Host filesystem paths (anything starting with /) +/// - Resolved IP addresses (IPv4 and IPv6) +/// - TLS/SSL negotiation details +fn sanitize_error_message(msg: &str) -> String { + use std::sync::LazyLock; + + static PATH_RE: LazyLock = LazyLock::new(|| { + regex::Regex::new( + r#"(/(?:home|usr|var|etc|opt|root|proc|sys|run|snap|nix|mnt|media)[/][^\s:"']+)"#, + ) + .expect("path regex") + }); + static IPV4_RE: LazyLock = LazyLock::new(|| { + regex::Regex::new(r"\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}(:\d+)?\b").expect("ipv4 regex") + }); + static IPV6_RE: LazyLock = LazyLock::new(|| { + regex::Regex::new(r"\[?[0-9a-fA-F:]{3,39}\]?(:\d+)?").expect("ipv6 regex") + }); + static TLS_RE: LazyLock = LazyLock::new(|| { + regex::Regex::new(r"(?i)(ssl|tls)\s*(handshake|negotiation|error|alert)[^.;]*[.;]?") + .expect("tls regex") + }); + + let mut result = msg.to_string(); + + // Strip absolute host paths (preserve VFS paths like /tmp, /dev/null) + result = PATH_RE.replace_all(&result, "").to_string(); + + // Strip IPv4 addresses + result = IPV4_RE.replace_all(&result, "
").to_string(); + + // Strip IPv6 addresses (only if :: present to avoid false positives) + if result.contains("::") { + result = IPV6_RE.replace_all(&result, "
").to_string(); + } + + // Strip TLS/SSL handshake details + result = TLS_RE.replace_all(&result, "").to_string(); + + result +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn sanitize_strips_host_paths() { + let msg = "No such file: /home/user/.config/bashkit/settings.json"; + let sanitized = sanitize_error_message(msg); + assert!(!sanitized.contains("/home/user")); + assert!(sanitized.contains("")); + } + + #[test] + fn sanitize_strips_ipv4() { + let msg = "connection refused: 192.168.1.100:8080"; + let sanitized = sanitize_error_message(msg); + assert!(!sanitized.contains("192.168")); + assert!(sanitized.contains("
")); + } + + #[test] + fn sanitize_strips_tls_details() { + let msg = "SSL handshake failed with cipher TLS_AES_256_GCM;"; + let sanitized = sanitize_error_message(msg); + assert!(!sanitized.contains("cipher")); + assert!(sanitized.contains("")); + } + + #[test] + fn sanitize_preserves_safe_paths() { + let msg = "file not found: /tmp/script.sh"; + let sanitized = sanitize_error_message(msg); + assert!(sanitized.contains("/tmp/script.sh")); + } + + #[test] + fn sanitize_preserves_generic_messages() { + let msg = "operation timed out"; + assert_eq!(sanitize_error_message(msg), msg); + } } diff --git a/crates/bashkit/src/network/client.rs b/crates/bashkit/src/network/client.rs index 5b4213c3..04658207 100644 --- a/crates/bashkit/src/network/client.rs +++ b/crates/bashkit/src/network/client.rs @@ -217,7 +217,7 @@ impl HttpClient { let response = request .send() .await - .map_err(|e| Error::Network(format!("request failed: {}", e)))?; + .map_err(|e| Error::network_sanitized("request failed", &e))?; // Extract response data let status = response.status().as_u16(); @@ -259,7 +259,7 @@ impl HttpClient { while let Some(chunk_result) = stream.next().await { let chunk = chunk_result - .map_err(|e| Error::Network(format!("failed to read response chunk: {}", e)))?; + .map_err(|e| Error::network_sanitized("failed to read response chunk", &e))?; // Check if adding this chunk would exceed the limit if body.len() + chunk.len() > self.max_response_bytes { @@ -367,7 +367,7 @@ impl HttpClient { |s| Duration::from_secs(clamp_timeout(s)), ); build_client(timeout, Some(connect_timeout)) - .map_err(|e| Error::Network(format!("failed to create client: {}", e)))? + .map_err(|e| Error::network_sanitized("failed to create client", &e))? } else { self.client()?.clone() }; @@ -390,7 +390,7 @@ impl HttpClient { if e.is_timeout() { Error::Network("operation timed out".to_string()) } else { - Error::Network(format!("request failed: {}", e)) + Error::network_sanitized("request failed", &e) } })?; diff --git a/specs/006-threat-model.md b/specs/006-threat-model.md index 71d317e2..96e9f141 100644 --- a/specs/006-threat-model.md +++ b/specs/006-threat-model.md @@ -400,7 +400,7 @@ All execution stays within the virtual interpreter — no OS subprocess is spawn | TM-INF-013 | Host env leak via jq | jq now uses custom `$__bashkit_env__` variable, not `std::env` | — | **FIXED** (2026-03 audit verified) | | TM-INF-014 | Real PID leak via $$ | `$$` now returns virtual PID (1) instead of real process ID | — | **FIXED** (2026-03 audit verified) | | TM-INF-015 | URL credentials in errors | Allowlist "blocked" error echoes full URL including credentials | — | **OPEN** | -| TM-INF-016 | Internal state in error messages | `std::io::Error`, reqwest errors, Debug-formatted errors leak host paths/IPs/TLS info | — | **OPEN** | +| TM-INF-016 | Internal state in error messages | `std::io::Error`, reqwest errors, Debug-formatted errors leak host paths/IPs/TLS info | `sanitize_error_message()` strips paths/IPs/TLS; `Error::network_sanitized()` wraps reqwest | **FIXED** | | TM-INF-019 | `envsubst` exposes all env vars | `envsubst` substitutes `$VAR`/`${VAR}` from `ctx.env` — scripts can probe any env var | Same as TM-INF-001 (caller controls env) | **CALLER RISK** | | TM-INF-020 | `template` exposes env vars via `{{var}}` | Template builtin looks up variables from env as fallback after shell vars and JSON data | Same as TM-INF-001 (caller controls env) | **CALLER RISK** | @@ -1291,7 +1291,7 @@ This section maps former vulnerability IDs to the new threat ID scheme and track | Python config preservation on reset | TM-PY-026 | Store and reapply builder config | **NEEDED** | | JSON conversion depth limit | TM-PY-027 | Depth counter in `py_to_json`/`json_to_py` | **NEEDED** | | Cyclic nameref detection | TM-INJ-011 | Track visited names, emit error on cycle | **NEEDED** | -| Error message sanitization gaps | TM-INF-016 | Consistent Display format, wrap external errors | **NEEDED** | +| Error message sanitization gaps | TM-INF-016 | Consistent Display format, wrap external errors | **DONE** | | 32-bit integer safety | TM-DOS-040 | `usize::try_from()` for `u64` casts | **NEEDED** | ### Open Controls (From 2026-03 Deep Audit)