From 4bf8e202fb11c851b76ba292fa4c79dd765330b3 Mon Sep 17 00:00:00 2001
From: Mykhailo Chalyi <mike@chaliy.name>
Date: Tue, 31 Mar 2026 10:10:37 +0000
Subject: [PATCH] fix(security): sanitize internal state in error messages
 (TM-INF-016)

Add sanitize_error_message() that strips host paths, IPv4/IPv6
addresses, and TLS details from error messages exposed to sandbox
guests. Network client errors now use Error::network_sanitized()
to wrap reqwest errors. Update threat model to mark TM-INF-016 fixed.

Closes #909
---
 crates/bashkit/src/error.rs          | 106 +++++++++++++++++++++++++++
 crates/bashkit/src/network/client.rs |   8 +-
 specs/006-threat-model.md            |   4 +-
 3 files changed, 112 insertions(+), 6 deletions(-)
diff --git a/crates/bashkit/src/error.rs b/crates/bashkit/src/error.rs
index 430645d1..918ae30a 100644
--- a/crates/bashkit/src/error.rs
+++ b/crates/bashkit/src/error.rs
@@ -86,4 +86,110 @@ impl Error {
             column: 0,
         }
     }
+
+    /// THREAT[TM-INF-016]: Create an I/O error with sanitized message.
+    /// Strips host-internal paths from the error message to prevent information
+    /// leakage to the sandbox guest.
+    pub fn io_sanitized(err: std::io::Error) -> Self {
+        Self::Io(std::io::Error::new(
+            err.kind(),
+            sanitize_error_message(&err.to_string()),
+        ))
+    }
+
+    /// THREAT[TM-INF-016]: Create a network error with sanitized message.
+    /// Strips resolved IPs, TLS details, and DNS info from reqwest errors.
+    pub fn network_sanitized(context: &str, err: &dyn std::fmt::Display) -> Self {
+        Self::Network(format!(
+            "{}: {}",
+            context,
+            sanitize_error_message(&err.to_string())
+        ))
+    }
+}
+
+/// THREAT[TM-INF-016]: Sanitize error messages to prevent information leakage.
+/// Strips:
+/// - Host filesystem paths (anything starting with /)
+/// - Resolved IP addresses (IPv4 and IPv6)
+/// - TLS/SSL negotiation details
+fn sanitize_error_message(msg: &str) -> String {
+    use std::sync::LazyLock;
+
+    static PATH_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
+        regex::Regex::new(
+            r#"(/(?:home|usr|var|etc|opt|root|proc|sys|run|snap|nix|mnt|media)[/][^\s:"']+)"#,
+        )
+        .expect("path regex")
+    });
+    static IPV4_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
+        regex::Regex::new(r"\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}(:\d+)?\b").expect("ipv4 regex")
+    });
+    static IPV6_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
+        regex::Regex::new(r"\[?[0-9a-fA-F:]{3,39}\]?(:\d+)?").expect("ipv6 regex")
+    });
+    static TLS_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
+        regex::Regex::new(r"(?i)(ssl|tls)\s*(handshake|negotiation|error|alert)[^.;]*[.;]?")
+            .expect("tls regex")
+    });
+
+    let mut result = msg.to_string();
+
+    // Strip absolute host paths (preserve VFS paths like /tmp, /dev/null)
+    result = PATH_RE.replace_all(&result, "<path>").to_string();
+
+    // Strip IPv4 addresses
+    result = IPV4_RE.replace_all(&result, "<address>").to_string();
+
+    // Strip IPv6 addresses (only if :: present to avoid false positives)
+    if result.contains("::") {
+        result = IPV6_RE.replace_all(&result, "<address>").to_string();
+    }
+
+    // Strip TLS/SSL handshake details
+    result = TLS_RE.replace_all(&result, "<tls-error>").to_string();
+
+    result
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn sanitize_strips_host_paths() {
+        let msg = "No such file: /home/user/.config/bashkit/settings.json";
+        let sanitized = sanitize_error_message(msg);
+        assert!(!sanitized.contains("/home/user"));
+        assert!(sanitized.contains("<path>"));
+    }
+
+    #[test]
+    fn sanitize_strips_ipv4() {
+        let msg = "connection refused: 192.168.1.100:8080";
+        let sanitized = sanitize_error_message(msg);
+        assert!(!sanitized.contains("192.168"));
+        assert!(sanitized.contains("<address>"));
+    }
+
+    #[test]
+    fn sanitize_strips_tls_details() {
+        let msg = "SSL handshake failed with cipher TLS_AES_256_GCM;";
+        let sanitized = sanitize_error_message(msg);
+        assert!(!sanitized.contains("cipher"));
+        assert!(sanitized.contains("<tls-error>"));
+    }
+
+    #[test]
+    fn sanitize_preserves_safe_paths() {
+        let msg = "file not found: /tmp/script.sh";
+        let sanitized = sanitize_error_message(msg);
+        assert!(sanitized.contains("/tmp/script.sh"));
+    }
+
+    #[test]
+    fn sanitize_preserves_generic_messages() {
+        let msg = "operation timed out";
+        assert_eq!(sanitize_error_message(msg), msg);
+    }
 }
diff --git a/crates/bashkit/src/network/client.rs b/crates/bashkit/src/network/client.rs
index 5b4213c3..04658207 100644
--- a/crates/bashkit/src/network/client.rs
+++ b/crates/bashkit/src/network/client.rs
@@ -217,7 +217,7 @@ impl HttpClient {
         let response = request
             .send()
             .await
-            .map_err(|e| Error::Network(format!("request failed: {}", e)))?;
+            .map_err(|e| Error::network_sanitized("request failed", &e))?;
 
         // Extract response data
         let status = response.status().as_u16();
@@ -259,7 +259,7 @@ impl HttpClient {
 
         while let Some(chunk_result) = stream.next().await {
             let chunk = chunk_result
-                .map_err(|e| Error::Network(format!("failed to read response chunk: {}", e)))?;
+                .map_err(|e| Error::network_sanitized("failed to read response chunk", &e))?;
 
             // Check if adding this chunk would exceed the limit
             if body.len() + chunk.len() > self.max_response_bytes {
@@ -367,7 +367,7 @@ impl HttpClient {
                 |s| Duration::from_secs(clamp_timeout(s)),
             );
             build_client(timeout, Some(connect_timeout))
-                .map_err(|e| Error::Network(format!("failed to create client: {}", e)))?
+                .map_err(|e| Error::network_sanitized("failed to create client", &e))?
         } else {
             self.client()?.clone()
         };
@@ -390,7 +390,7 @@ impl HttpClient {
             if e.is_timeout() {
                 Error::Network("operation timed out".to_string())
             } else {
-                Error::Network(format!("request failed: {}", e))
+                Error::network_sanitized("request failed", &e)
             }
         })?;
 
diff --git a/specs/006-threat-model.md b/specs/006-threat-model.md
index 71d317e2..96e9f141 100644
--- a/specs/006-threat-model.md
+++ b/specs/006-threat-model.md
@@ -400,7 +400,7 @@ All execution stays within the virtual interpreter — no OS subprocess is spawn
 | TM-INF-013 | Host env leak via jq | jq now uses custom `$__bashkit_env__` variable, not `std::env` | — | **FIXED** (2026-03 audit verified) |
 | TM-INF-014 | Real PID leak via $$ | `$$` now returns virtual PID (1) instead of real process ID | — | **FIXED** (2026-03 audit verified) |
 | TM-INF-015 | URL credentials in errors | Allowlist "blocked" error echoes full URL including credentials | — | **OPEN** |
-| TM-INF-016 | Internal state in error messages | `std::io::Error`, reqwest errors, Debug-formatted errors leak host paths/IPs/TLS info | — | **OPEN** |
+| TM-INF-016 | Internal state in error messages | `std::io::Error`, reqwest errors, Debug-formatted errors leak host paths/IPs/TLS info | `sanitize_error_message()` strips paths/IPs/TLS; `Error::network_sanitized()` wraps reqwest | **FIXED** |
 | TM-INF-019 | `envsubst` exposes all env vars | `envsubst` substitutes `$VAR`/`${VAR}` from `ctx.env` — scripts can probe any env var | Same as TM-INF-001 (caller controls env) | **CALLER RISK** |
 | TM-INF-020 | `template` exposes env vars via `{{var}}` | Template builtin looks up variables from env as fallback after shell vars and JSON data | Same as TM-INF-001 (caller controls env) | **CALLER RISK** |
 
@@ -1291,7 +1291,7 @@ This section maps former vulnerability IDs to the new threat ID scheme and track
 | Python config preservation on reset | TM-PY-026 | Store and reapply builder config | **NEEDED** |
 | JSON conversion depth limit | TM-PY-027 | Depth counter in `py_to_json`/`json_to_py` | **NEEDED** |
 | Cyclic nameref detection | TM-INJ-011 | Track visited names, emit error on cycle | **NEEDED** |
-| Error message sanitization gaps | TM-INF-016 | Consistent Display format, wrap external errors | **NEEDED** |
+| Error message sanitization gaps | TM-INF-016 | Consistent Display format, wrap external errors | **DONE** |
 | 32-bit integer safety | TM-DOS-040 | `usize::try_from()` for `u64` casts | **NEEDED** |
 
 ### Open Controls (From 2026-03 Deep Audit)