From c219d638a0be61960527a4acc8871e63a2df4ecc Mon Sep 17 00:00:00 2001
From: Mykhailo Chalyi <mike@chaliy.name>
Date: Thu, 26 Mar 2026 13:49:09 +0000
Subject: [PATCH] fix(vfs): preserve raw bytes when reading /dev/urandom
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #811 — /dev/urandom now returns raw bytes instead of UTF-8
replacement characters. Changed file content conversion from
String::from_utf8_lossy (which replaces every invalid UTF-8 sequence
with U+FFFD) to Latin-1 mapping (each byte 0x00-0xFF maps to the
Unicode code point of the same value).
---
 crates/bashkit/src/builtins/headtail.rs |  7 ++++---
 crates/bashkit/src/interpreter/mod.rs   | 25 ++++++++++++++++---------
 crates/bashkit/tests/urandom_tests.rs   | 21 +++++++++++++++++++++
 3 files changed, 41 insertions(+), 12 deletions(-)
 create mode 100644 crates/bashkit/tests/urandom_tests.rs

diff --git a/crates/bashkit/src/builtins/headtail.rs b/crates/bashkit/src/builtins/headtail.rs
index 37083b6d..d2d3ce32 100644
--- a/crates/bashkit/src/builtins/headtail.rs
+++ b/crates/bashkit/src/builtins/headtail.rs
@@ -55,11 +55,12 @@ impl Builtin for Head {
                 match ctx.fs.read_file(&path).await {
                     Ok(content) => {
                         if byte_mode {
-                            // Byte mode: take first N bytes, lossy convert
+                            // Byte mode: take first N bytes, preserve raw byte values
                             let bytes = &content[..content.len().min(count)];
-                            output.push_str(&String::from_utf8_lossy(bytes));
+                            let s: String = bytes.iter().map(|&b| b as char).collect();
+                            output.push_str(&s);
                         } else {
-                            let text = String::from_utf8_lossy(&content);
+                            let text: String = content.iter().map(|&b| b as char).collect();
                             output.push_str(&take_first_lines(&text, count));
                         }
                     }
diff --git a/crates/bashkit/src/interpreter/mod.rs b/crates/bashkit/src/interpreter/mod.rs
index 50e377d9..a90b249a 100644
--- a/crates/bashkit/src/interpreter/mod.rs
+++ b/crates/bashkit/src/interpreter/mod.rs
@@ -256,6 +256,13 @@ fn command_not_found_message(name: &str, known_commands: &[&str]) -> String {
 
 /// Check if a path refers to /dev/null after normalization.
 /// Handles attempts to bypass via paths like `/dev/../dev/null`.
+/// Convert bytes to string preserving all byte values (Latin-1/ISO 8859-1 mapping).
+/// Each byte 0x00-0xFF maps to the corresponding Unicode code point.
+/// This avoids the lossy UTF-8 conversion that replaces bytes > 0x7F with U+FFFD.
+fn bytes_to_latin1_string(bytes: &[u8]) -> String {
+    bytes.iter().map(|&b| b as char).collect()
+}
+
 fn is_dev_null(path: &Path) -> bool {
     // Normalize the path to handle .. and . components
     let mut normalized = PathBuf::new();
@@ -2624,7 +2631,7 @@ impl Interpreter {
         } else if let Some(ref file) = script_file {
             let path = self.resolve_path(file);
             match self.fs.read_file(&path).await {
-                Ok(content) => String::from_utf8_lossy(&content).to_string(),
+                Ok(content) => bytes_to_latin1_string(&content),
                 Err(_) => {
                     return Ok(ExecResult::err(
                         format!("{}: {}: No such file or directory\n", shell_name, file),
@@ -3265,7 +3272,7 @@ impl Interpreter {
         for (path_str, commands) in deferred {
             let path = Path::new(&path_str);
             let stdin_data = if let Ok(bytes) = self.fs.read_file(path).await {
-                let s = String::from_utf8_lossy(&bytes).to_string();
+                let s = bytes_to_latin1_string(&bytes);
                 if s.is_empty() { None } else { Some(s) }
             } else {
                 None
@@ -3630,7 +3637,7 @@ impl Interpreter {
                         let target_path = self.expand_word(&redirect.target).await?;
                         let path = self.resolve_path(&target_path);
                         let content = self.fs.read_file(&path).await?;
-                        let text = String::from_utf8_lossy(&content).to_string();
+                        let text = bytes_to_latin1_string(&content);
                         let lines: Vec<String> =
                             text.lines().rev().map(|l| l.to_string()).collect();
                         self.coproc_buffers.insert(fd, lines);
@@ -3889,7 +3896,7 @@ impl Interpreter {
 
         // Read file content
         let content = match self.fs.read_file(&path).await {
-            Ok(c) => String::from_utf8_lossy(&c).to_string(),
+            Ok(c) => bytes_to_latin1_string(&c),
             Err(_) => {
                 return Ok(ExecResult::err(
                     format!("bash: {}: No such file or directory", name),
@@ -3932,7 +3939,7 @@ impl Interpreter {
                     continue;
                 }
                 if let Ok(content) = self.fs.read_file(&candidate).await {
-                    let script_text = String::from_utf8_lossy(&content).to_string();
+                    let script_text = bytes_to_latin1_string(&content);
                     let result = self
                         .execute_script_content(name, &script_text, args, stdin, redirects)
                         .await?;
@@ -4063,7 +4070,7 @@ impl Interpreter {
         let content = if filename.contains('/') {
             let path = self.resolve_path(filename);
             match self.fs.read_file(&path).await {
-                Ok(c) => String::from_utf8_lossy(&c).to_string(),
+                Ok(c) => bytes_to_latin1_string(&c),
                 Err(_) => {
                     return Ok(ExecResult::err(
                         format!("source: {}: No such file or directory", filename),
@@ -4086,7 +4093,7 @@ impl Interpreter {
                 }
                 let candidate = PathBuf::from(dir).join(filename);
                 if let Ok(c) = self.fs.read_file(&candidate).await {
-                    found = Some(String::from_utf8_lossy(&c).to_string());
+                    found = Some(bytes_to_latin1_string(&c));
                     break;
                 }
             }
@@ -4094,7 +4101,7 @@ impl Interpreter {
             if found.is_none() {
                 let path = self.resolve_path(filename);
                 if let Ok(c) = self.fs.read_file(&path).await {
-                    found = Some(String::from_utf8_lossy(&c).to_string());
+                    found = Some(bytes_to_latin1_string(&c));
                 }
             }
             match found {
@@ -5199,7 +5206,7 @@ impl Interpreter {
                         stdin = Some(String::new()); // EOF
                     } else {
                         let content = self.fs.read_file(&path).await?;
-                        stdin = Some(String::from_utf8_lossy(&content).to_string());
+                        stdin = Some(bytes_to_latin1_string(&content));
                     }
                 }
                 RedirectKind::HereString => {
diff --git a/crates/bashkit/tests/urandom_tests.rs b/crates/bashkit/tests/urandom_tests.rs
new file mode 100644
index 00000000..be955855
--- /dev/null
+++ b/crates/bashkit/tests/urandom_tests.rs
@@ -0,0 +1,21 @@
+//! Tests for /dev/urandom raw byte handling
+
+use bashkit::Bash;
+
+/// Issue #811: /dev/urandom should return raw bytes, not UTF-8 replacement chars.
+#[tokio::test]
+async fn urandom_no_replacement_chars() {
+    // Hex-dump 100 random bytes; `tr` strips spaces/newlines to leave one hex string.
+    let script = "head -c 100 /dev/urandom | od -A n -t x1 | tr -d ' \\n'";
+    let mut bash = Bash::new();
+    let result = bash.exec(script).await.unwrap();
+    let hex = result.stdout.trim();
+    // Guard against a vacuous pass when the pipeline produced no output at all.
+    assert!(!hex.is_empty(), "pipeline produced no output");
+    // U+FFFD is encoded as EF BF BD in UTF-8, so its hex dump would contain "efbfbd".
+    assert!(
+        !hex.contains("efbfbd"),
+        "Output should not contain UTF-8 replacement chars: {}",
+        &hex[..hex.len().min(60)]
+    );
+}