From c219d638a0be61960527a4acc8871e63a2df4ecc Mon Sep 17 00:00:00 2001
From: Mykhailo Chalyi
Date: Thu, 26 Mar 2026 13:49:09 +0000
Subject: [PATCH] fix(vfs): preserve raw bytes when reading /dev/urandom
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #811 — /dev/urandom now returns raw bytes instead of UTF-8
replacement characters.

Changed file content conversion from String::from_utf8_lossy (which
replaces every invalid UTF-8 sequence with U+FFFD) to a lossless decode:
valid UTF-8 is kept as-is, so ordinary text files and scripts are
unaffected, and anything else falls back to a Latin-1 mapping (each byte
0x00-0xFF maps to its Unicode code point).
---
 crates/bashkit/src/builtins/headtail.rs | 12 +++++++++---
 crates/bashkit/src/interpreter/mod.rs   | 30 +++++++++++++++++++++---------
 crates/bashkit/tests/urandom_tests.rs   | 24 ++++++++++++++++++++++++
 3 files changed, 54 insertions(+), 12 deletions(-)
 create mode 100644 crates/bashkit/tests/urandom_tests.rs

diff --git a/crates/bashkit/src/builtins/headtail.rs b/crates/bashkit/src/builtins/headtail.rs
index 37083b6d..d2d3ce32 100644
--- a/crates/bashkit/src/builtins/headtail.rs
+++ b/crates/bashkit/src/builtins/headtail.rs
@@ -55,11 +55,17 @@ impl Builtin for Head {
                 match ctx.fs.read_file(&path).await {
                     Ok(content) => {
+                        // Valid UTF-8 is kept verbatim; anything else (e.g. /dev/urandom)
+                        // maps byte 0xNN to U+00NN instead of collapsing to U+FFFD.
+                        let decode = |raw: &[u8]| match std::str::from_utf8(raw) {
+                            Ok(text) => text.to_owned(),
+                            Err(_) => raw.iter().copied().map(char::from).collect::<String>(),
+                        };
                         if byte_mode {
-                            // Byte mode: take first N bytes, lossy convert
+                            // Byte mode: take first N bytes, then decode without loss
                             let bytes = &content[..content.len().min(count)];
-                            output.push_str(&String::from_utf8_lossy(bytes));
+                            output.push_str(&decode(bytes));
                         } else {
-                            let text = String::from_utf8_lossy(&content);
+                            let text = decode(&content[..]);
                             output.push_str(&take_first_lines(&text, count));
                         }
                     }
diff --git a/crates/bashkit/src/interpreter/mod.rs b/crates/bashkit/src/interpreter/mod.rs
index 50e377d9..a90b249a 100644
--- a/crates/bashkit/src/interpreter/mod.rs
+++ b/crates/bashkit/src/interpreter/mod.rs
@@ -256,6 +256,18 @@ fn command_not_found_message(name: &str, known_commands: &[&str]) -> String {
 
+/// Decode file bytes into a `String` without data loss.
+///
+/// Valid UTF-8 is kept verbatim so ordinary text and scripts are untouched. Anything
+/// else (e.g. `/dev/urandom`) falls back to Latin-1, mapping byte 0xNN to U+00NN,
+/// instead of collapsing invalid sequences to U+FFFD as `from_utf8_lossy` does.
+fn bytes_to_latin1_string(bytes: &[u8]) -> String {
+    match std::str::from_utf8(bytes) {
+        Ok(text) => text.to_owned(),
+        Err(_) => bytes.iter().copied().map(char::from).collect(),
+    }
+}
+
 /// Check if a path refers to /dev/null after normalization.
 /// Handles attempts to bypass via paths like `/dev/../dev/null`.
 fn is_dev_null(path: &Path) -> bool {
     // Normalize the path to handle .. and . components
     let mut normalized = PathBuf::new();
@@ -2624,7 +2636,7 @@ impl Interpreter {
         } else if let Some(ref file) = script_file {
             let path = self.resolve_path(file);
             match self.fs.read_file(&path).await {
-                Ok(content) => String::from_utf8_lossy(&content).to_string(),
+                Ok(content) => bytes_to_latin1_string(&content),
                 Err(_) => {
                     return Ok(ExecResult::err(
                         format!("{}: {}: No such file or directory\n", shell_name, file),
@@ -3265,7 +3277,7 @@ impl Interpreter {
         for (path_str, commands) in deferred {
             let path = Path::new(&path_str);
             let stdin_data = if let Ok(bytes) = self.fs.read_file(path).await {
-                let s = String::from_utf8_lossy(&bytes).to_string();
+                let s = bytes_to_latin1_string(&bytes);
                 if s.is_empty() { None } else { Some(s) }
             } else {
                 None
@@ -3630,7 +3642,7 @@ impl Interpreter {
                 let target_path = self.expand_word(&redirect.target).await?;
                 let path = self.resolve_path(&target_path);
                 let content = self.fs.read_file(&path).await?;
-                let text = String::from_utf8_lossy(&content).to_string();
+                let text = bytes_to_latin1_string(&content);
                 let lines: Vec<String> = text.lines().rev().map(|l| l.to_string()).collect();
                 self.coproc_buffers.insert(fd, lines);
 
@@ -3889,7 +3901,7 @@ impl Interpreter {
 
         // Read file content
         let content = match self.fs.read_file(&path).await {
-            Ok(c) => String::from_utf8_lossy(&c).to_string(),
+            Ok(c) => bytes_to_latin1_string(&c),
             Err(_) => {
                 return Ok(ExecResult::err(
                     format!("bash: {}: No such file or directory", name),
@@ -3932,7 +3944,7 @@ impl Interpreter {
                 continue;
             }
             if let Ok(content) = self.fs.read_file(&candidate).await {
-                let script_text = String::from_utf8_lossy(&content).to_string();
+                let script_text = bytes_to_latin1_string(&content);
                 let result = self
                     .execute_script_content(name, &script_text, args, stdin, redirects)
                     .await?;
@@ -4063,7 +4075,7 @@ impl Interpreter {
         let content = if filename.contains('/') {
             let path = self.resolve_path(filename);
             match self.fs.read_file(&path).await {
-                Ok(c) => String::from_utf8_lossy(&c).to_string(),
+                Ok(c) => bytes_to_latin1_string(&c),
                 Err(_) => {
                     return Ok(ExecResult::err(
                         format!("source: {}: No such file or directory", filename),
@@ -4086,7 +4098,7 @@ impl Interpreter {
                     }
                     let candidate = PathBuf::from(dir).join(filename);
                     if let Ok(c) = self.fs.read_file(&candidate).await {
-                        found = Some(String::from_utf8_lossy(&c).to_string());
+                        found = Some(bytes_to_latin1_string(&c));
                         break;
                     }
                 }
@@ -4094,7 +4106,7 @@ impl Interpreter {
             if found.is_none() {
                 let path = self.resolve_path(filename);
                 if let Ok(c) = self.fs.read_file(&path).await {
-                    found = Some(String::from_utf8_lossy(&c).to_string());
+                    found = Some(bytes_to_latin1_string(&c));
                 }
             }
             match found {
@@ -5199,7 +5211,7 @@ impl Interpreter {
                         stdin = Some(String::new()); // EOF
                     } else {
                         let content = self.fs.read_file(&path).await?;
-                        stdin = Some(String::from_utf8_lossy(&content).to_string());
+                        stdin = Some(bytes_to_latin1_string(&content));
                     }
                 }
                 RedirectKind::HereString => {
diff --git a/crates/bashkit/tests/urandom_tests.rs b/crates/bashkit/tests/urandom_tests.rs
new file mode 100644
index 00000000..be955855
--- /dev/null
+++ b/crates/bashkit/tests/urandom_tests.rs
@@ -0,0 +1,24 @@
+//! Tests for /dev/urandom raw byte handling
+
+use bashkit::Bash;
+
+/// Issue #811: /dev/urandom should return raw bytes, not UTF-8 replacement chars
+#[tokio::test]
+async fn urandom_no_replacement_chars() {
+    let mut bash = Bash::new();
+    // Dump 100 random bytes as space-separated hex pairs (one token per byte)
+    let result = bash
+        .exec("head -c 100 /dev/urandom | od -A n -t x1")
+        .await
+        .unwrap();
+    let bytes: Vec<&str> = result.stdout.split_whitespace().collect();
+    // An empty dump would make the check below pass vacuously
+    assert!(!bytes.is_empty(), "expected a hex dump, got empty output");
+    // U+FFFD is EF BF BD in UTF-8; compare whole byte tokens so nibbles of
+    // neighbouring bytes (e.g. "0e fb fb d0") cannot form the pattern by accident
+    assert!(
+        !bytes.windows(3).any(|w| w == ["ef", "bf", "bd"]),
+        "Output should not contain UTF-8 replacement chars: {}",
+        bytes[..bytes.len().min(20)].join(" ")
+    );
+}