Skip to content

Commit e2c3181

Browse files
authored
fix(vfs): preserve raw bytes when reading /dev/urandom (#828)
## Summary

- Replace `String::from_utf8_lossy` with a Latin-1 byte-to-char mapping for file reads
- Every byte now maps to the code point of the same value (U+0000–U+00FF), so bytes that are not valid UTF-8 are preserved instead of being replaced with U+FFFD
- Applied to interpreter file reads and the `head` builtin (byte mode and line mode)

## Test plan

- [x] `urandom_no_replacement_chars` — verifies no U+FFFD in `od` output
- [x] Full test suite passes

Closes #811
1 parent 45ea773 commit e2c3181

File tree

3 files changed

+41
-12
lines changed

3 files changed

+41
-12
lines changed

crates/bashkit/src/builtins/headtail.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -55,11 +55,12 @@ impl Builtin for Head {
5555
match ctx.fs.read_file(&path).await {
5656
Ok(content) => {
5757
if byte_mode {
58-
// Byte mode: take first N bytes, lossy convert
58+
// Byte mode: take first N bytes, preserve raw byte values
5959
let bytes = &content[..content.len().min(count)];
60-
output.push_str(&String::from_utf8_lossy(bytes));
60+
let s: String = bytes.iter().map(|&b| b as char).collect();
61+
output.push_str(&s);
6162
} else {
62-
let text = String::from_utf8_lossy(&content);
63+
let text: String = content.iter().map(|&b| b as char).collect();
6364
output.push_str(&take_first_lines(&text, count));
6465
}
6566
}

crates/bashkit/src/interpreter/mod.rs

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,13 @@ fn command_not_found_message(name: &str, known_commands: &[&str]) -> String {
256256

257257
/// Converts raw bytes to a `String` using the Latin-1 (ISO 8859-1) mapping.
///
/// Every byte `0x00..=0xFF` becomes the Unicode code point with the same
/// numeric value, so no input byte is dropped or substituted. This differs
/// from `String::from_utf8_lossy`, which replaces invalid UTF-8 sequences
/// with U+FFFD. Note that the input is *not* decoded as UTF-8: a multi-byte
/// UTF-8 sequence yields one `char` per byte.
fn bytes_to_latin1_string(bytes: &[u8]) -> String {
    bytes.iter().copied().map(char::from).collect()
}

/// Check if a path refers to /dev/null after normalization.
/// Handles attempts to bypass via paths like `/dev/../dev/null`.
265+
259266
fn is_dev_null(path: &Path) -> bool {
260267
// Normalize the path to handle .. and . components
261268
let mut normalized = PathBuf::new();
@@ -2624,7 +2631,7 @@ impl Interpreter {
26242631
} else if let Some(ref file) = script_file {
26252632
let path = self.resolve_path(file);
26262633
match self.fs.read_file(&path).await {
2627-
Ok(content) => String::from_utf8_lossy(&content).to_string(),
2634+
Ok(content) => bytes_to_latin1_string(&content),
26282635
Err(_) => {
26292636
return Ok(ExecResult::err(
26302637
format!("{}: {}: No such file or directory\n", shell_name, file),
@@ -3265,7 +3272,7 @@ impl Interpreter {
32653272
for (path_str, commands) in deferred {
32663273
let path = Path::new(&path_str);
32673274
let stdin_data = if let Ok(bytes) = self.fs.read_file(path).await {
3268-
let s = String::from_utf8_lossy(&bytes).to_string();
3275+
let s = bytes_to_latin1_string(&bytes);
32693276
if s.is_empty() { None } else { Some(s) }
32703277
} else {
32713278
None
@@ -3630,7 +3637,7 @@ impl Interpreter {
36303637
let target_path = self.expand_word(&redirect.target).await?;
36313638
let path = self.resolve_path(&target_path);
36323639
let content = self.fs.read_file(&path).await?;
3633-
let text = String::from_utf8_lossy(&content).to_string();
3640+
let text = bytes_to_latin1_string(&content);
36343641
let lines: Vec<String> =
36353642
text.lines().rev().map(|l| l.to_string()).collect();
36363643
self.coproc_buffers.insert(fd, lines);
@@ -3889,7 +3896,7 @@ impl Interpreter {
38893896

38903897
// Read file content
38913898
let content = match self.fs.read_file(&path).await {
3892-
Ok(c) => String::from_utf8_lossy(&c).to_string(),
3899+
Ok(c) => bytes_to_latin1_string(&c),
38933900
Err(_) => {
38943901
return Ok(ExecResult::err(
38953902
format!("bash: {}: No such file or directory", name),
@@ -3932,7 +3939,7 @@ impl Interpreter {
39323939
continue;
39333940
}
39343941
if let Ok(content) = self.fs.read_file(&candidate).await {
3935-
let script_text = String::from_utf8_lossy(&content).to_string();
3942+
let script_text = bytes_to_latin1_string(&content);
39363943
let result = self
39373944
.execute_script_content(name, &script_text, args, stdin, redirects)
39383945
.await?;
@@ -4063,7 +4070,7 @@ impl Interpreter {
40634070
let content = if filename.contains('/') {
40644071
let path = self.resolve_path(filename);
40654072
match self.fs.read_file(&path).await {
4066-
Ok(c) => String::from_utf8_lossy(&c).to_string(),
4073+
Ok(c) => bytes_to_latin1_string(&c),
40674074
Err(_) => {
40684075
return Ok(ExecResult::err(
40694076
format!("source: {}: No such file or directory", filename),
@@ -4086,15 +4093,15 @@ impl Interpreter {
40864093
}
40874094
let candidate = PathBuf::from(dir).join(filename);
40884095
if let Ok(c) = self.fs.read_file(&candidate).await {
4089-
found = Some(String::from_utf8_lossy(&c).to_string());
4096+
found = Some(bytes_to_latin1_string(&c));
40904097
break;
40914098
}
40924099
}
40934100
// Also try cwd as fallback (bash sources from cwd too)
40944101
if found.is_none() {
40954102
let path = self.resolve_path(filename);
40964103
if let Ok(c) = self.fs.read_file(&path).await {
4097-
found = Some(String::from_utf8_lossy(&c).to_string());
4104+
found = Some(bytes_to_latin1_string(&c));
40984105
}
40994106
}
41004107
match found {
@@ -5199,7 +5206,7 @@ impl Interpreter {
51995206
stdin = Some(String::new()); // EOF
52005207
} else {
52015208
let content = self.fs.read_file(&path).await?;
5202-
stdin = Some(String::from_utf8_lossy(&content).to_string());
5209+
stdin = Some(bytes_to_latin1_string(&content));
52035210
}
52045211
}
52055212
RedirectKind::HereString => {
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
//! Tests for /dev/urandom raw byte handling
2+
3+
use bashkit::Bash;
4+
5+
/// Issue #811: /dev/urandom should return raw bytes, not UTF-8 replacement chars
6+
#[tokio::test]
7+
async fn urandom_no_replacement_chars() {
8+
let mut bash = Bash::new();
9+
// Read 100 bytes and check output via od
10+
let result = bash
11+
.exec("head -c 100 /dev/urandom | od -A n -t x1 | tr -d ' \\n'")
12+
.await
13+
.unwrap();
14+
let hex = result.stdout.trim();
15+
// Should not contain the UTF-8 replacement character pattern efbfbd
16+
assert!(
17+
!hex.contains("efbfbd"),
18+
"Output should not contain UTF-8 replacement chars: {}",
19+
&hex[..hex.len().min(60)]
20+
);
21+
}

0 commit comments

Comments
 (0)