Skip to content

Commit af1364b

Browse files
committed
fix(builtins): preserve raw bytes from /dev/urandom through pipeline
Three fixes for binary data handling:

1. read_text_file: encode /dev/urandom bytes as Latin-1 (each byte 0x00-0xFF maps to one char) instead of using a lossy UTF-8 conversion.
2. head -c: use char-level truncation so Latin-1 encoded bytes are counted correctly (each char = one original byte).
3. tr -c/-C: expand the complement set to the full 0-255 range so non-ASCII bytes from /dev/urandom are properly filtered.

This makes `tr -dc 'a-z0-9' < /dev/urandom | head -c N` produce exactly N alphanumeric characters.

Closes #811
1 parent db454e5 commit af1364b

File tree

4 files changed

+60
-7
lines changed

4 files changed

+60
-7
lines changed

crates/bashkit/src/builtins/cuttr.rs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -319,10 +319,12 @@ impl Builtin for Tr {
319319

320320
let mut set1 = expand_char_set(non_flag_args[0]);
321321
if complement {
322-
// Complement: use all ASCII chars NOT in set1
322+
// Complement: use all byte-range chars (0-255) NOT in set1.
323+
// Covers full Latin-1 range so binary data from /dev/urandom
324+
// (where each byte maps to one char) is handled correctly.
323325
let original = set1.clone();
324-
set1 = (0u8..=127)
325-
.map(|b| b as char)
326+
set1 = (0u16..=255)
327+
.map(|b| b as u8 as char)
326328
.filter(|c| !original.contains(c))
327329
.collect();
328330
}

crates/bashkit/src/builtins/headtail.rs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -165,11 +165,12 @@ fn parse_head_args(args: &[String], default: usize) -> Result<(usize, bool, Vec<
165165
Ok((count, byte_mode, files))
166166
}
167167

168-
/// Take the first N bytes from text
168+
/// Take the first N bytes from text.
169+
/// Uses char-level truncation so that Latin-1 encoded binary data
170+
/// (e.g. from /dev/urandom where each byte maps to one char) is
171+
/// counted correctly — each char represents one original byte.
169172
fn take_first_bytes(text: &str, n: usize) -> String {
170-
let bytes = text.as_bytes();
171-
let take = bytes.len().min(n);
172-
String::from_utf8_lossy(&bytes[..take]).to_string()
173+
text.chars().take(n).collect()
173174
}
174175

175176
/// Parse arguments for tail command, including +N "from start" syntax.

crates/bashkit/src/builtins/mod.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,13 @@ pub(crate) async fn read_text_file(
220220
.await
221221
.map_err(|e| ExecResult::err(format!("{cmd_name}: {}: {e}\n", path.display()), 1))?;
222222

223+
// Binary device files (/dev/urandom, /dev/random): preserve raw bytes as
224+
// Latin-1 (ISO 8859-1) so each byte 0x00-0xFF maps 1:1 to a char.
225+
// This lets `tr -dc 'a-z0-9' < /dev/urandom | head -c N` work correctly.
226+
if path == Path::new("/dev/urandom") || path == Path::new("/dev/random") {
227+
return Ok(content.iter().map(|&b| b as char).collect());
228+
}
229+
223230
Ok(String::from_utf8_lossy(&content).into_owned())
224231
}
225232

crates/bashkit/tests/urandom_tests.rs

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,3 +19,46 @@ async fn urandom_no_replacement_chars() {
1919
&hex[..hex.len().min(60)]
2020
);
2121
}
22+
23+
/// Issue #811: head -c N /dev/urandom should return exactly N chars
24+
/// (each original byte maps to one char in the Latin-1 model)
25+
#[tokio::test]
26+
async fn urandom_head_char_count() {
27+
let mut bash = Bash::new();
28+
for n in [1, 4, 8, 16, 32] {
29+
let result = bash
30+
.exec(&format!("head -c {n} /dev/urandom | wc -m"))
31+
.await
32+
.unwrap();
33+
let count: usize = result.stdout.trim().parse().unwrap_or(0);
34+
assert_eq!(
35+
count, n,
36+
"head -c {n} /dev/urandom | wc -m should produce exactly {n} chars"
37+
);
38+
}
39+
}
40+
41+
/// Issue #811: tr -dc 'a-z0-9' < /dev/urandom | head -c 8 should produce 8 alphanumeric chars
42+
#[tokio::test]
43+
async fn urandom_tr_filter_alphanumeric() {
44+
let mut bash = Bash::new();
45+
let result = bash
46+
.exec("LC_ALL=C tr -dc 'a-z0-9' < /dev/urandom | head -c 8")
47+
.await
48+
.unwrap();
49+
let output = result.stdout.trim();
50+
assert_eq!(
51+
output.len(),
52+
8,
53+
"Should produce exactly 8 chars, got {}: {:?}",
54+
output.len(),
55+
output
56+
);
57+
assert!(
58+
output
59+
.chars()
60+
.all(|c| c.is_ascii_lowercase() || c.is_ascii_digit()),
61+
"All chars should be a-z0-9, got: {:?}",
62+
output
63+
);
64+
}

0 commit comments

Comments
 (0)