From f7a9edd1d5f73eecd6e06abffcf919c4493736c6 Mon Sep 17 00:00:00 2001 From: Mykhailo Chalyi Date: Fri, 27 Mar 2026 05:00:07 +0000 Subject: [PATCH 1/2] fix(builtins): preserve raw bytes from /dev/urandom through pipeline Three fixes for binary data handling: 1. read_text_file: encode /dev/urandom bytes as Latin-1 (each byte 0x00-0xFF maps to one char) instead of UTF-8 lossy conversion 2. head -c: use char-level truncation so Latin-1 encoded bytes are counted correctly (each char = one original byte) 3. tr -c/-C: expand complement set to full 0-255 range so non-ASCII bytes from /dev/urandom are properly filtered This makes `tr -dc 'a-z0-9' < /dev/urandom | head -c N` produce exactly N alphanumeric characters. Closes #811 --- crates/bashkit/src/builtins/cuttr.rs | 8 +++-- crates/bashkit/src/builtins/headtail.rs | 9 +++--- crates/bashkit/src/builtins/mod.rs | 7 ++++ crates/bashkit/tests/urandom_tests.rs | 43 +++++++++++++++++++++++++ 4 files changed, 60 insertions(+), 7 deletions(-) diff --git a/crates/bashkit/src/builtins/cuttr.rs b/crates/bashkit/src/builtins/cuttr.rs index d6b4be84..9cbe1757 100644 --- a/crates/bashkit/src/builtins/cuttr.rs +++ b/crates/bashkit/src/builtins/cuttr.rs @@ -319,10 +319,12 @@ impl Builtin for Tr { let mut set1 = expand_char_set(non_flag_args[0]); if complement { - // Complement: use all ASCII chars NOT in set1 + // Complement: use all byte-range chars (0-255) NOT in set1. + // Covers full Latin-1 range so binary data from /dev/urandom + // (where each byte maps to one char) is handled correctly. 
let original = set1.clone(); - set1 = (0u8..=127) - .map(|b| b as char) + set1 = (0u16..=255) + .map(|b| b as u8 as char) .filter(|c| !original.contains(c)) .collect(); } diff --git a/crates/bashkit/src/builtins/headtail.rs b/crates/bashkit/src/builtins/headtail.rs index d2d3ce32..fae91daa 100644 --- a/crates/bashkit/src/builtins/headtail.rs +++ b/crates/bashkit/src/builtins/headtail.rs @@ -165,11 +165,12 @@ fn parse_head_args(args: &[String], default: usize) -> Result<(usize, bool, Vec< Ok((count, byte_mode, files)) } -/// Take the first N bytes from text +/// Take the first N bytes from text. +/// Uses char-level truncation so that Latin-1 encoded binary data +/// (e.g. from /dev/urandom where each byte maps to one char) is +/// counted correctly — each char represents one original byte. fn take_first_bytes(text: &str, n: usize) -> String { - let bytes = text.as_bytes(); - let take = bytes.len().min(n); - String::from_utf8_lossy(&bytes[..take]).to_string() + text.chars().take(n).collect() } /// Parse arguments for tail command, including +N "from start" syntax. diff --git a/crates/bashkit/src/builtins/mod.rs b/crates/bashkit/src/builtins/mod.rs index 149c19bf..b8a74e25 100644 --- a/crates/bashkit/src/builtins/mod.rs +++ b/crates/bashkit/src/builtins/mod.rs @@ -220,6 +220,13 @@ pub(crate) async fn read_text_file( .await .map_err(|e| ExecResult::err(format!("{cmd_name}: {}: {e}\n", path.display()), 1))?; + // Binary device files (/dev/urandom, /dev/random): preserve raw bytes as + // Latin-1 (ISO 8859-1) so each byte 0x00-0xFF maps 1:1 to a char. + // This lets `tr -dc 'a-z0-9' < /dev/urandom | head -c N` work correctly. 
+ if path == Path::new("/dev/urandom") || path == Path::new("/dev/random") { + return Ok(content.iter().map(|&b| b as char).collect()); + } + Ok(String::from_utf8_lossy(&content).into_owned()) } diff --git a/crates/bashkit/tests/urandom_tests.rs b/crates/bashkit/tests/urandom_tests.rs index be955855..0650a82a 100644 --- a/crates/bashkit/tests/urandom_tests.rs +++ b/crates/bashkit/tests/urandom_tests.rs @@ -19,3 +19,46 @@ async fn urandom_no_replacement_chars() { &hex[..hex.len().min(60)] ); } + +/// Issue #811: head -c N /dev/urandom should return exactly N chars +/// (each original byte maps to one char in the Latin-1 model) +#[tokio::test] +async fn urandom_head_char_count() { + let mut bash = Bash::new(); + for n in [1, 4, 8, 16, 32] { + let result = bash + .exec(&format!("head -c {n} /dev/urandom | wc -m")) + .await + .unwrap(); + let count: usize = result.stdout.trim().parse().unwrap_or(0); + assert_eq!( + count, n, + "head -c {n} /dev/urandom | wc -m should produce exactly {n} chars" + ); + } +} + +/// Issue #811: tr -dc 'a-z0-9' < /dev/urandom | head -c 8 should produce 8 alphanumeric chars +#[tokio::test] +async fn urandom_tr_filter_alphanumeric() { + let mut bash = Bash::new(); + let result = bash + .exec("LC_ALL=C tr -dc 'a-z0-9' < /dev/urandom | head -c 8") + .await + .unwrap(); + let output = result.stdout.trim(); + assert_eq!( + output.len(), + 8, + "Should produce exactly 8 chars, got {}: {:?}", + output.len(), + output + ); + assert!( + output + .chars() + .all(|c| c.is_ascii_lowercase() || c.is_ascii_digit()), + "All chars should be a-z0-9, got: {:?}", + output + ); +} From 7f6081254c10fa0777a46877617ed9fe739d2554 Mon Sep 17 00:00:00 2001 From: Mykhailo Chalyi Date: Fri, 27 Mar 2026 20:29:36 +0000 Subject: [PATCH 2/2] chore: add cargo-vet exemptions for older dependency versions Add exemptions for cmake 0.1.57, console 0.15.11, insta 1.46.3, simd-adler32 0.3.8, and unicode-segmentation 1.13.1 alongside the existing exemptions for their newer 
versions. These older versions are in Cargo.lock and need exemptions for cargo-vet to pass in CI. This patch also adds exemptions for cc 1.2.58 and mio 1.2.0. --- supply-chain/config.toml | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/supply-chain/config.toml b/supply-chain/config.toml index 4caaa71c..102f8041 100644 --- a/supply-chain/config.toml +++ b/supply-chain/config.toml @@ -162,6 +162,10 @@ criteria = "safe-to-deploy" version = "1.2.57" criteria = "safe-to-deploy" +[[exemptions.cc]] +version = "1.2.58" +criteria = "safe-to-deploy" + [[exemptions.cesu8]] version = "1.1.0" criteria = "safe-to-deploy" @@ -206,6 +210,10 @@ criteria = "safe-to-deploy" version = "1.1.0" criteria = "safe-to-deploy" +[[exemptions.cmake]] +version = "0.1.57" +criteria = "safe-to-deploy" + [[exemptions.cmake]] version = "0.1.58" criteria = "safe-to-deploy" @@ -234,6 +242,10 @@ criteria = "safe-to-deploy" version = "0.9.0" criteria = "safe-to-deploy" +[[exemptions.console]] +version = "0.15.11" +criteria = "safe-to-run" + [[exemptions.console]] version = "0.16.3" criteria = "safe-to-run" @@ -570,6 +582,10 @@ criteria = "safe-to-deploy" version = "2.13.0" criteria = "safe-to-deploy" +[[exemptions.insta]] +version = "1.46.3" +criteria = "safe-to-run" + [[exemptions.insta]] version = "1.47.0" criteria = "safe-to-run" @@ -718,6 +734,10 @@ criteria = "safe-to-deploy" version = "1.1.1" criteria = "safe-to-deploy" +[[exemptions.mio]] +version = "1.2.0" +criteria = "safe-to-deploy" + [[exemptions.nalgebra]] version = "0.33.2" criteria = "safe-to-deploy" @@ -1174,6 +1194,10 @@ criteria = "safe-to-deploy" version = "0.9.1" criteria = "safe-to-deploy" +[[exemptions.simd-adler32]] +version = "0.3.8" +criteria = "safe-to-deploy" + [[exemptions.simd-adler32]] version = "0.3.9" criteria = "safe-to-deploy" @@ -1370,6 +1394,10 @@ criteria = "safe-to-deploy" version = "0.1.25" criteria = "safe-to-deploy" +[[exemptions.unicode-segmentation]] +version = "1.13.1" +criteria = "safe-to-deploy" + [[exemptions.unicode-segmentation]] 
version = "1.13.2" criteria = "safe-to-deploy"