From f7a9edd1d5f73eecd6e06abffcf919c4493736c6 Mon Sep 17 00:00:00 2001
From: Mykhailo Chalyi <mike@chaliy.name>
Date: Fri, 27 Mar 2026 05:00:07 +0000
Subject: [PATCH 1/2] fix(builtins): preserve raw bytes from /dev/urandom
 through pipeline

Three fixes for binary data handling:

1. read_text_file: decode /dev/urandom and /dev/random bytes as Latin-1
   (each byte 0x00-0xFF maps to one char) instead of lossy UTF-8 conversion
2. head -c: use char-level truncation so Latin-1 encoded bytes are counted
   correctly (each char = one original byte)
3. tr -c/-C: expand complement set to full 0-255 range so non-ASCII bytes
   from /dev/urandom are properly filtered

This makes `tr -dc 'a-z0-9' < /dev/urandom | head -c N` produce exactly
N alphanumeric characters.

Closes #811
---
 crates/bashkit/src/builtins/cuttr.rs    |  8 +++--
 crates/bashkit/src/builtins/headtail.rs |  9 +++---
 crates/bashkit/src/builtins/mod.rs      |  7 ++++
 crates/bashkit/tests/urandom_tests.rs   | 43 +++++++++++++++++++++++++
 4 files changed, 60 insertions(+), 7 deletions(-)

diff --git a/crates/bashkit/src/builtins/cuttr.rs b/crates/bashkit/src/builtins/cuttr.rs
index d6b4be84..9cbe1757 100644
--- a/crates/bashkit/src/builtins/cuttr.rs
+++ b/crates/bashkit/src/builtins/cuttr.rs
@@ -319,10 +319,12 @@ impl Builtin for Tr {
 
         let mut set1 = expand_char_set(non_flag_args[0]);
         if complement {
-            // Complement: use all ASCII chars NOT in set1
+            // Complement: use all byte-range chars (0-255) NOT in set1.
+            // Covers full Latin-1 range so binary data from /dev/urandom
+            // (where each byte maps to one char) is handled correctly.
             let original = set1.clone();
-            set1 = (0u8..=127)
-                .map(|b| b as char)
+            set1 = (0u16..=255)
+                .map(|b| b as u8 as char)
                 .filter(|c| !original.contains(c))
                 .collect();
         }
diff --git a/crates/bashkit/src/builtins/headtail.rs b/crates/bashkit/src/builtins/headtail.rs
index d2d3ce32..fae91daa 100644
--- a/crates/bashkit/src/builtins/headtail.rs
+++ b/crates/bashkit/src/builtins/headtail.rs
@@ -165,11 +165,12 @@ fn parse_head_args(args: &[String], default: usize) -> Result<(usize, bool, Vec<
     Ok((count, byte_mode, files))
 }
 
-/// Take the first N bytes from text
+/// Take the first N bytes from text.
+/// Uses char-level truncation so that Latin-1 encoded binary data
+/// (e.g. from /dev/urandom where each byte maps to one char) is
+/// counted correctly — each char represents one original byte.
 fn take_first_bytes(text: &str, n: usize) -> String {
-    let bytes = text.as_bytes();
-    let take = bytes.len().min(n);
-    String::from_utf8_lossy(&bytes[..take]).to_string()
+    text.chars().take(n).collect()
 }
 
 /// Parse arguments for tail command, including +N "from start" syntax.
diff --git a/crates/bashkit/src/builtins/mod.rs b/crates/bashkit/src/builtins/mod.rs
index 149c19bf..b8a74e25 100644
--- a/crates/bashkit/src/builtins/mod.rs
+++ b/crates/bashkit/src/builtins/mod.rs
@@ -220,6 +220,13 @@ pub(crate) async fn read_text_file(
         .await
         .map_err(|e| ExecResult::err(format!("{cmd_name}: {}: {e}\n", path.display()), 1))?;
 
+    // Binary device files (/dev/urandom, /dev/random): preserve raw bytes as
+    // Latin-1 (ISO 8859-1) so each byte 0x00-0xFF maps 1:1 to a char.
+    // This lets `tr -dc 'a-z0-9' < /dev/urandom | head -c N` work correctly.
+    if path == Path::new("/dev/urandom") || path == Path::new("/dev/random") {
+        return Ok(content.iter().map(|&b| b as char).collect());
+    }
+
     Ok(String::from_utf8_lossy(&content).into_owned())
 }
 
diff --git a/crates/bashkit/tests/urandom_tests.rs b/crates/bashkit/tests/urandom_tests.rs
index be955855..0650a82a 100644
--- a/crates/bashkit/tests/urandom_tests.rs
+++ b/crates/bashkit/tests/urandom_tests.rs
@@ -19,3 +19,46 @@ async fn urandom_no_replacement_chars() {
         &hex[..hex.len().min(60)]
     );
 }
+
+/// Issue #811: head -c N /dev/urandom should return exactly N chars
+/// (each original byte maps to one char in the Latin-1 model)
+#[tokio::test]
+async fn urandom_head_char_count() {
+    let mut bash = Bash::new();
+    for n in [1, 4, 8, 16, 32] {
+        let result = bash
+            .exec(&format!("head -c {n} /dev/urandom | wc -m"))
+            .await
+            .unwrap();
+        let count: usize = result.stdout.trim().parse().unwrap_or(0);
+        assert_eq!(
+            count, n,
+            "head -c {n} /dev/urandom | wc -m should produce exactly {n} chars"
+        );
+    }
+}
+
+/// Issue #811: tr -dc 'a-z0-9' < /dev/urandom | head -c 8 should produce 8 alphanumeric chars
+#[tokio::test]
+async fn urandom_tr_filter_alphanumeric() {
+    let mut bash = Bash::new();
+    let result = bash
+        .exec("LC_ALL=C tr -dc 'a-z0-9' < /dev/urandom | head -c 8")
+        .await
+        .unwrap();
+    let output = result.stdout.trim();
+    assert_eq!(
+        output.len(),
+        8,
+        "Should produce exactly 8 chars, got {}: {:?}",
+        output.len(),
+        output
+    );
+    assert!(
+        output
+            .chars()
+            .all(|c| c.is_ascii_lowercase() || c.is_ascii_digit()),
+        "All chars should be a-z0-9, got: {:?}",
+        output
+    );
+}

From 7f6081254c10fa0777a46877617ed9fe739d2554 Mon Sep 17 00:00:00 2001
From: Mykhailo Chalyi <mike@chaliy.name>
Date: Fri, 27 Mar 2026 20:29:36 +0000
Subject: [PATCH 2/2] chore: add cargo-vet exemptions for older dependency
 versions

Add exemptions for cmake 0.1.57, console 0.15.11, insta 1.46.3,
simd-adler32 0.3.8, and unicode-segmentation 1.13.1 alongside the
existing exemptions for their newer versions. Also add exemptions for
cc 1.2.58 and mio 1.2.0.

These older versions are in Cargo.lock and need exemptions for
cargo-vet to pass in CI.
---
 supply-chain/config.toml | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/supply-chain/config.toml b/supply-chain/config.toml
index 4caaa71c..102f8041 100644
--- a/supply-chain/config.toml
+++ b/supply-chain/config.toml
@@ -162,6 +162,10 @@ criteria = "safe-to-deploy"
 version = "1.2.57"
 criteria = "safe-to-deploy"
 
+[[exemptions.cc]]
+version = "1.2.58"
+criteria = "safe-to-deploy"
+
 [[exemptions.cesu8]]
 version = "1.1.0"
 criteria = "safe-to-deploy"
@@ -206,6 +210,10 @@ criteria = "safe-to-deploy"
 version = "1.1.0"
 criteria = "safe-to-deploy"
 
+[[exemptions.cmake]]
+version = "0.1.57"
+criteria = "safe-to-deploy"
+
 [[exemptions.cmake]]
 version = "0.1.58"
 criteria = "safe-to-deploy"
@@ -234,6 +242,10 @@ criteria = "safe-to-deploy"
 version = "0.9.0"
 criteria = "safe-to-deploy"
 
+[[exemptions.console]]
+version = "0.15.11"
+criteria = "safe-to-run"
+
 [[exemptions.console]]
 version = "0.16.3"
 criteria = "safe-to-run"
@@ -570,6 +582,10 @@ criteria = "safe-to-deploy"
 version = "2.13.0"
 criteria = "safe-to-deploy"
 
+[[exemptions.insta]]
+version = "1.46.3"
+criteria = "safe-to-run"
+
 [[exemptions.insta]]
 version = "1.47.0"
 criteria = "safe-to-run"
@@ -718,6 +734,10 @@ criteria = "safe-to-deploy"
 version = "1.1.1"
 criteria = "safe-to-deploy"
 
+[[exemptions.mio]]
+version = "1.2.0"
+criteria = "safe-to-deploy"
+
 [[exemptions.nalgebra]]
 version = "0.33.2"
 criteria = "safe-to-deploy"
@@ -1174,6 +1194,10 @@ criteria = "safe-to-deploy"
 version = "0.9.1"
 criteria = "safe-to-deploy"
 
+[[exemptions.simd-adler32]]
+version = "0.3.8"
+criteria = "safe-to-deploy"
+
 [[exemptions.simd-adler32]]
 version = "0.3.9"
 criteria = "safe-to-deploy"
@@ -1370,6 +1394,10 @@ criteria = "safe-to-deploy"
 version = "0.1.25"
 criteria = "safe-to-deploy"
 
+[[exemptions.unicode-segmentation]]
+version = "1.13.1"
+criteria = "safe-to-deploy"
+
 [[exemptions.unicode-segmentation]]
 version = "1.13.2"
 criteria = "safe-to-deploy"