From 0d62c145bf7119a5a369132960ba3efc11821906 Mon Sep 17 00:00:00 2001 From: Yuri Astrakhan Date: Thu, 19 Mar 2026 16:46:49 -0400 Subject: [PATCH 1/4] test aarch64 crashes --- src/cpp/mod.rs | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/src/cpp/mod.rs b/src/cpp/mod.rs index e672ecc..e2124e3 100644 --- a/src/cpp/mod.rs +++ b/src/cpp/mod.rs @@ -479,4 +479,35 @@ mod tests { assert_eq!(decoded, input); } + + // Regression: Simple8bRleCodec internally reinterpret_cast's the uint32_t* + // output buffer to uint64_t*, writing 64-bit words directly. On ARM64 + // (strict-alignment architectures) a 64-bit store to a 4-byte-aligned + // (but not 8-byte-aligned) address is undefined behaviour and causes + // SIGSEGV. Reproduce the exact pattern: pass a slice starting at + // output[1], which is at a +4-byte offset from the allocation and is + // therefore 4-byte aligned but NOT 8-byte aligned. + // + // This matches the calling convention introduced in the block-size branch + // (encode32_to_vec pushes a u32 header first, then passes &mut out[1..]). + #[test] + fn test_simple8b_rle_encode_at_u32_offset() { + let codec = Simple8bRleCodec::new(); + // 128 small values to exercise the codec properly (same as block-size + // test_anylen_128 input; fewer values may not trigger the misaligned + // write because the codec emits fewer 64-bit output words). + let input: Vec<u32> = (1u32..=128).collect(); + + // Allocate one extra leading u32 so that buf[1..] is at offset +4. + let capacity = input.len() * 2 + 1024; + let mut buf = vec![0u32; 1 + capacity]; + // buf[0] acts as the header word (as in encode32_to_vec); buf[1..] is + // the misaligned encode target. 
+ let encoded = codec.encode32(&input, &mut buf[1..]).unwrap(); + let encoded_len = encoded.len(); + + let mut decoded = vec![0u32; input.len()]; + let decoded_slice = codec.decode32(&buf[1..1 + encoded_len], &mut decoded).unwrap(); + assert_eq!(decoded_slice, input.as_slice()); + } } From 09364f49b32f1a816258d651f017a7b5e9660cc8 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 19 Mar 2026 20:47:07 +0000 Subject: [PATCH 2/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/cpp/mod.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/cpp/mod.rs b/src/cpp/mod.rs index e2124e3..e3cc55a 100644 --- a/src/cpp/mod.rs +++ b/src/cpp/mod.rs @@ -507,7 +507,9 @@ mod tests { let encoded_len = encoded.len(); let mut decoded = vec![0u32; input.len()]; - let decoded_slice = codec.decode32(&buf[1..1 + encoded_len], &mut decoded).unwrap(); + let decoded_slice = codec + .decode32(&buf[1..1 + encoded_len], &mut decoded) + .unwrap(); assert_eq!(decoded_slice, input.as_slice()); } } From 4ae5a2128d394f4898a960e11b37cd4c0d6aace4 Mon Sep 17 00:00:00 2001 From: Yuri Astrakhan Date: Thu, 19 Mar 2026 16:49:42 -0400 Subject: [PATCH 3/4] lint --- src/cpp/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cpp/mod.rs b/src/cpp/mod.rs index e3cc55a..dc22052 100644 --- a/src/cpp/mod.rs +++ b/src/cpp/mod.rs @@ -508,7 +508,7 @@ mod tests { let mut decoded = vec![0u32; input.len()]; let decoded_slice = codec - .decode32(&buf[1..1 + encoded_len], &mut decoded) + .decode32(&buf[1..=encoded_len], &mut decoded) .unwrap(); assert_eq!(decoded_slice, input.as_slice()); } From ca4c8c0b1483fa3396f34a107bd4a74a07666630 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 19 Mar 2026 20:50:14 +0000 Subject: [PATCH 4/4] [pre-commit.ci] auto fixes from pre-commit.com hooks 
for more information, see https://pre-commit.ci --- src/cpp/mod.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/cpp/mod.rs b/src/cpp/mod.rs index dc22052..7b66abd 100644 --- a/src/cpp/mod.rs +++ b/src/cpp/mod.rs @@ -507,9 +507,7 @@ mod tests { let encoded_len = encoded.len(); let mut decoded = vec![0u32; input.len()]; - let decoded_slice = codec - .decode32(&buf[1..=encoded_len], &mut decoded) - .unwrap(); + let decoded_slice = codec.decode32(&buf[1..=encoded_len], &mut decoded).unwrap(); assert_eq!(decoded_slice, input.as_slice()); } }