From d5f3a7fb40507769337c2fe337d0bb1e478d3687 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Agust=C3=ADn=20Gonz=C3=A1lez?= Date: Thu, 13 Nov 2025 13:16:46 -0300 Subject: [PATCH 1/3] Add unit tests reproducing #69 --- CMakeLists.txt | 2 ++ unittest/test_simple16.cpp | 16 ++++++++++++++++ unittest/test_simple8b.cpp | 16 ++++++++++++++++ unittest/util.h | 36 ++++++++++++++++++++++++++++++++++++ 4 files changed, 70 insertions(+) create mode 100644 unittest/test_simple16.cpp create mode 100644 unittest/test_simple8b.cpp create mode 100644 unittest/util.h diff --git a/CMakeLists.txt b/CMakeLists.txt index c5751a6..2df211e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -173,6 +173,8 @@ if(FASTPFOR_WITH_TEST) unittest/test_composite.cpp unittest/test_driver.cpp unittest/test_fastpfor.cpp + unittest/test_simple8b.cpp + unittest/test_simple16.cpp unittest/test_variablebyte.cpp) target_link_libraries(FastPFOR_unittest gtest FastPFOR) enable_testing() diff --git a/unittest/test_simple16.cpp b/unittest/test_simple16.cpp new file mode 100644 index 0000000..6427cbf --- /dev/null +++ b/unittest/test_simple16.cpp @@ -0,0 +1,16 @@ +#include + +#include "simple16.h" +#include "util.h" + +namespace FastPForLib { + +TEST(Simple16Test, DecodesWithUnknownLength) { + Simple16 codec; + std::vector in; + for (uint32_t i = 0; i < 128; ++i) { + in.push_back(i); + } + verifyUnknownInputLengthDecode(codec, in); +} +} // namespace FastPForLib diff --git a/unittest/test_simple8b.cpp b/unittest/test_simple8b.cpp new file mode 100644 index 0000000..f999102 --- /dev/null +++ b/unittest/test_simple8b.cpp @@ -0,0 +1,16 @@ +#include + +#include "simple8b.h" +#include "util.h" + +namespace FastPForLib { + +TEST(Simple8bTest, DecodesWithUnknownLength) { + Simple8b codec; + std::vector in; + for (uint32_t i = 0; i < 128; ++i) { + in.push_back(i); + } + verifyUnknownInputLengthDecode(codec, in); +} +} // namespace FastPForLib diff --git a/unittest/util.h b/unittest/util.h new file mode 100644 index 0000000..910ba49 --- /dev/null +++ b/unittest/util.h @@ -0,0 +1,36 @@ +#ifndef FASTPFORLIB_UTIL_H_ +#define FASTPFORLIB_UTIL_H_ + +#include + +#include "gtest/gtest.h" + +namespace FastPForLib { + +template +void verifyUnknownInputLengthDecode(Codec &codec, const std::vector &in) { + std::vector encoded(in.size() * 2, 0); + size_t encodedSize; + codec.encodeArray(in.data(), in.size(), encoded.data(), encodedSize); + encoded.resize(encodedSize); + + std::vector decoded(in.size(), 0); + size_t n = in.size(); + const uint32_t *decodedUntil = + codec.decodeArray(encoded.data(), 0, decoded.data(), n); + + // Check that the decoded size matches the input size. + EXPECT_EQ(n, in.size()); + + // Check that the returned pointer matches the end of the encoded buffer. + EXPECT_EQ(decodedUntil, encoded.data() + encodedSize); + + // Check each decoded element matches its corresponding input value. + for (size_t i = 0; i < in.size(); ++i) { + EXPECT_EQ(in[i], decoded[i]); + } +} + +} // namespace FastPForLib + +#endif // FASTPFORLIB_UTIL_H_ From eb74a8d384e7e731178cb88413e3c8c935609ce1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Agust=C3=ADn=20Gonz=C3=A1lez?= Date: Thu, 13 Nov 2025 14:08:44 -0300 Subject: [PATCH 2/3] Fix debug assert when input lenght is unknown (0) --- headers/codecs.h | 4 ++++ headers/simple16.h | 2 +- headers/simple8b.h | 4 ++-- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/headers/codecs.h b/headers/codecs.h index adfcec1..a8681c5 100644 --- a/headers/codecs.h +++ b/headers/codecs.h @@ -53,6 +53,10 @@ class IntegerCODEC { * of the variable nvalue gets updated with the number actually use * (if nvalue exceeds the original value, there might be a buffer * overrun). + * + * NOTE: Decoding can be performed with an unknown input length. This + * case is indicated by a length of 0; however, nvalue must be provided + * in order for the decoder knows how many values to decode. */ virtual const uint32_t *decodeArray(const uint32_t *in, const size_t length, uint32_t *out, size_t &nvalue) = 0; diff --git a/headers/simple16.h b/headers/simple16.h index cd4e06a..e8e3423 100644 --- a/headers/simple16.h +++ b/headers/simple16.h @@ -745,7 +745,7 @@ const uint32_t *Simple16::decodeArray(const uint32_t *in, printf("simple16 stats[%u]=%f\n", k, stats[k] * 1.0 / sum); } #endif - ASSERT(in <= endin, std::to_string(in - endin)); + ASSERT(len == 0 || in <= endin, std::to_string(in - endin)); return in; } diff --git a/headers/simple8b.h b/headers/simple8b.h index 3bb2a5d..4e4874c 100644 --- a/headers/simple8b.h +++ b/headers/simple8b.h @@ -637,9 +637,9 @@ const uint32_t *Simple8b::decodeArray(const uint32_t *in, printf("simple8b stats[%u]=%f\n", k, stats[k] * 1.0 / sum); } #endif - assert(in64 <= finalin64); + assert(len == 0 || in64 <= finalin64); in = reinterpret_cast(in64); - assert(in <= endin); + assert(len == 0 || in <= endin); // check that we don't overrun the buffer too much? ASSERT(out < end + 240, std::to_string(out - end)); nvalue = MarkLength ? actualvalue : out - initout; From cead07625b23062b61e83b3fa30a6c2f96e9d96a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Agust=C3=ADn=20Gonz=C3=A1lez?= Date: Sat, 15 Nov 2025 15:28:56 -0300 Subject: [PATCH 3/3] Fix buffer overrun in `test_simple16.cpp` --- unittest/test_simple16.cpp | 6 +++++- unittest/test_simple8b.cpp | 4 +++- unittest/util.h | 6 ++++-- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/unittest/test_simple16.cpp b/unittest/test_simple16.cpp index 6427cbf..333bcb6 100644 --- a/unittest/test_simple16.cpp +++ b/unittest/test_simple16.cpp @@ -11,6 +11,10 @@ TEST(Simple16Test, DecodesWithUnknownLength) { for (uint32_t i = 0; i < 128; ++i) { in.push_back(i); } - verifyUnknownInputLengthDecode(codec, in); + + // Simple16 may overrun the output buffer regardless of `n`, so a headroom of + // 28 (the maximum number of elements in a single pack) is added. + std::vector decoded(in.size() + 28, 0); + verifyUnknownInputLengthDecode(codec, in, decoded); } } // namespace FastPForLib diff --git a/unittest/test_simple8b.cpp b/unittest/test_simple8b.cpp index f999102..00f84dc 100644 --- a/unittest/test_simple8b.cpp +++ b/unittest/test_simple8b.cpp @@ -11,6 +11,8 @@ TEST(Simple8bTest, DecodesWithUnknownLength) { for (uint32_t i = 0; i < 128; ++i) { in.push_back(i); } - verifyUnknownInputLengthDecode(codec, in); + + std::vector decoded(in.size(), 0); + verifyUnknownInputLengthDecode(codec, in, decoded); } } // namespace FastPForLib diff --git a/unittest/util.h b/unittest/util.h index 910ba49..4316d15 100644 --- a/unittest/util.h +++ b/unittest/util.h @@ -8,16 +8,18 @@ namespace FastPForLib { template -void verifyUnknownInputLengthDecode(Codec &codec, const std::vector &in) { +void verifyUnknownInputLengthDecode(Codec &codec, + const std::vector &in, + std::vector &decoded) { std::vector encoded(in.size() * 2, 0); size_t encodedSize; codec.encodeArray(in.data(), in.size(), encoded.data(), encodedSize); encoded.resize(encodedSize); - std::vector decoded(in.size(), 0); size_t n = in.size(); const uint32_t *decodedUntil = codec.decodeArray(encoded.data(), 0, decoded.data(), n); + decoded.resize(n); // Check that the decoded size matches the input size. EXPECT_EQ(n, in.size());