From 8339284939456585ad25fb7e4c272e7bf9950fc5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Tue, 19 Nov 2024 16:11:56 +0100 Subject: [PATCH 01/15] bench: measure block (size)serialization speed Measure both full block serialization and size computation via `SizeComputer`. `SizeComputer` returns the exact final size of the serialized content without writing any bytes. > cmake -B build -DBUILD_BENCH=ON -DCMAKE_BUILD_TYPE=Release && cmake --build build -j$(nproc) && build/bin/bench_bitcoin -filter='SizeComputerBlock|SerializeBlock' --min-time=10000 > C compiler ............................ AppleClang 16.0.0.16000026 | ns/block | block/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 195,610.62 | 5,112.20 | 0.3% | 11.00 | `SerializeBlock` | 12,061.83 | 82,906.19 | 0.1% | 11.01 | `SizeComputerBlock` > C++ compiler .......................... GNU 13.3.0 | ns/block | block/s | err% | ins/block | cyc/block | IPC | bra/block | miss% | total | benchmark |--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:---------- | 867,857.55 | 1,152.26 | 0.0% | 8,015,883.90 | 3,116,099.08 | 2.572 | 1,517,035.87 | 0.5% | 10.81 | `SerializeBlock` | 30,928.27 | 32,332.88 | 0.0% | 221,683.03 | 111,055.84 | 1.996 | 53,037.03 | 0.8% | 11.03 | `SizeComputerBlock` --- src/bench/checkblock.cpp | 33 +++++++++++++++++++++++++++++---- src/streams.h | 1 + 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/src/bench/checkblock.cpp b/src/bench/checkblock.cpp index 765b8b0dadcd..128a37e6608a 100644 --- a/src/bench/checkblock.cpp +++ b/src/bench/checkblock.cpp @@ -21,11 +21,34 @@ #include #include +static void SizeComputerBlock(benchmark::Bench& bench) { + CBlock block; + DataStream(benchmark::data::block413567) >> TX_WITH_WITNESS(block); + + bench.unit("block").run([&] { + SizeComputer size_computer; + size_computer << 
TX_WITH_WITNESS(block); + assert(size_computer.size() == benchmark::data::block413567.size()); + }); +} + +static void SerializeBlock(benchmark::Bench& bench) { + CBlock block; + DataStream(benchmark::data::block413567) >> TX_WITH_WITNESS(block); + + // Create output stream and verify first serialization matches input + bench.unit("block").run([&] { + DataStream output_stream(benchmark::data::block413567.size()); + output_stream << TX_WITH_WITNESS(block); + assert(output_stream.size() == benchmark::data::block413567.size()); + }); +} + // These are the two major time-sinks which happen after we have fully received // a block off the wire, but before we can relay the block on to peers using // compact block relay. -static void DeserializeBlockTest(benchmark::Bench& bench) +static void DeserializeBlock(benchmark::Bench& bench) { DataStream stream(benchmark::data::block413567); std::byte a{0}; @@ -39,7 +62,7 @@ static void DeserializeBlockTest(benchmark::Bench& bench) }); } -static void DeserializeAndCheckBlockTest(benchmark::Bench& bench) +static void DeserializeAndCheckBlock(benchmark::Bench& bench) { DataStream stream(benchmark::data::block413567); std::byte a{0}; @@ -60,5 +83,7 @@ static void DeserializeAndCheckBlockTest(benchmark::Bench& bench) }); } -BENCHMARK(DeserializeBlockTest); -BENCHMARK(DeserializeAndCheckBlockTest); +BENCHMARK(SizeComputerBlock); +BENCHMARK(SerializeBlock); +BENCHMARK(DeserializeBlock); +BENCHMARK(DeserializeAndCheckBlock); diff --git a/src/streams.h b/src/streams.h index f70adcf74a71..346984259c15 100644 --- a/src/streams.h +++ b/src/streams.h @@ -148,6 +148,7 @@ class DataStream typedef vector_type::reverse_iterator reverse_iterator; explicit DataStream() = default; + explicit DataStream(size_type n) { reserve(n); } explicit DataStream(std::span sp) : DataStream{std::as_bytes(sp)} {} explicit DataStream(std::span sp) : vch(sp.data(), sp.data() + sp.size()) {} From 9d3fcb28eee0115047446a4c628533cbfb091e43 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?L=C5=91rinc?= Date: Fri, 17 Jan 2025 14:29:33 +0100 Subject: [PATCH 02/15] refactor: reduce template bloat in primitive serialization Merged multiple template methods into a single `if constexpr`-delimited implementation to reduce template bloat (i.e. related functionality is grouped into a single method, but can still be optimized because of the compile-time `if constexpr` conditions). This unifies related methods that were only bound before by similar signatures - and enables `SizeComputer` optimizations later. --- src/serialize.h | 72 ++++++++++++++++++++++++++----------------------- 1 file changed, 39 insertions(+), 33 deletions(-) diff --git a/src/serialize.h b/src/serialize.h index 21b3325f7a7a..57eb373111c6 100644 --- a/src/serialize.h +++ b/src/serialize.h @@ -240,41 +240,47 @@ const Out& AsBase(const In& x) template concept CharNotInt8 = std::same_as && !std::same_as; -// clang-format off +template +concept ByteOrIntegral = std::is_same_v || + (std::is_integral_v && !std::is_same_v); + template void Serialize(Stream&, V) = delete; // char serialization forbidden. 
Use uint8_t or int8_t -template void Serialize(Stream& s, std::byte a) { ser_writedata8(s, uint8_t(a)); } -template void Serialize(Stream& s, int8_t a) { ser_writedata8(s, uint8_t(a)); } -template void Serialize(Stream& s, uint8_t a) { ser_writedata8(s, a); } -template void Serialize(Stream& s, int16_t a) { ser_writedata16(s, uint16_t(a)); } -template void Serialize(Stream& s, uint16_t a) { ser_writedata16(s, a); } -template void Serialize(Stream& s, int32_t a) { ser_writedata32(s, uint32_t(a)); } -template void Serialize(Stream& s, uint32_t a) { ser_writedata32(s, a); } -template void Serialize(Stream& s, int64_t a) { ser_writedata64(s, uint64_t(a)); } -template void Serialize(Stream& s, uint64_t a) { ser_writedata64(s, a); } - -template void Serialize(Stream& s, const B (&a)[N]) { s.write(MakeByteSpan(a)); } -template void Serialize(Stream& s, const std::array& a) { s.write(MakeByteSpan(a)); } -template void Serialize(Stream& s, std::span span) { s.write(std::as_bytes(span)); } -template void Serialize(Stream& s, std::span span) { s.write(std::as_bytes(span)); } +template void Serialize(Stream& s, T a) +{ + if constexpr (sizeof(T) == 1) { + ser_writedata8(s, static_cast(a)); // (u)int8_t or std::byte or bool + } else if constexpr (sizeof(T) == 2) { + ser_writedata16(s, static_cast(a)); // (u)int16_t + } else if constexpr (sizeof(T) == 4) { + ser_writedata32(s, static_cast(a)); // (u)int32_t + } else { + static_assert(sizeof(T) == 8); + ser_writedata64(s, static_cast(a)); // (u)int64_t + } +} +template void Serialize(Stream& s, const B (&a)[N]) { s.write(MakeByteSpan(a)); } +template void Serialize(Stream& s, const std::array& a) { s.write(MakeByteSpan(a)); } +template void Serialize(Stream& s, std::span span) { s.write(std::as_bytes(span)); } +template void Serialize(Stream& s, std::span span) { s.write(std::as_bytes(span)); } template void Unserialize(Stream&, V) = delete; // char serialization forbidden. 
Use uint8_t or int8_t -template void Unserialize(Stream& s, std::byte& a) { a = std::byte(ser_readdata8(s)); } -template void Unserialize(Stream& s, int8_t& a) { a = int8_t(ser_readdata8(s)); } -template void Unserialize(Stream& s, uint8_t& a) { a = ser_readdata8(s); } -template void Unserialize(Stream& s, int16_t& a) { a = int16_t(ser_readdata16(s)); } -template void Unserialize(Stream& s, uint16_t& a) { a = ser_readdata16(s); } -template void Unserialize(Stream& s, int32_t& a) { a = int32_t(ser_readdata32(s)); } -template void Unserialize(Stream& s, uint32_t& a) { a = ser_readdata32(s); } -template void Unserialize(Stream& s, int64_t& a) { a = int64_t(ser_readdata64(s)); } -template void Unserialize(Stream& s, uint64_t& a) { a = ser_readdata64(s); } - -template void Unserialize(Stream& s, B (&a)[N]) { s.read(MakeWritableByteSpan(a)); } -template void Unserialize(Stream& s, std::array& a) { s.read(MakeWritableByteSpan(a)); } -template void Unserialize(Stream& s, std::span span) { s.read(std::as_writable_bytes(span)); } -template void Unserialize(Stream& s, std::span span) { s.read(std::as_writable_bytes(span)); } - -template void Serialize(Stream& s, bool a) { uint8_t f = a; ser_writedata8(s, f); } -template void Unserialize(Stream& s, bool& a) { uint8_t f = ser_readdata8(s); a = f; } +template void Unserialize(Stream& s, T& a) +{ + if constexpr (sizeof(T) == 1) { + a = static_cast(ser_readdata8(s)); // (u)int8_t or std::byte or bool + } else if constexpr (sizeof(T) == 2) { + a = static_cast(ser_readdata16(s)); // (u)int16_t + } else if constexpr (sizeof(T) == 4) { + a = static_cast(ser_readdata32(s)); // (u)int32_t + } else { + static_assert(sizeof(T) == 8); + a = static_cast(ser_readdata64(s)); // (u)int64_t + } +} +template void Unserialize(Stream& s, B (&a)[N]) { s.read(MakeWritableByteSpan(a)); } +template void Unserialize(Stream& s, std::array& a) { s.read(MakeWritableByteSpan(a)); } +template void Unserialize(Stream& s, std::span span) { 
s.read(std::as_writable_bytes(span)); } +template void Unserialize(Stream& s, std::span span) { s.read(std::as_writable_bytes(span)); } // clang-format on @@ -480,7 +486,7 @@ class Wrapper * serialization, and Unser(stream, object&) for deserialization. Serialization routines (inside * READWRITE, or directly with << and >> operators), can then use Using(object). * - * This works by constructing a Wrapper-wrapped version of object, where T is + * This works by constructing a Wrapper-wrapped version of object, where T is * const during serialization, and non-const during deserialization, which maintains const * correctness. */ From 19d97d4212fc8ae3b9712e38e476ed461d30a178 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Thu, 20 Mar 2025 10:28:52 +0100 Subject: [PATCH 03/15] refactor: add explicit static extent to spans --- src/serialize.h | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/src/serialize.h b/src/serialize.h index 57eb373111c6..181dae77048a 100644 --- a/src/serialize.h +++ b/src/serialize.h @@ -53,56 +53,56 @@ constexpr deserialize_type deserialize {}; */ template inline void ser_writedata8(Stream &s, uint8_t obj) { - s.write(std::as_bytes(std::span{&obj, 1})); + s.write(std::as_bytes(std::span{&obj, 1})); } template inline void ser_writedata16(Stream &s, uint16_t obj) { obj = htole16_internal(obj); - s.write(std::as_bytes(std::span{&obj, 1})); + s.write(std::as_bytes(std::span{&obj, 1})); } template inline void ser_writedata32(Stream &s, uint32_t obj) { obj = htole32_internal(obj); - s.write(std::as_bytes(std::span{&obj, 1})); + s.write(std::as_bytes(std::span{&obj, 1})); } template inline void ser_writedata32be(Stream &s, uint32_t obj) { obj = htobe32_internal(obj); - s.write(std::as_bytes(std::span{&obj, 1})); + s.write(std::as_bytes(std::span{&obj, 1})); } template inline void ser_writedata64(Stream &s, uint64_t obj) { obj = htole64_internal(obj); - s.write(std::as_bytes(std::span{&obj, 
1})); + s.write(std::as_bytes(std::span{&obj, 1})); } template inline uint8_t ser_readdata8(Stream &s) { uint8_t obj; - s.read(std::as_writable_bytes(std::span{&obj, 1})); + s.read(std::as_writable_bytes(std::span{&obj, 1})); return obj; } template inline uint16_t ser_readdata16(Stream &s) { uint16_t obj; - s.read(std::as_writable_bytes(std::span{&obj, 1})); + s.read(std::as_writable_bytes(std::span{&obj, 1})); return le16toh_internal(obj); } template inline uint32_t ser_readdata32(Stream &s) { uint32_t obj; - s.read(std::as_writable_bytes(std::span{&obj, 1})); + s.read(std::as_writable_bytes(std::span{&obj, 1})); return le32toh_internal(obj); } template inline uint32_t ser_readdata32be(Stream &s) { uint32_t obj; - s.read(std::as_writable_bytes(std::span{&obj, 1})); + s.read(std::as_writable_bytes(std::span{&obj, 1})); return be32toh_internal(obj); } template inline uint64_t ser_readdata64(Stream &s) { uint64_t obj; - s.read(std::as_writable_bytes(std::span{&obj, 1})); + s.read(std::as_writable_bytes(std::span{&obj, 1})); return le64toh_internal(obj); } @@ -280,7 +280,6 @@ template void Unserialize(Stream& s, T& a) template void Unserialize(Stream& s, B (&a)[N]) { s.read(MakeWritableByteSpan(a)); } template void Unserialize(Stream& s, std::array& a) { s.read(MakeWritableByteSpan(a)); } template void Unserialize(Stream& s, std::span span) { s.read(std::as_writable_bytes(span)); } -template void Unserialize(Stream& s, std::span span) { s.read(std::as_writable_bytes(span)); } // clang-format on @@ -533,10 +532,10 @@ struct CustomUintFormatter if (v < 0 || v > MAX) throw std::ios_base::failure("CustomUintFormatter value out of range"); if (BigEndian) { uint64_t raw = htobe64_internal(v); - s.write(std::as_bytes(std::span{&raw, 1}).last(Bytes)); + s.write(std::as_bytes(std::span{&raw, 1}).template last()); } else { uint64_t raw = htole64_internal(v); - s.write(std::as_bytes(std::span{&raw, 1}).first(Bytes)); + s.write(std::as_bytes(std::span{&raw, 1}).template first()); 
} } @@ -546,10 +545,10 @@ struct CustomUintFormatter static_assert(std::numeric_limits::max() >= MAX && std::numeric_limits::min() <= 0, "Assigned type too small"); uint64_t raw = 0; if (BigEndian) { - s.read(std::as_writable_bytes(std::span{&raw, 1}).last(Bytes)); + s.read(std::as_writable_bytes(std::span{&raw, 1}).last()); v = static_cast(be64toh_internal(raw)); } else { - s.read(std::as_writable_bytes(std::span{&raw, 1}).first(Bytes)); + s.read(std::as_writable_bytes(std::span{&raw, 1}).first()); v = static_cast(le64toh_internal(raw)); } } From 91e8668d2525b62e6448278c228898266b9a58bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Fri, 14 Feb 2025 13:54:57 +0100 Subject: [PATCH 04/15] optimization: merge `SizeComputer` specializations and add new overloads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Endianness doesn’t affect final size, so skip it in `SizeComputer`. Fold existing overloads into one implementation, short‑circuiting logic when only the serialized size is needed. > cmake -B build -DBUILD_BENCH=ON -DCMAKE_BUILD_TYPE=Release && cmake --build build -j$(nproc) && build/src/bench/bench_bitcoin -filter='SizeComputerBlock|SerializeBlock' --min-time=10000 > C compiler ............................ AppleClang 16.0.0.16000026 | ns/block | block/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 191,652.29 | 5,217.78 | 0.4% | 10.96 | `SerializeBlock` | 10,323.55 | 96,865.92 | 0.2% | 11.01 | `SizeComputerBlock` > C++ compiler .......................... 
GNU 13.3.0 | ns/block | block/s | err% | ins/block | cyc/block | IPC | bra/block | miss% | total | benchmark |--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:---------- | 614,847.32 | 1,626.42 | 0.0% | 8,015,883.64 | 2,207,628.07 | 3.631 | 1,517,035.62 | 0.5% | 10.56 | `SerializeBlock` | 26,020.31 | 38,431.52 | 0.0% | 159,390.03 | 93,438.33 | 1.706 | 42,131.03 | 0.9% | 11.00 | `SizeComputerBlock` --- src/serialize.h | 115 ++++++++++++++++++++++++++++++------------------ 1 file changed, 73 insertions(+), 42 deletions(-) diff --git a/src/serialize.h b/src/serialize.h index 181dae77048a..b3e5f859b752 100644 --- a/src/serialize.h +++ b/src/serialize.h @@ -48,6 +48,16 @@ static const unsigned int MAX_VECTOR_ALLOCATE = 5000000; struct deserialize_type {}; constexpr deserialize_type deserialize {}; +class SizeComputer; + +//! Check if type contains a stream by seeing if it has a GetStream() method. +template +concept ContainsStream = requires(T t) { t.GetStream(); }; + +template +concept ContainsSizeComputer = ContainsStream && + std::is_same_v().GetStream())>, SizeComputer>; + /* * Lowest-level serialization and conversion. */ @@ -107,8 +117,6 @@ template inline uint64_t ser_readdata64(Stream &s) } -class SizeComputer; - /** * Convert any argument to a reference to X, maintaining constness. * @@ -247,7 +255,9 @@ concept ByteOrIntegral = std::is_same_v || template void Serialize(Stream&, V) = delete; // char serialization forbidden. 
Use uint8_t or int8_t template void Serialize(Stream& s, T a) { - if constexpr (sizeof(T) == 1) { + if constexpr (ContainsSizeComputer) { + s.GetStream().seek(sizeof(T)); + } else if constexpr (sizeof(T) == 1) { ser_writedata8(s, static_cast(a)); // (u)int8_t or std::byte or bool } else if constexpr (sizeof(T) == 2) { ser_writedata16(s, static_cast(a)); // (u)int16_t @@ -258,10 +268,38 @@ template void Serialize(Stream& s, T a) ser_writedata64(s, static_cast(a)); // (u)int64_t } } -template void Serialize(Stream& s, const B (&a)[N]) { s.write(MakeByteSpan(a)); } -template void Serialize(Stream& s, const std::array& a) { s.write(MakeByteSpan(a)); } -template void Serialize(Stream& s, std::span span) { s.write(std::as_bytes(span)); } -template void Serialize(Stream& s, std::span span) { s.write(std::as_bytes(span)); } +template void Serialize(Stream& s, const B (&a)[N]) +{ + if constexpr (ContainsSizeComputer) { + s.GetStream().seek(N); + } else { + s.write(MakeByteSpan(a)); + } +} +template void Serialize(Stream& s, const std::array& a) +{ + if constexpr (ContainsSizeComputer) { + s.GetStream().seek(N); + } else { + s.write(MakeByteSpan(a)); + } +} +template void Serialize(Stream& s, std::span span) +{ + if constexpr (ContainsSizeComputer) { + s.GetStream().seek(N); + } else { + s.write(std::as_bytes(span)); + } +} +template void Serialize(Stream& s, std::span span) +{ + if constexpr (ContainsSizeComputer) { + s.GetStream().seek(span.size()); + } else { + s.write(std::as_bytes(span)); + } +} template void Unserialize(Stream&, V) = delete; // char serialization forbidden. 
Use uint8_t or int8_t template void Unserialize(Stream& s, T& a) @@ -298,12 +336,14 @@ constexpr inline unsigned int GetSizeOfCompactSize(uint64_t nSize) else return sizeof(unsigned char) + sizeof(uint64_t); } -inline void WriteCompactSize(SizeComputer& os, uint64_t nSize); - template void WriteCompactSize(Stream& os, uint64_t nSize) { - if (nSize < 253) + if constexpr (ContainsSizeComputer) + { + os.GetStream().seek(GetSizeOfCompactSize(nSize)); + } + else if (nSize < 253) { ser_writedata8(os, nSize); } @@ -410,7 +450,7 @@ struct CheckVarIntMode { }; template -inline unsigned int GetSizeOfVarInt(I n) +constexpr unsigned int GetSizeOfVarInt(I n) { CheckVarIntMode(); int nRet = 0; @@ -423,25 +463,26 @@ inline unsigned int GetSizeOfVarInt(I n) return nRet; } -template -inline void WriteVarInt(SizeComputer& os, I n); - template void WriteVarInt(Stream& os, I n) { - CheckVarIntMode(); - unsigned char tmp[(sizeof(n)*8+6)/7]; - int len=0; - while(true) { - tmp[len] = (n & 0x7F) | (len ? 0x80 : 0x00); - if (n <= 0x7F) - break; - n = (n >> 7) - 1; - len++; + if constexpr (ContainsSizeComputer) { + os.GetStream().seek(GetSizeOfVarInt(n)); + } else { + CheckVarIntMode(); + unsigned char tmp[(sizeof(n)*8+6)/7]; + int len=0; + while(true) { + tmp[len] = (n & 0x7F) | (len ? 
0x80 : 0x00); + if (n <= 0x7F) + break; + n = (n >> 7) - 1; + len++; + } + do { + ser_writedata8(os, tmp[len]); + } while(len--); } - do { - ser_writedata8(os, tmp[len]); - } while(len--); } template @@ -530,7 +571,9 @@ struct CustomUintFormatter template void Ser(Stream& s, I v) { if (v < 0 || v > MAX) throw std::ios_base::failure("CustomUintFormatter value out of range"); - if (BigEndian) { + if constexpr (ContainsSizeComputer) { + s.GetStream().seek(Bytes); + } else if (BigEndian) { uint64_t raw = htobe64_internal(v); s.write(std::as_bytes(std::span{&raw, 1}).template last()); } else { @@ -1061,6 +1104,9 @@ class SizeComputer public: SizeComputer() = default; + SizeComputer& GetStream() { return *this; } + const SizeComputer& GetStream() const { return *this; }; + void write(std::span src) { m_size += src.size(); @@ -1085,27 +1131,12 @@ class SizeComputer } }; -template -inline void WriteVarInt(SizeComputer &s, I n) -{ - s.seek(GetSizeOfVarInt(n)); -} - -inline void WriteCompactSize(SizeComputer &s, uint64_t nSize) -{ - s.seek(GetSizeOfCompactSize(nSize)); -} - template uint64_t GetSerializeSize(const T& t) { return (SizeComputer() << t).size(); } -//! Check if type contains a stream by seeing if has a GetStream() method. -template -concept ContainsStream = requires(T t) { t.GetStream(); }; - /** Wrapper that overrides the GetParams() function of a stream. */ template class ParamsStream From b4b7335a8a104ba6c3731f741ff5d6eebec7661d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Sun, 9 Mar 2025 21:33:36 +0100 Subject: [PATCH 05/15] optimization: add single byte writes Single byte writes are used very often (used for every (u)int8_t or std::byte or bool and for every VarInt's first byte which is also needed for every (pre)Vector). 
It makes sense to avoid the generalized serialization infrastructure that isn't needed: * AutoFile write doesn't need to allocate a 4k buffer for a single byte now; * `VectorWriter` and `DataStream` avoid memcpy/insert calls. > cmake -B build -DBUILD_BENCH=ON -DCMAKE_BUILD_TYPE=Release && cmake --build build -j$(nproc) && build/bin/bench_bitcoin -filter='SizeComputerBlock|SerializeBlock' --min-time=10000 > C compiler ............................ AppleClang 16.0.0.16000026 | ns/block | block/s | err% | total | benchmark |--------------------:|--------------------:|--------:|----------:|:---------- | 174,569.19 | 5,728.39 | 0.6% | 10.89 | `SerializeBlock` | 10,241.16 | 97,645.21 | 0.0% | 11.00 | `SizeComputerBlock` > C++ compiler .......................... GNU 13.3.0 | ns/block | block/s | err% | ins/block | cyc/block | IPC | bra/block | miss% | total | benchmark |--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:---------- | 615,000.56 | 1,626.01 | 0.0% | 8,015,883.64 | 2,208,340.88 | 3.630 | 1,517,035.62 | 0.5% | 10.56 | `SerializeBlock` | 25,676.76 | 38,945.72 | 0.0% | 159,390.03 | 92,202.10 | 1.729 | 42,131.03 | 0.9% | 11.00 | `SizeComputerBlock` --- src/bench/checkblock.cpp | 4 ++-- src/bench/rpc_blockchain.cpp | 2 +- src/crypto/sha256.cpp | 15 +++++++++++++++ src/crypto/sha256.h | 1 + src/hash.h | 24 +++++++++++++++++++++++- src/serialize.h | 6 ++++++ src/streams.cpp | 13 +++++++++++++ src/streams.h | 17 +++++++++++++++++ src/test/crypto_tests.cpp | 2 +- src/test/fuzz/autofile.cpp | 4 ++-- src/test/streams_tests.cpp | 6 +++--- 11 files changed, 84 insertions(+), 10 deletions(-) diff --git a/src/bench/checkblock.cpp b/src/bench/checkblock.cpp index 128a37e6608a..3f0dd2b1f51b 100644 --- a/src/bench/checkblock.cpp +++ b/src/bench/checkblock.cpp @@ -52,7 +52,7 @@ static void DeserializeBlock(benchmark::Bench& bench) { DataStream stream(benchmark::data::block413567); std::byte 
a{0}; - stream.write({&a, 1}); // Prevent compaction + stream.write(std::span{&a, 1}); // Prevent compaction bench.unit("block").run([&] { CBlock block; @@ -66,7 +66,7 @@ static void DeserializeAndCheckBlock(benchmark::Bench& bench) { DataStream stream(benchmark::data::block413567); std::byte a{0}; - stream.write({&a, 1}); // Prevent compaction + stream.write(std::span{&a, 1}); // Prevent compaction ArgsManager bench_args; const auto chainParams = CreateChainParams(bench_args, ChainType::MAIN); diff --git a/src/bench/rpc_blockchain.cpp b/src/bench/rpc_blockchain.cpp index 0e89ac78a136..3f8ad5351980 100644 --- a/src/bench/rpc_blockchain.cpp +++ b/src/bench/rpc_blockchain.cpp @@ -33,7 +33,7 @@ struct TestBlockAndIndex { { DataStream stream{benchmark::data::block413567}; std::byte a{0}; - stream.write({&a, 1}); // Prevent compaction + stream.write(std::span{&a, 1}); // Prevent compaction stream >> TX_WITH_WITNESS(block); diff --git a/src/crypto/sha256.cpp b/src/crypto/sha256.cpp index 54bd9a59f9ef..7d4dfa81941f 100644 --- a/src/crypto/sha256.cpp +++ b/src/crypto/sha256.cpp @@ -721,6 +721,21 @@ CSHA256& CSHA256::Write(const unsigned char* data, size_t len) } return *this; } +CSHA256& CSHA256::Write(unsigned char data) +{ + size_t bufsize = bytes % 64; + + // Add the single byte to the buffer + buf[bufsize] = data; + bytes += 1; + + if (bufsize == 63) { + // Process the buffer if full + Transform(s, buf, 1); + } + + return *this; +} void CSHA256::Finalize(unsigned char hash[OUTPUT_SIZE]) { diff --git a/src/crypto/sha256.h b/src/crypto/sha256.h index 3ac771c5d0db..ba4b5eb9c5e1 100644 --- a/src/crypto/sha256.h +++ b/src/crypto/sha256.h @@ -22,6 +22,7 @@ class CSHA256 CSHA256(); CSHA256& Write(const unsigned char* data, size_t len); + CSHA256& Write(unsigned char data); void Finalize(unsigned char hash[OUTPUT_SIZE]); CSHA256& Reset(); }; diff --git a/src/hash.h b/src/hash.h index 34486af64a1d..da3b1ab1145f 100644 --- a/src/hash.h +++ b/src/hash.h @@ -38,6 +38,10 @@ class 
CHash256 { sha.Write(input.data(), input.size()); return *this; } + CHash256& Write(std::span input) { + sha.Write(input[0]); + return *this; + } CHash256& Reset() { sha.Reset(); @@ -63,6 +67,10 @@ class CHash160 { sha.Write(input.data(), input.size()); return *this; } + CHash160& Write(std::span input) { + sha.Write(input[0]); + return *this; + } CHash160& Reset() { sha.Reset(); @@ -107,6 +115,10 @@ class HashWriter { ctx.Write(UCharCast(src.data()), src.size()); } + void write(std::span src) + { + ctx.Write(*UCharCast(&src[0])); + } /** Compute the double-SHA256 hash of all data written to this object. * @@ -160,13 +172,18 @@ class HashVerifier : public HashWriter m_source.read(dst); this->write(dst); } + void read(std::span dst) + { + m_source.read(dst); + this->write(std::span{dst}); + } void ignore(size_t num_bytes) { std::byte data[1024]; while (num_bytes > 0) { size_t now = std::min(num_bytes, 1024); - read({data, now}); + read(std::span{data, now}); num_bytes -= now; } } @@ -194,6 +211,11 @@ class HashedSourceWriter : public HashWriter m_source.write(src); HashWriter::write(src); } + void write(std::span src) + { + m_source.write(src); + HashWriter::write(src); + } template HashedSourceWriter& operator<<(const T& obj) diff --git a/src/serialize.h b/src/serialize.h index b3e5f859b752..521ba108eabc 100644 --- a/src/serialize.h +++ b/src/serialize.h @@ -1111,6 +1111,10 @@ class SizeComputer { m_size += src.size(); } + void write(std::span) + { + this->m_size += 1; + } /** Pretend this many bytes are written, without specifying them. 
*/ void seek(uint64_t num) @@ -1161,7 +1165,9 @@ class ParamsStream template ParamsStream& operator<<(const U& obj) { ::Serialize(*this, obj); return *this; } template ParamsStream& operator>>(U&& obj) { ::Unserialize(*this, obj); return *this; } void write(std::span src) { GetStream().write(src); } + void write(std::span src) { GetStream().write(src); } void read(std::span dst) { GetStream().read(dst); } + void read(std::span dst) { GetStream().read(dst); } void ignore(size_t num) { GetStream().ignore(num); } bool empty() const { return GetStream().empty(); } size_t size() const { return GetStream().size(); } diff --git a/src/streams.cpp b/src/streams.cpp index e38b9592942e..65df3c4b6f55 100644 --- a/src/streams.cpp +++ b/src/streams.cpp @@ -78,6 +78,13 @@ void AutoFile::read(std::span dst) } } +void AutoFile::read(std::span dst) +{ + if (detail_fread(dst) != 1) { + throw std::ios_base::failure(feof() ? "AutoFile::read: end of file" : "AutoFile::read: fread failed"); + } +} + void AutoFile::ignore(size_t nSize) { if (!m_file) throw std::ios_base::failure("AutoFile::ignore: file handle is nullptr"); @@ -112,6 +119,12 @@ void AutoFile::write(std::span src) } } +void AutoFile::write(std::span src) +{ + std::byte temp_byte = src[0]; + write_buffer(std::span(&temp_byte, 1)); +} + void AutoFile::write_buffer(std::span src) { if (!m_file) throw std::ios_base::failure("AutoFile::write_buffer: file handle is nullptr"); diff --git a/src/streams.h b/src/streams.h index 346984259c15..ad073be5207b 100644 --- a/src/streams.h +++ b/src/streams.h @@ -65,6 +65,17 @@ class VectorWriter } nPos += src.size(); } + void write(std::span src) + { + assert(nPos <= vchData.size()); + const auto byte{*UCharCast(&src[0])}; + if (nPos < vchData.size()) { + vchData[nPos] = byte; + } else { + vchData.push_back(byte); + } + nPos += 1; + } template VectorWriter& operator<<(const T& obj) { @@ -239,6 +250,10 @@ class DataStream // Write to the end of the buffer vch.insert(vch.end(), src.begin(), 
src.end()); } + void write(std::span src) + { + vch.push_back(src[0]); + } template DataStream& operator<<(const T& obj) @@ -454,8 +469,10 @@ class AutoFile // Stream subset // void read(std::span dst); + void read(std::span dst); void ignore(size_t nSize); void write(std::span src); + void write(std::span src); template AutoFile& operator<<(const T& obj) diff --git a/src/test/crypto_tests.cpp b/src/test/crypto_tests.cpp index 5588d4cdbc66..0aab9ef0e77d 100644 --- a/src/test/crypto_tests.cpp +++ b/src/test/crypto_tests.cpp @@ -1079,7 +1079,7 @@ BOOST_AUTO_TEST_CASE(sha256d64) in[j] = m_rng.randbits(8); } for (int j = 0; j < i; ++j) { - CHash256().Write({in + 64 * j, 64}).Finalize({out1 + 32 * j, 32}); + CHash256().Write(std::span{in + 64 * j, 64}).Finalize({out1 + 32 * j, 32}); } SHA256D64(out2, in, i); BOOST_CHECK(memcmp(out1, out2, 32 * i) == 0); diff --git a/src/test/fuzz/autofile.cpp b/src/test/fuzz/autofile.cpp index 5aa5d8c13322..b1be07c11239 100644 --- a/src/test/fuzz/autofile.cpp +++ b/src/test/fuzz/autofile.cpp @@ -31,14 +31,14 @@ FUZZ_TARGET(autofile) [&] { std::array arr{}; try { - auto_file.read({arr.data(), fuzzed_data_provider.ConsumeIntegralInRange(0, 4096)}); + auto_file.read(std::span{arr.data(), fuzzed_data_provider.ConsumeIntegralInRange(0, 4096)}); } catch (const std::ios_base::failure&) { } }, [&] { const std::array arr{}; try { - auto_file.write({arr.data(), fuzzed_data_provider.ConsumeIntegralInRange(0, 4096)}); + auto_file.write(std::span{arr.data(), fuzzed_data_provider.ConsumeIntegralInRange(0, 4096)}); } catch (const std::ios_base::failure&) { } }, diff --git a/src/test/streams_tests.cpp b/src/test/streams_tests.cpp index af75ee987ad3..1c76497d3c35 100644 --- a/src/test/streams_tests.cpp +++ b/src/test/streams_tests.cpp @@ -98,9 +98,9 @@ BOOST_AUTO_TEST_CASE(xor_file) { // Check errors for missing file AutoFile xor_file{raw_file("rb"), obfuscation}; - BOOST_CHECK_EXCEPTION(xor_file << std::byte{}, std::ios_base::failure, 
HasReason{"AutoFile::write: file handle is nullptr"}); - BOOST_CHECK_EXCEPTION(xor_file >> std::byte{}, std::ios_base::failure, HasReason{"AutoFile::read: file handle is nullptr"}); - BOOST_CHECK_EXCEPTION(xor_file.ignore(1), std::ios_base::failure, HasReason{"AutoFile::ignore: file handle is nullptr"}); + BOOST_CHECK_EXCEPTION(xor_file << std::byte{}, std::ios_base::failure, HasReason{"file handle is nullptr"}); + BOOST_CHECK_EXCEPTION(xor_file >> std::byte{}, std::ios_base::failure, HasReason{"file handle is nullptr"}); + BOOST_CHECK_EXCEPTION(xor_file.ignore(1), std::ios_base::failure, HasReason{"file handle is nullptr"}); BOOST_CHECK_EXCEPTION(xor_file.size(), std::ios_base::failure, HasReason{"AutoFile::size: file handle is nullptr"}); } { From bec0cb7c586f38c3073870dda128e23cc60c48a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Mon, 16 Feb 2026 18:00:20 +0000 Subject: [PATCH 06/15] serialize: optimize WriteVarInt writes Fast-path the common single-byte case and batch multi-byte encodes into a single span write. --- src/serialize.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/serialize.h b/src/serialize.h index 521ba108eabc..fea4cbf51112 100644 --- a/src/serialize.h +++ b/src/serialize.h @@ -470,18 +470,18 @@ void WriteVarInt(Stream& os, I n) os.GetStream().seek(GetSizeOfVarInt(n)); } else { CheckVarIntMode(); - unsigned char tmp[(sizeof(n)*8+6)/7]; - int len=0; - while(true) { - tmp[len] = (n & 0x7F) | (len ? 
0x80 : 0x00); - if (n <= 0x7F) - break; + if (n <= 0x7F) { + ser_writedata8(os, n); + return; + } + unsigned char tmp[(sizeof(n) * 8 + 6) / 7]; + size_t pos = std::size(tmp); + tmp[--pos] = n & 0x7F; + while (n > 0x7F) { n = (n >> 7) - 1; - len++; + tmp[--pos] = (n & 0x7F) | 0x80; } - do { - ser_writedata8(os, tmp[len]); - } while(len--); + os.write(std::as_bytes(std::span{tmp}.subspan(pos))); } } From 8a84b59b8a0e1cbf02e3d4e50927b3cc21a67b5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Mon, 16 Feb 2026 18:32:17 +0000 Subject: [PATCH 07/15] streams: specialize span reads Use a single templated read() implementation for fixed and dynamic span extents, and keep the 1-byte read fast path inside that method. --- src/streams.h | 75 ++++++++++++++++++++++++++++++--------------------- 1 file changed, 45 insertions(+), 30 deletions(-) diff --git a/src/streams.h b/src/streams.h index ad073be5207b..a0d050ff0aff 100644 --- a/src/streams.h +++ b/src/streams.h @@ -12,7 +12,6 @@ #include #include #include -#include #include #include @@ -112,18 +111,24 @@ class SpanReader size_t size() const { return m_data.size(); } bool empty() const { return m_data.empty(); } - void read(std::span dst) + template + void read(std::span dst) { - if (dst.size() == 0) { - return; - } - - // Read from the beginning of the buffer - if (dst.size() > m_data.size()) { - throw std::ios_base::failure("SpanReader::read(): end of data"); + if constexpr (Extent == 1) { + if (m_data.empty()) { + throw std::ios_base::failure("SpanReader::read(): end of data"); + } + dst[0] = m_data[0]; + m_data = m_data.subspan(1); + } else { + const auto n{dst.size()}; + // Read from the beginning of the buffer + if (n > m_data.size()) { + throw std::ios_base::failure("SpanReader::read(): end of data"); + } + memcpy(dst.data(), m_data.data(), n); + m_data = m_data.subspan(n); } - memcpy(dst.data(), m_data.data(), dst.size()); - m_data = m_data.subspan(dst.size()); } void ignore(size_t n) @@ -212,37 +217,47 @@ 
class DataStream // int in_avail() const { return size(); } - void read(std::span dst) + template + void read(std::span dst) { - if (dst.size() == 0) return; - - // Read from the beginning of the buffer - auto next_read_pos{CheckedAdd(m_read_pos, dst.size())}; - if (!next_read_pos.has_value() || next_read_pos.value() > vch.size()) { - throw std::ios_base::failure("DataStream::read(): end of data"); - } - memcpy(dst.data(), &vch[m_read_pos], dst.size()); - if (next_read_pos.value() == vch.size()) { - m_read_pos = 0; - vch.clear(); - return; + if constexpr (Extent == 1) { + if (m_read_pos == vch.size()) { + throw std::ios_base::failure("DataStream::read(): end of data"); + } + dst[0] = vch[m_read_pos]; + ++m_read_pos; + if (m_read_pos == vch.size()) { + m_read_pos = 0; + vch.clear(); + } + } else { + const auto n{dst.size()}; + const auto avail{vch.size() - m_read_pos}; + if (n > avail) { + throw std::ios_base::failure("DataStream::read(): end of data"); + } + memcpy(dst.data(), &vch[m_read_pos], n); + if (n == avail) { + m_read_pos = 0; + vch.clear(); + return; + } + m_read_pos += n; } - m_read_pos = next_read_pos.value(); } void ignore(size_t num_ignore) { - // Ignore from the beginning of the buffer - auto next_read_pos{CheckedAdd(m_read_pos, num_ignore)}; - if (!next_read_pos.has_value() || next_read_pos.value() > vch.size()) { + const auto avail{vch.size() - m_read_pos}; + if (num_ignore > avail) { throw std::ios_base::failure("DataStream::ignore(): end of data"); } - if (next_read_pos.value() == vch.size()) { + if (num_ignore == avail) { m_read_pos = 0; vch.clear(); return; } - m_read_pos = next_read_pos.value(); + m_read_pos += num_ignore; } void write(std::span src) From 90bca75d6b22f8dcabe63ff7e64d5156e24f6e7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Mon, 16 Feb 2026 18:35:12 +0000 Subject: [PATCH 08/15] streams: specialize span writes Use a single templated DataStream::write() implementation for fixed and dynamic span extents, keeping 
the static-extent special cases inside the same method. --- src/streams.h | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/src/streams.h b/src/streams.h index a0d050ff0aff..f690d99ba11d 100644 --- a/src/streams.h +++ b/src/streams.h @@ -260,14 +260,22 @@ class DataStream m_read_pos += num_ignore; } - void write(std::span src) + template + void write(std::span src) { // Write to the end of the buffer - vch.insert(vch.end(), src.begin(), src.end()); - } - void write(std::span src) - { - vch.push_back(src[0]); + if constexpr (Extent == 1) { + vch.push_back(src[0]); + } else if constexpr (Extent == 2) { + vch.push_back(src[0]); + vch.push_back(src[1]); + } else if constexpr (Extent != std::dynamic_extent) { + // Keep Extent a compile-time constant so small fixed-size writes can be optimized better + // than the dynamic-size path. + vch.insert(vch.end(), src.data(), src.data() + Extent); + } else { + vch.insert(vch.end(), src.data(), src.data() + src.size()); + } } template From a7b02a7f39548deaa775ce7fe2539774dc39f909 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Mon, 16 Feb 2026 18:36:10 +0000 Subject: [PATCH 09/15] streams: fast-path VectorWriter append writes Add an explicit append fast path in VectorWriter::write(std::span) and reuse a single source pointer for both insert branches. This removes overwrite bookkeeping when nPos is already at the end, which is the dominant case. 
Microbenchmark (/tmp/serialize_perf.cpp, g++-14.2, -O3): before: /tmp/serialize_perf_idea18_before.tsv after: /tmp/serialize_perf_idea18_after2.tsv VectorWriterWriteSpan32: 19.749404 -> 16.854612 ns/op (-14.658%) ReadCompactSize: 12.471655 -> 9.180395 ns/op (-26.390%) SerializeUint32: 1.328602 -> 1.258573 ns/op (-5.271%) UnserializeUint32: 2.460472 -> 2.469724 ns/op (+0.376%; noise-level) --- src/streams.h | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/src/streams.h b/src/streams.h index f690d99ba11d..0567f7a2bf26 100644 --- a/src/streams.h +++ b/src/streams.h @@ -52,29 +52,35 @@ class VectorWriter { ::SerializeMany(*this, std::forward(args)...); } - void write(std::span src) + template + void write(std::span src) { assert(nPos <= vchData.size()); + const auto src_ptr{UCharCast(src.data())}; + if constexpr (Extent == 1) { + const auto byte{src_ptr[0]}; + if (nPos < vchData.size()) { + vchData[nPos] = byte; + } else { + vchData.push_back(byte); + } + nPos += 1; + return; + } + if (nPos == vchData.size()) { + vchData.insert(vchData.end(), src_ptr, src_ptr + src.size()); + nPos += src.size(); + return; + } size_t nOverwrite = std::min(src.size(), vchData.size() - nPos); if (nOverwrite) { - memcpy(vchData.data() + nPos, src.data(), nOverwrite); + memcpy(vchData.data() + nPos, src_ptr, nOverwrite); } if (nOverwrite < src.size()) { - vchData.insert(vchData.end(), UCharCast(src.data()) + nOverwrite, UCharCast(src.data() + src.size())); + vchData.insert(vchData.end(), src_ptr + nOverwrite, src_ptr + src.size()); } nPos += src.size(); } - void write(std::span src) - { - assert(nPos <= vchData.size()); - const auto byte{*UCharCast(&src[0])}; - if (nPos < vchData.size()) { - vchData[nPos] = byte; - } else { - vchData.push_back(byte); - } - nPos += 1; - } template VectorWriter& operator<<(const T& obj) { From b33fea381a9f98bdbcfe6c26f0453fe4ac11d1e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Mon, 16 Feb 
2026 18:45:09 +0000 Subject: [PATCH 10/15] serialize: move map/set elements into hinted inserts Use std::move when inserting deserialized temporary map/set elements. Each temporary is not used again after the insert, so moving it removes extra key/value copies while keeping the code straightforward. Microbenchmark (/tmp/serialize_assoc_perf.cpp, g++-14.2, -O3): before: /tmp/serialize_assoc_idea27_before.tsv after: /tmp/serialize_assoc_idea27_after2.tsv UnserializeMap: 117092.799167 -> 92975.440833 ns/op (-20.598%) UnserializeSet: 157473.293333 -> 132820.890000 ns/op (-15.655%) --- src/serialize.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/serialize.h b/src/serialize.h index fea4cbf51112..241965ef7a33 100644 --- a/src/serialize.h +++ b/src/serialize.h @@ -960,7 +960,7 @@ void Unserialize(Stream& is, std::map& m) { std::pair item; Unserialize(is, item); - mi = m.insert(mi, item); + mi = m.insert(mi, std::move(item)); } } @@ -987,7 +987,7 @@ void Unserialize(Stream& is, std::set& m) { K key; Unserialize(is, key); - it = m.insert(it, key); + it = m.insert(it, std::move(key)); } } From 08cce0bb7c89c347e109d63dcbfdd7bb107f0374 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Mon, 16 Feb 2026 19:47:41 +0000 Subject: [PATCH 11/15] serialize: fast path small byte vector deserialization Add a direct single-chunk path for BasicByte vector/prevector deserialization when encoded size fits within one allocation chunk. This avoids loop bookkeeping in the common case. Microbenchmark (/tmp/serialize_vector_perf.cpp, g++-14.2, -O3): before: /tmp/serialize_vector_try_before.tsv after: /tmp/serialize_vector_try_patch.tsv VectorUnserialize: 57.370785 -> 55.291175 ns/op (-3.625%) PrevectorUnserialize: 43.594430 -> 40.879915 ns/op (-6.226%) serialize: use size_t counters in byte vector chunk loops Switch BasicByte vector/prevector chunked deserialization counters from unsigned int to size_t.
This removes repeated integer-width conversions in loop control and std::min calls. Microbenchmark (/tmp/serialize_vector_perf.cpp, g++-14.2, -O3): before: /tmp/serialize_vector_try_size_t_before.tsv after: /tmp/serialize_vector_try_size_t_after.tsv VectorUnserialize: 53.531320 -> 51.489485 ns/op (-3.814%) PrevectorUnserialize: 39.887540 -> 39.709020 ns/op (-0.448%) --- src/serialize.h | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/src/serialize.h b/src/serialize.h index 241965ef7a33..14b4aef3d088 100644 --- a/src/serialize.h +++ b/src/serialize.h @@ -863,10 +863,18 @@ void Unserialize(Stream& is, prevector& v) if constexpr (BasicByte) { // Use optimized version for unformatted basic bytes // Limit size per read so bogus size value won't cause out of memory v.clear(); - unsigned int nSize = ReadCompactSize(is); - unsigned int i = 0; + size_t nSize = ReadCompactSize(is); + constexpr size_t max_chunk{static_cast(1 + 4999999 / sizeof(T))}; + if (nSize <= max_chunk) { + v.resize_uninitialized(nSize); + if (nSize != 0) { + is.read(std::as_writable_bytes(std::span{v.data(), nSize})); + } + return; + } + size_t i = 0; while (i < nSize) { - unsigned int blk = std::min(nSize - i, (unsigned int)(1 + 4999999 / sizeof(T))); + size_t blk = std::min(nSize - i, max_chunk); v.resize_uninitialized(i + blk); is.read(std::as_writable_bytes(std::span{&v[i], blk})); i += blk; @@ -906,10 +914,18 @@ void Unserialize(Stream& is, std::vector& v) if constexpr (BasicByte) { // Use optimized version for unformatted basic bytes // Limit size per read so bogus size value won't cause out of memory v.clear(); - unsigned int nSize = ReadCompactSize(is); - unsigned int i = 0; + size_t nSize = ReadCompactSize(is); + constexpr size_t max_chunk{static_cast(1 + 4999999 / sizeof(T))}; + if (nSize <= max_chunk) { + v.resize(nSize); + if (nSize != 0) { + is.read(std::as_writable_bytes(std::span{v.data(), nSize})); + } + return; + } + size_t i = 0; while (i 
< nSize) { - unsigned int blk = std::min(nSize - i, (unsigned int)(1 + 4999999 / sizeof(T))); + size_t blk = std::min(nSize - i, max_chunk); v.resize(i + blk); is.read(std::as_writable_bytes(std::span{&v[i], blk})); i += blk; From 5fa2fa86dfaff4e15bb98af56ae2c559c13810cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Tue, 17 Feb 2026 19:20:30 +0000 Subject: [PATCH 12/15] serialize: fast-path 2-4 byte varint writes Bench: /tmp/ab_bench_score.py ab61b_writevarint_fastpath_2_3_4_reorder_p24 --pairs 24 score geomean_ns median: 133.394213552 -> 130.377638706 (-2.26%) serialize_perf geomean_ns median: 7.379390515 -> 7.126054177 (-3.43%) assoc_rw geomean_ns median: 43638.598005401 -> 43633.326177539 (-0.01%) --- src/serialize.h | 54 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/src/serialize.h b/src/serialize.h index 14b4aef3d088..082326176176 100644 --- a/src/serialize.h +++ b/src/serialize.h @@ -463,6 +463,48 @@ constexpr unsigned int GetSizeOfVarInt(I n) return nRet; } +template +ALWAYS_INLINE void WriteVarIntFixed(Stream& os, I n) +{ + unsigned char out[N]; + if constexpr (N == 2) { + out[0] = static_cast(((n >> 7) - 1) | 0x80); + out[1] = static_cast(n & 0x7F); + } else { + I x = n; + out[N - 1] = static_cast(x & 0x7F); + if constexpr (N > 1) { + x = (x >> 7) - 1; + out[N - 2] = static_cast((x & 0x7F) | 0x80); + } + if constexpr (N > 2) { + x = (x >> 7) - 1; + out[N - 3] = static_cast((x & 0x7F) | 0x80); + } + if constexpr (N > 3) { + x = (x >> 7) - 1; + out[N - 4] = static_cast((x & 0x7F) | 0x80); + } + if constexpr (N > 4) { + x = (x >> 7) - 1; + out[N - 5] = static_cast((x & 0x7F) | 0x80); + } + if constexpr (N > 5) { + x = (x >> 7) - 1; + out[N - 6] = static_cast((x & 0x7F) | 0x80); + } + if constexpr (N > 6) { + x = (x >> 7) - 1; + out[N - 7] = static_cast((x & 0x7F) | 0x80); + } + if constexpr (N > 7) { + x = (x >> 7) - 1; + out[N - 8] = static_cast((x & 0x7F) | 0x80); + } + } + 
os.write(std::as_bytes(std::span{out})); +} + template void WriteVarInt(Stream& os, I n) { @@ -474,6 +516,18 @@ void WriteVarInt(Stream& os, I n) ser_writedata8(os, n); return; } + if (n <= 0x1020407F) { + if (n <= 0x407F) { + WriteVarIntFixed<2>(os, n); + return; + } + if (n <= 0x20407F) { + WriteVarIntFixed<3>(os, n); + return; + } + WriteVarIntFixed<4>(os, n); + return; + } unsigned char tmp[(sizeof(n) * 8 + 6) / 7]; size_t pos = std::size(tmp); tmp[--pos] = n & 0x7F; From bc0aab637c40bf3d283b7909696fd88744d8f084 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Tue, 17 Feb 2026 20:50:03 +0000 Subject: [PATCH 13/15] serialize: fast-path 5-byte varint writes Bench: /tmp/ab_bench_score.py ab72b_writevarint_fastpath_5byte_p24 --pairs 24 score geomean_ns median: 130.459794148 -> 128.699255502 (-1.35%) serialize_perf geomean_ns median: 7.129743280 -> 6.975918082 (-2.16%) assoc_rw geomean_ns median: 43707.674388987 -> 43725.749214815 (+0.04%) --- src/serialize.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/serialize.h b/src/serialize.h index 082326176176..22bc8e5d85de 100644 --- a/src/serialize.h +++ b/src/serialize.h @@ -528,6 +528,10 @@ void WriteVarInt(Stream& os, I n) WriteVarIntFixed<4>(os, n); return; } + if (n <= 0x81020407FULL) { + WriteVarIntFixed<5>(os, n); + return; + } unsigned char tmp[(sizeof(n) * 8 + 6) / 7]; size_t pos = std::size(tmp); tmp[--pos] = n & 0x7F; From 0961f31299be81c05507a536d02bb1d341e06c35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Wed, 18 Feb 2026 16:19:32 +0000 Subject: [PATCH 14/15] serialize: fast-path 6-byte varint writes Bench: /tmp/ab_bench_score.py ab73b_writevarint_fastpath_6byte_p24 --pairs 24 score geomean_ns median: 128.608289348 -> 126.638221393 (-1.53%) serialize_perf geomean_ns median: 6.975603185 -> 6.811420432 (-2.35%) assoc_rw geomean_ns median: 43707.099246200 -> 43745.066824529 (+0.09%)
--- src/serialize.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/serialize.h b/src/serialize.h index 22bc8e5d85de..8594abc8f1af 100644 --- a/src/serialize.h +++ b/src/serialize.h @@ -528,8 +528,12 @@ void WriteVarInt(Stream& os, I n) WriteVarIntFixed<4>(os, n); return; } - if (n <= 0x81020407FULL) { - WriteVarIntFixed<5>(os, n); + if (n <= 0x4081020407FULL) { + if (n <= 0x81020407FULL) { + WriteVarIntFixed<5>(os, n); + return; + } + WriteVarIntFixed<6>(os, n); return; } unsigned char tmp[(sizeof(n) * 8 + 6) / 7]; size_t pos = std::size(tmp); tmp[--pos] = n & 0x7F; From e4d42b2eb54272bf505b6a29d6e7d50bf5fc8175 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C5=91rinc?= Date: Tue, 17 Feb 2026 21:35:45 +0000 Subject: [PATCH 15/15] serialize: inline and fast-path 7-byte varint writes Bench: /tmp/ab_bench_score.py ab76b_writevarint_fastpath_7_always_inline_p24 --pairs 24 score geomean_ns median: 126.693922531 -> 123.529060714 (-2.50%) serialize_perf geomean_ns median: 6.821017413 -> 6.570016636 (-3.68%) assoc_rw geomean_ns median: 43706.512507994 -> 43743.488301889 (+0.08%) --- src/serialize.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/serialize.h
b/src/serialize.h index 8594abc8f1af..c946f4e90782 100644 --- a/src/serialize.h +++ b/src/serialize.h @@ -506,7 +506,7 @@ ALWAYS_INLINE void WriteVarIntFixed(Stream& os, I n) } template -void WriteVarInt(Stream& os, I n) +ALWAYS_INLINE void WriteVarInt(Stream& os, I n) { if constexpr (ContainsSizeComputer) { os.GetStream().seek(GetSizeOfVarInt(n)); @@ -536,6 +536,10 @@ void WriteVarInt(Stream& os, I n) WriteVarIntFixed<6>(os, n); return; } + if (n <= 0x204081020407FULL) { + WriteVarIntFixed<7>(os, n); + return; + } unsigned char tmp[(sizeof(n) * 8 + 6) / 7]; size_t pos = std::size(tmp); tmp[--pos] = n & 0x7F;