diff --git a/src/BitSoft.BinaryTools.Tests/Patch/Adler32RollingHash.cs b/src/BitSoft.BinaryTools.Tests/Patch/Adler32RollingHash.cs new file mode 100644 index 0000000..9544894 --- /dev/null +++ b/src/BitSoft.BinaryTools.Tests/Patch/Adler32RollingHash.cs @@ -0,0 +1,56 @@ +using System; + /* Adler-32 style checksum over a fixed-size byte window; Roll advances the window by one byte without rescanning it. */ +public class Adler32RollingHash +{ + private const uint ModAdler = 65521; + private uint _s1 = 1; + private uint _s2 = 0; + private readonly int _windowSize; + /* Stores the window size; throws ArgumentException when windowSize is zero or negative. */ + public Adler32RollingHash(int windowSize) + { + if (windowSize <= 0) + throw new ArgumentException("Window size must be positive.", nameof(windowSize)); + _windowSize = windowSize; + } + /* Resets both sums and recomputes them over data, whose length must equal the window size (ArgumentException otherwise). The sums are reduced modulo ModAdler after each chunk of at most 3800 bytes. NOTE(review): ReadOnlySpan appears here without a type argument; presumably a span of byte was intended and the angle brackets were lost - confirm against the real file. */ + public void CalculateInitialHash(ReadOnlySpan data) + { + if (data.Length != _windowSize) + throw new ArgumentException("Initial data length must match window size."); + + _s1 = 1; _s2 = 0; + int len = data.Length; int i = 0; + while (len > 0) + { + int k = Math.Min(len, 3800); len -= k; + while (k-- > 0) { _s1 += data[i++]; _s2 += _s1; } + _s1 %= ModAdler; _s2 %= ModAdler; + } + } + /* Slides the window by one byte: byteOut leaves and byteIn enters. The first remainder of each sum may be negative, so ModAdler is added and the value is reduced a second time to land between 0 and ModAdler - 1. */ + public void Roll(byte byteOut, byte byteIn) + { + var s1_new = (int)_s1 - byteOut + byteIn; + _s1 = (uint)((s1_new % ModAdler + ModAdler) % ModAdler); + + long diff = (long)_windowSize * byteOut; + long tempS2 = (long)_s2 - diff + (long)_s1 - 1; + _s2 = (uint)((tempS2 % ModAdler + ModAdler) % ModAdler); + } + /* Combines the two sums into one value: _s2 shifted left by 16 bits, OR-ed with _s1. */ + public uint Checksum => (_s2 << 16) | _s1; + + public static uint CalculateFullChecksum(ReadOnlySpan data) + { + // Simple, non-optimized full checksum for verification + uint s1 = 1; + uint s2 = 0; + foreach (byte b in data) + { + s1 = (s1 + b) % ModAdler; + s2 = (s2 + s1) % ModAdler; + } + return (s2 << 16) | s1; + } +} diff --git a/src/BitSoft.BinaryTools.Tests/Patch/Adler32RollingHashTests.cs b/src/BitSoft.BinaryTools.Tests/Patch/Adler32RollingHashTests.cs new file mode 100644 index 0000000..cd6436c --- /dev/null +++ b/src/BitSoft.BinaryTools.Tests/Patch/Adler32RollingHashTests.cs @@ -0,0 +1,31 @@ +using System; + +public class 
Adler32RollingHashTests_Scalability +{ /* Rolls a 4096-byte window across 1 MiB of random bytes and compares every rolled checksum with a full recomputation. */ + [Test] + public void TestRollingHashWithMegaByteDataBuffer() + { + int Megabyte = 1024 * 1024; + int bufferSize = 1 * Megabyte; + int windowSize = 4096; + + var data = new byte[bufferSize]; + Random.Shared.NextBytes(data); + + Adler32RollingHash rollingHash = new (windowSize); + rollingHash.CalculateInitialHash(data.AsSpan(start: 0, length: windowSize)); + /* After each one-byte roll the window begins one byte past index i, so the reference checksum is computed over that shifted span. */ + for (int i = 0; i < data.Length - windowSize; i++) + { + byte byteOut = data[i]; + byte byteIn = data[i + windowSize]; + + rollingHash.Roll(byteOut, byteIn); + + var span = data.AsSpan(start: i + 1, length: windowSize); + /* NOTE(review): the recomputed value is passed in the actual position and the rolled value inside Is.EqualTo; the two look swapped, which would only affect the wording of a failure message - confirm. */ + var expectedChecksum = Adler32RollingHash.CalculateFullChecksum(span); + Assert.That(expectedChecksum, Is.EqualTo(rollingHash.Checksum)); + } + } +} diff --git a/src/BitSoft.BinaryTools.Tests/Patch/BinaryPatchTests.cs b/src/BitSoft.BinaryTools.Tests/Patch/BinaryPatchTests.cs index 8ab4ceb..2b5895d 100644 --- a/src/BitSoft.BinaryTools.Tests/Patch/BinaryPatchTests.cs +++ b/src/BitSoft.BinaryTools.Tests/Patch/BinaryPatchTests.cs @@ -124,11 +124,9 @@ await BinaryPatch.CreateAsync( } [Ignore("Performance test")] - [TestCase(1024 * 1024, 512, 0, 128)] - [TestCase(1024 * 1024, 512, 1, 128)] - [TestCase(1024 * 1024, 512, 5, 128)] - [TestCase(1024 * 1024, 1024, 5, 128)] - [TestCase(1024 * 1024, 4096, 5, 128)] + [TestCase(1024 * 1024, 256, 0, 128)] + [TestCase(1024 * 1024, 256, 1, 128)] + [TestCase(1024 * 1024, 256, 5, 128)] public async Task Should_CreatePatch(int bufferLength, int blockSize, int changedBlocks, int changeSize) { // Arrange diff --git a/src/BitSoft.BinaryTools.Tests/Patch/RollingHashTests.cs b/src/BitSoft.BinaryTools.Tests/Patch/RollingHashTests.cs new file mode 100644 index 0000000..385dcda --- /dev/null +++ b/src/BitSoft.BinaryTools.Tests/Patch/RollingHashTests.cs @@ -0,0 +1,49 @@ +using BitSoft.BinaryTools.Patch; + +namespace BitSoft.BinaryTools.Tests.Patch; + +using System; + /* Tests that RollingHash.Update keeps the checksum equal to one built from scratch with RollingHash.Create. */ +[TestFixture] +public sealed class RollingHashTests +{ + 
/* Each case is (bufferLength, bufferSize): the size of the random buffer and the size of the window slid across it. */ [TestCase(1024, 32)] + [TestCase(1024, 64)] + [TestCase(1024, 128)] + [TestCase(1024, 256)] + [TestCase(1024, 512)] + [TestCase(1024 * 1024, 512)] + [TestCase(1024 * 1024, 1024)] + public void Should_CalculateHash(int bufferLength, int bufferSize) + { + // Arrange + var buffer = new byte[bufferLength]; + + Random.Shared.NextBytes(buffer); + + // Act & Assert + var initialSpan = buffer.AsSpan(start: 0, length: bufferSize); + var rollingHash = RollingHash.Create(initialSpan); + /* Each iteration compares first and advances afterwards; the last compared position is not advanced past. */ + for (var i = 0; i < bufferLength - bufferSize; i++) + { + var span = buffer.AsSpan(start: i, length: bufferSize); + + var spanHash = RollingHash.Create(span); + + Assert.That( + actual: rollingHash.GetChecksum(), + expression: Is.EqualTo(spanHash.GetChecksum()), + message: $"Failed as position '{i}'" + ); + + if (i < bufferLength - bufferSize - 1) + { + var oldByte = buffer[i]; + var newByte = buffer[i + bufferSize]; + + rollingHash.Update(removed: oldByte, added: newByte); + } + } + } +} diff --git a/src/BitSoft.BinaryTools/Patch/RollingHash.cs b/src/BitSoft.BinaryTools/Patch/RollingHash.cs index 4363f11..87dc4c6 100644 --- a/src/BitSoft.BinaryTools/Patch/RollingHash.cs +++ b/src/BitSoft.BinaryTools/Patch/RollingHash.cs @@ -6,11 +6,11 @@ public struct RollingHash { private const uint Base = 65521; - private uint _a; - private uint _b; - private readonly uint _length; + private long _a; + private long _b; + private readonly long _length; /* Set from data.Length in Create; stored as long so its product with the removed byte in Update is computed in 64-bit arithmetic. */ - private RollingHash(uint a, uint b, uint length) + private RollingHash(long a, long b, int length) { _a = a; _b = b; @@ -19,25 +19,31 @@ private RollingHash(uint a, uint b, uint length) public static RollingHash Create(ReadOnlySpan data) { - uint a = 1; - uint b = 0; + long a = 1; + long b = 0; - for (var i = 0; i < data.Length; i++) + foreach (var value in data) { - var value = data[i]; - a = (a + value) % Base; b = (b + a) % Base; } - return new RollingHash(a: a, b: b, length: (uint)data.Length); + return new RollingHash(a: a, b: b, length: data.Length); 
} public void Update(byte removed, byte added) { - _a = (_a - removed + added) % Base; - _b = (_b - _length * removed + _a - 1) % Base; + // _a is a long, so subtracting the removed byte may give a negative intermediate value instead of wrapping + var s1_new = _a - removed + added; + // The remainder keeps the sign of the dividend in C#; adding Base and reducing again maps it into the range from 0 up to Base - 1 + _a = (s1_new % Base + Base) % Base; + + // _b and _length are long, so _length * removed is evaluated in 64-bit arithmetic and the difference may be negative + var tempS2 = _b - _length * removed + _a - 1; + + // Same normalization as for _a: map a possibly negative remainder into the range from 0 up to Base - 1 + _b = (tempS2 % Base + Base) % Base; } - public uint GetChecksum() => (_b << 16) | _a; + public uint GetChecksum() => (uint) ((_b << 16) | _a); /* Shifts _b left by 16 bits, ORs in _a, and converts the long result to uint. */ }