using System;
using System.IO;
using System.Threading.Tasks;
using BenchmarkDotNet.Attributes;
using BitSoft.BinaryTools.Patch;

namespace BitSoft.BinaryTools.Benchmarks;

/// <summary>
/// Measures <see cref="BinaryPatch.CreateAsync"/> on a random buffer in which a fixed number of
/// evenly spaced regions have been overwritten with different random bytes.
/// </summary>
[ShortRunJob]
[MemoryDiagnoser]
public class BinaryPatchBenchmark
{
    private byte[]? _source;
    private byte[]? _modified;

    private Stream? _sourceStream;
    private Stream? _modifiedStream;
    private Stream? _patchStream;

    /// <summary>Length in bytes of both the source and the modified buffer.</summary>
    [Params(1024 * 1024, 10 * 1024 * 1024)]
    public int BufferLength { get; set; }

    /// <summary>Number of evenly spaced regions that are overwritten in the modified buffer.</summary>
    [Params(5)]
    public int ChangedBlocks { get; set; }

    /// <summary>Length in bytes of every overwritten region.</summary>
    [Params(512)]
    public int ChangeSize { get; set; }

    /// <summary>Block size handed to <see cref="BinaryPatch.CreateAsync"/>.</summary>
    [Params(1024, 4096)]
    public int BlockSize { get; set; }

    /// <summary>
    /// Builds the source and modified buffers once per parameter combination.
    /// The data does not depend on the iteration, so it is not regenerated before each one.
    /// </summary>
    [GlobalSetup]
    public void GlobalSetUp()
    {
        var source = new byte[BufferLength];
        Random.Shared.NextBytes(source);

        var modified = (byte[])source.Clone();

        var stride = source.Length / (ChangedBlocks + 1);
        for (var block = 1; block <= ChangedBlocks; block++)
        {
            var position = stride * block;

            // Clamp so that a large ChangeSize cannot run past the end of the buffer.
            var length = Math.Min(ChangeSize, modified.Length - position);
            if (length <= 0)
            {
                break;
            }

            Random.Shared.NextBytes(modified.AsSpan(start: position, length: length));
        }

        _source = source;
        _modified = modified;
    }

    /// <summary>
    /// Creates fresh streams for every iteration: the input streams must start at position 0
    /// and the patch stream must start empty.
    /// </summary>
    [IterationSetup]
    public void IterationSetUp()
    {
        _sourceStream = new MemoryStream(_source!);
        _modifiedStream = new MemoryStream(_modified!);
        _patchStream = new MemoryStream();
    }

    /// <summary>Disposes the streams created for the iteration.</summary>
    [IterationCleanup]
    public void Cleanup()
    {
        _sourceStream?.Dispose();
        _modifiedStream?.Dispose();
        _patchStream?.Dispose();
    }

    /// <summary>Creates a patch that transforms the source buffer into the modified buffer.</summary>
    [Benchmark]
    public async Task CreateBinaryPatch()
    {
        await BinaryPatch.CreateAsync(
            source: _sourceStream!,
            modified: _modifiedStream!,
            output: _patchStream!,
            blockSize: BlockSize
        );
    }
}
+ BlockSize : Value of the 'BlockSize' parameter + Mean : Arithmetic mean of all measurements + Error : Half of 99.9% confidence interval + StdDev : Standard deviation of all measurements + Allocated : Allocated memory per single operation (managed only, inclusive, 1KB = 1024B) + 1 us : 1 Microsecond (0.000001 sec) +``` + +## Additional info +``` +BenchmarkDotNet v0.15.6, macOS 26.1 (25B78) [Darwin 25.1.0] +Apple M1 Pro, 1 CPU, 10 logical and 10 physical cores +.NET SDK 10.0.100 + [Host] : .NET 8.0.22 (8.0.22, 8.0.2225.52707), Arm64 RyuJIT armv8.0-a + ShortRun : .NET 8.0.22 (8.0.22, 8.0.2225.52707), Arm64 RyuJIT armv8.0-a +``` diff --git a/src/BitSoft.BinaryTools.Benchmarks/Utils/Create.cs b/src/BitSoft.BinaryTools.Benchmarks/Utils/Create.cs new file mode 100644 index 0000000..e5f08f0 --- /dev/null +++ b/src/BitSoft.BinaryTools.Benchmarks/Utils/Create.cs @@ -0,0 +1,11 @@ +using System; + +namespace BitSoft.BinaryTools.Benchmarks.Utils; + +public static class Create +{ + public static void RandomData(Span buffer) + { + Random.Shared.NextBytes(buffer); + } +} diff --git a/src/BitSoft.BinaryTools.Tests/Patch/BinaryPatchTests.cs b/src/BitSoft.BinaryTools.Tests/Patch/BinaryPatchTests.cs index d59e315..26e44d8 100644 --- a/src/BitSoft.BinaryTools.Tests/Patch/BinaryPatchTests.cs +++ b/src/BitSoft.BinaryTools.Tests/Patch/BinaryPatchTests.cs @@ -1,4 +1,6 @@ +using System; using System.Collections.Generic; +using System.Diagnostics; using System.IO; using System.Threading.Tasks; using BitSoft.BinaryTools.Patch; @@ -11,10 +13,70 @@ public class BinaryPatchTests private static IEnumerable TestCases() { yield return new TestCaseData( - new byte[] { 0x0, 0x1, 0x0, 0x1, 0x0 }, - new byte[] { 0x0, 0x0, 0x1, 0x0, 0x0 }, + new byte[] { 0x1, 0x2, 0x3, 0x4, 0x5 }, + new byte[] { 0x1, 0x1, 0x2, 0x3, 0x4 }, 2 ); + yield return new TestCaseData( + new byte[] { 0x1, 0x2, 0x3, 0x4, 0x5 }, + new byte[] { 0x1, 0x1, 0x2, 0x3, 0x4 }, + 3 + ); + yield return new TestCaseData( + new byte[] { 
0x1, 0x2, 0x3, 0x4, 0x5 }, + new byte[] { 0x1, 0x2, 0x3, 0x4 }, + 3 + ); + yield return new TestCaseData( + new byte[] { 0x1, 0x2, 0x3, 0x4, 0x5 }, + new byte[] { 0x1, 0x2, 0x3, 0x4, 0x5, 0x6 }, + 3 + ); + yield return new TestCaseData( + new byte[] { 0x1, 0x2, 0x3, 0x4, 0x5 }, + new byte[] { 0x1, 0x7, 0x3, 0x4, 0x5, 0x6 }, + 3 + ); + yield return new TestCaseData( + new byte[] { 0x1, 0x2, 0x3, 0x4, 0x5 }, + new byte[] { 0x1, 0x7, 0x4, 0x5, 0x6 }, + 3 + ); + yield return new TestCaseData( + new byte[] { 0x1, 0x2, 0x3, 0x4, 0x5 }, + new byte[] { 0x1, 0x7, 0x8, 0x9, 0x2 }, + 3 + ); + yield return new TestCaseData( + new byte[] { 0x1, 0x2, 0x3, 0x4, 0x5 }, + new byte[] { 0x1, 0x2, 0x3, 0x9, 0x9 }, + 3 + ); + yield return new TestCaseData( + new byte[] { 0x1, 0x2, 0x3, 0x4, 0x5 }, + new byte[] { 0x1, 0x2, 0x9, 0x9, 0x9 }, + 3 + ); + yield return new TestCaseData( + new byte[] { 0x1, 0x2, 0x3, 0x4, 0x5 }, + new byte[] { 0x9, 0x9, 0x1, 0x2, 0x3 }, + 3 + ); + yield return new TestCaseData( + new byte[] { 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8 }, + new byte[] { 0x1, 0x2, 0x9, 0x4, 0x5, 0x6, 0x7, 0x8 }, + 3 + ); + yield return new TestCaseData( + new byte[] { 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xA, 0xB, 0xC }, + new byte[] { 0x1, 0x2, 0x3, 0x4, 0x5, 0xA, 0xA, 0xA, 0xA, 0xB, 0xA, 0xC }, + 3 + ); + yield return new TestCaseData( + new byte[] { 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9 }, + new byte[] { 0x1, 0x2, 0xA, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9 }, + 4 + ); yield return new TestCaseData( new byte[] { 0x0, 0x1 }, new byte[] { 0x0 }, @@ -60,4 +122,64 @@ await BinaryPatch.CreateAsync( Assert.That(patched.Length, Is.EqualTo(modified.Length)); Assert.That(patched, Is.EqualTo(modified)); } -} \ No newline at end of file + + [Ignore("Performance test")] + [TestCase(3 * 4, 4, 2, 2)] + [TestCase(100 * 4 * 4, 4, 5, 6)] + public async Task Should_CreatePatch(int bufferLength, int blockSize, int changedBlocks, int changeSize) + { + // Arrange + var source = new 
using System;
using System.Buffers;
using System.IO;
using System.Threading;
using System.Threading.Tasks;
using BitSoft.BinaryTools.Patch;

namespace BitSoft.BinaryTools.Tests.Patch;

/// <summary>
/// Tests for <see cref="StreamWindowReader"/> using a window of two bytes over short
/// sequential byte arrays (0x0, 0x1, 0x2, ...).
/// </summary>
[TestFixture]
public class StreamWindowReaderTests
{
    private const int WindowSize = 2;

    /// <summary>Returns the bytes 0, 1, ..., <paramref name="count"/> - 1.</summary>
    private static byte[] SequentialBytes(int count)
    {
        var bytes = new byte[count];
        for (var i = 0; i < bytes.Length; i++)
        {
            bytes[i] = (byte)i;
        }

        return bytes;
    }

    /// <summary>Creates a reader over <paramref name="stream"/> with the shared pool and the test window size.</summary>
    private static StreamWindowReader CreateReader(Stream stream) =>
        new StreamWindowReader(stream, ArrayPool<byte>.Shared, windowSize: WindowSize);

    /// <summary>Calls MoveAsync <paramref name="times"/> times and returns the result of the last call.</summary>
    private static async Task<bool> MoveAsync(StreamWindowReader reader, int times)
    {
        var moved = false;
        for (var step = 0; step < times; step++)
        {
            moved = await reader.MoveAsync(CancellationToken.None);
        }

        return moved;
    }

    [Test]
    public async Task Should_ReturnInitialWindow()
    {
        // Arrange
        var source = SequentialBytes(7);
        using var sourceStream = new MemoryStream(source);

        // Act
        using var reader = CreateReader(sourceStream);

        await MoveAsync(reader, times: 1);

        // Assert
        Assert.That(reader.Window.Length, Is.EqualTo(2));
        Assert.That(reader.Window.ToArray(), Is.EqualTo(source[..2]).AsCollection);
    }

    [Test]
    public async Task Should_ReturnMovedWindow()
    {
        // Arrange
        var source = SequentialBytes(7);
        using var sourceStream = new MemoryStream(source);

        // Act
        using var reader = CreateReader(sourceStream);

        await MoveAsync(reader, times: 2); // positions 0, 1

        // Assert
        Assert.That(reader.Window.Length, Is.EqualTo(2));
        Assert.That(reader.Window.ToArray(), Is.EqualTo(source[1..3]).AsCollection);
    }

    [Test]
    public async Task Should_ReturnMovedWindow_When_Overlapped()
    {
        // Arrange
        var source = SequentialBytes(7);
        using var sourceStream = new MemoryStream(source);

        // Act
        using var reader = CreateReader(sourceStream);

        await MoveAsync(reader, times: 4); // positions 0, 1, 2, 3

        // Assert
        Assert.That(reader.Window.Length, Is.EqualTo(2));
        Assert.That(reader.Window.ToArray(), Is.EqualTo(source[3..5]).AsCollection);
    }

    [Test]
    public async Task Should_ReturnMovedWindow_When_Pinned()
    {
        // Arrange
        var source = SequentialBytes(7);
        using var sourceStream = new MemoryStream(source);

        // Act
        using var reader = CreateReader(sourceStream);

        await MoveAsync(reader, times: 1); // position 0
        reader.PinPosition();
        await MoveAsync(reader, times: 1); // position 1

        // Assert
        Assert.That(reader.Window.Length, Is.EqualTo(2));
        Assert.That(reader.PinnedWindow.ToArray(), Is.EqualTo(source[..1]).AsCollection);
    }

    [Test]
    public async Task Should_ReturnMovedWindow_When_FinalBlockIsShort()
    {
        // Arrange
        var source = SequentialBytes(5);
        using var sourceStream = new MemoryStream(source);

        // Act
        using var reader = CreateReader(sourceStream);

        await MoveAsync(reader, times: 5); // positions 0, 1, 2, 3, 4

        // Assert
        Assert.That(reader.Window.Length, Is.EqualTo(1));
        Assert.That(reader.Window.ToArray(), Is.EqualTo(source[4..5]).AsCollection);
    }

    [Test]
    public async Task Should_ReturnMovedWindow_When_ReachEnd()
    {
        // Arrange
        var source = SequentialBytes(3);
        using var sourceStream = new MemoryStream(source);

        // Act
        using var reader = CreateReader(sourceStream);

        // Three moves visit positions 0, 1, 2; the fourth has nowhere left to go.
        var result = await MoveAsync(reader, times: 4);

        // Assert
        Assert.That(result, Is.False);
    }
}
@@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "workflows", "workflows", "{ ..\.github\workflows\dotnet.yml = ..\.github\workflows\dotnet.yml EndProjectSection EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "BitSoft.BinaryTools.Benchmarks", "BitSoft.BinaryTools.Benchmarks\BitSoft.BinaryTools.Benchmarks.csproj", "{9C7C350F-7CD9-47C4-A3D3-64E9F75E5B18}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -33,6 +35,10 @@ Global {BE7275D4-BCC8-4E39-9DDC-6FE0426D650C}.Debug|Any CPU.Build.0 = Debug|Any CPU {BE7275D4-BCC8-4E39-9DDC-6FE0426D650C}.Release|Any CPU.ActiveCfg = Release|Any CPU {BE7275D4-BCC8-4E39-9DDC-6FE0426D650C}.Release|Any CPU.Build.0 = Release|Any CPU + {9C7C350F-7CD9-47C4-A3D3-64E9F75E5B18}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {9C7C350F-7CD9-47C4-A3D3-64E9F75E5B18}.Debug|Any CPU.Build.0 = Debug|Any CPU + {9C7C350F-7CD9-47C4-A3D3-64E9F75E5B18}.Release|Any CPU.ActiveCfg = Release|Any CPU + {9C7C350F-7CD9-47C4-A3D3-64E9F75E5B18}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(NestedProjects) = preSolution {2C6A0F9D-48B5-40FD-A0FF-45ECA8034BB9} = {1B0803C7-C282-44FB-B1C9-6199FD6E1122} diff --git a/src/BitSoft.BinaryTools/Patch/BinaryPatch.cs b/src/BitSoft.BinaryTools/Patch/BinaryPatch.cs index 3bbd568..f4f45d7 100644 --- a/src/BitSoft.BinaryTools/Patch/BinaryPatch.cs +++ b/src/BitSoft.BinaryTools/Patch/BinaryPatch.cs @@ -26,133 +26,100 @@ public static async ValueTask CreateAsync( if (!output.CanWrite) throw new ArgumentException($"{nameof(output)} does not support writing.", nameof(output)); - var blockInfoContainer = await CalculateHashesAsync(source, blockSize, cancellationToken); + using var hashCalculator = new HashCalculator(); - using var writer = new PatchWriter(output); + var blockInfoContainer = await CalculateHashesAsync(source, hashCalculator, blockSize, cancellationToken); + using var reader = new StreamWindowReader(modified, Pool, 
windowSize: blockSize); + using var writer = new PatchWriter(output); await writer.WriteHeaderAsync(blockSize: blockSize, cancellationToken); - var bufferLength = blockSize * 2; - var buffer = Pool.Rent(minimumLength: bufferLength); - try + if (!await reader.MoveAsync(cancellationToken)) { - var length = await modified.ReadAsync(buffer.AsMemory(start: 0, length: bufferLength), cancellationToken); - if (length == 0) - return; + await writer.CompleteAsync(cancellationToken); + return; + } - const int NotDefined = -1; + RollingHash rollingHash = default; + var resetHash = true; - var segmentStart = NotDefined; - var position = 0; + while (true) + { + if (resetHash) + { + rollingHash = RollingHash.Create(reader.Window.Span); + resetHash = false; + } - RollingHash rollingHash = default; - var resetHash = true; + var block = blockInfoContainer.Match(rollingHash, reader.Window.Span); - while (true) + if (block is null) { - while (position < length) + if (!reader.IsPinned) + { + reader.PinPosition(); + } + + if (reader.Finished) { - if (resetHash) - { - var spanLength = Math.Min(blockSize, length); - var bufferSpan = buffer.AsSpan(start: 0, length: spanLength); - rollingHash = RollingHash.Create(bufferSpan); - resetHash = false; - } - - var block = blockInfoContainer.Match(rollingHash); - - if (block is null) - { - if (length <= blockSize) - { - var memory = buffer.AsMemory(start: position, length: length); - await writer.WriteDataAsync(memory, cancellationToken); - position = 0; - break; - } - - if (segmentStart == NotDefined) - { - segmentStart = position; - } - else if (position - segmentStart + 1 == blockSize) - { - var memory = buffer.AsMemory(start: segmentStart, length: position - segmentStart + 1); - await writer.WriteDataAsync(memory, cancellationToken); - - buffer - .AsSpan(start: position + 1, length: bufferLength - position - 2) - .CopyTo(buffer.AsSpan(start: 0)); - - segmentStart = NotDefined; - resetHash = true; - - break; - } - - position += 1; - - if 
(position == length) - { - var memory = buffer.AsMemory(start: segmentStart, length: position - segmentStart); - await writer.WriteDataAsync(memory, cancellationToken); - position = 0; - break; - } - - if (position + blockSize < length) - { - var removedByte = buffer[position - 1]; - var addedByte = buffer[position + blockSize - 1]; - rollingHash.Update(removed: removedByte, added: addedByte); - } - else - { - resetHash = true; - } - } + if (reader.IsPinned) + await writer.WriteDataAsync(reader.PinnedWindowWithCurrent, cancellationToken); else - { - if (segmentStart != NotDefined) - { - var memory = buffer.AsMemory(start: segmentStart, length: position - segmentStart); - await writer.WriteDataAsync(memory, cancellationToken); - segmentStart = NotDefined; - } - - await writer.WriteCopyAsync( - blockIndex: block.BlockIndex, - blockLength: block.Length, - cancellationToken: cancellationToken - ); - - buffer - .AsSpan(start: position + block.Length, length: bufferLength - position - block.Length - 1) - .CopyTo(buffer.AsSpan(start: 0)); - - resetHash = true; - - break; - } + await writer.WriteDataAsync(reader.Window, cancellationToken); + break; } - length = await modified.ReadAsync( - buffer.AsMemory(start: position, length: bufferLength - position - 1), - cancellationToken: cancellationToken - ); + if (reader.PinnedWindowWithCurrent.Length == blockSize) + { + await writer.WriteDataAsync(reader.PinnedWindowWithCurrent, cancellationToken); + reader.ResetPinnedPosition(); + } - length += position; - position = 0; + var firstByte = reader.Window.Span[0]; + if (await reader.MoveAsync(cancellationToken)) + { + var newByte = reader.Window.Span[reader.Window.Length - 1]; + rollingHash.Update(removed: firstByte, added: newByte); + } + else + { + break; + } + } + else + { + if (reader.IsPinned) + { + await writer.WriteDataAsync(reader.PinnedWindow, cancellationToken); + reader.ResetPinnedPosition(); + } - if (length == 0) + if (block is PatchBlockInfoWithLength 
blockInfoWithLength) + { + await writer.WriteCopyBlockWithLengthAsync( + blockIndex: block.BlockIndex, + blockLength: blockInfoWithLength.Length, + cancellationToken: cancellationToken + ); + } + else + { + await writer.WriteCopyBlockAsync( + blockIndex: block.BlockIndex, + cancellationToken: cancellationToken + ); + } + + if (await reader.SlideWindowAsync(cancellationToken)) + { + resetHash = true; + } + else + { break; + } } } - finally - { - Pool.Return(buffer); - } await writer.CompleteAsync(cancellationToken); } @@ -186,40 +153,57 @@ public static async ValueTask ApplyAsync( case DataPatchSegment dataPatchSegment: await output.WriteAsync(dataPatchSegment.Data, cancellationToken); break; - case CopyPatchSegment copyPatchSegment: - var targetPosition = blockSize * copyPatchSegment.BlockIndex; - source.Seek(targetPosition, SeekOrigin.Begin); - var buffer = Pool.Rent(copyPatchSegment.BlockLength); - try - { - var memory = buffer.AsMemory(start: 0, length: copyPatchSegment.BlockLength); - var count = await source.ReadAsync(memory, cancellationToken); - if (count != copyPatchSegment.BlockLength) throw new InvalidOperationException(); - await output.WriteAsync(memory, cancellationToken); - } - finally - { - Pool.Return(buffer); - } - + case CopyBlockSegment copyBlockSegment: + await CopyBlockSegmentAsync( + blockIndex: copyBlockSegment.BlockIndex, + blockLength: blockSize + ); + break; + case CopyBlockWithLengthSegment copyPatchSegment: + await CopyBlockSegmentAsync( + blockIndex: copyPatchSegment.BlockIndex, + blockLength: copyPatchSegment.BlockLength + ); break; default: throw new NotSupportedException(); } + + continue; + + async ValueTask CopyBlockSegmentAsync(int blockIndex, int blockLength) + { + var targetPosition = blockSize * blockIndex; + source.Seek(targetPosition, SeekOrigin.Begin); + var buffer = Pool.Rent(blockLength); + try + { + var memory = buffer.AsMemory(start: 0, length: blockLength); + var count = await source.ReadAsync(memory, 
cancellationToken); + if (count != blockLength) throw new InvalidOperationException(); + await output.WriteAsync(memory, cancellationToken); + } + finally + { + Pool.Return(buffer); + } + } } } private static async ValueTask CalculateHashesAsync( Stream source, + HashCalculator hashCalculator, int blockSize, CancellationToken cancellationToken = default) { ArgumentNullException.ThrowIfNull(source); + ArgumentNullException.ThrowIfNull(hashCalculator); if (!source.CanRead) throw new ArgumentException("source stream must be readable.", nameof(source)); - var blockInfoContainer = new BlockInfoContainer(); + var blockInfoContainer = new BlockInfoContainer(hashCalculator); var blockIndex = 0; @@ -232,9 +216,19 @@ private static async ValueTask CalculateHashesAsync( if (length == 0) break; - var hash = RollingHash.Create(buffer.AsSpan(start: 0, length: length)); + var span = buffer.AsSpan(start: 0, length: length); + + var hash = RollingHash.Create(span); + var strongHash = hashCalculator.CalculatedHash(buffer, offset: 0, count: length); - blockInfoContainer.Process(hash: hash, blockIndex: blockIndex, blockLength: length); + if (length == blockSize) + { + blockInfoContainer.Process(blockIndex: blockIndex, hash: hash, strongHash: strongHash); + } + else + { + blockInfoContainer.Process(blockIndex: blockIndex, blockLength: length, hash: hash, strongHash); + } if (length < blockSize) break; diff --git a/src/BitSoft.BinaryTools/Patch/BlockInfoContainer.cs b/src/BitSoft.BinaryTools/Patch/BlockInfoContainer.cs index 37c4269..f264b8f 100644 --- a/src/BitSoft.BinaryTools/Patch/BlockInfoContainer.cs +++ b/src/BitSoft.BinaryTools/Patch/BlockInfoContainer.cs @@ -1,15 +1,39 @@ +using System; using System.Collections.Generic; namespace BitSoft.BinaryTools.Patch; internal sealed class BlockInfoContainer { + private readonly HashCalculator _hashCalculator; private readonly Dictionary> _hashes = new(); - public void Process(RollingHash hash, int blockIndex, int blockLength) + public 
BlockInfoContainer(HashCalculator hashCalculator) + { + _hashCalculator = hashCalculator ?? throw new ArgumentNullException(nameof(hashCalculator)); + } + + public void Process(int blockIndex, RollingHash hash, byte[] strongHash) + { + var checksum = hash.GetChecksum(); + var block = new PatchBlockInfo(blockIndex: blockIndex, hash: checksum, strongHash); + if (!_hashes.TryGetValue(checksum, out var blocks)) + { + _hashes[block.Hash] = blocks = []; + } + + blocks.Add(block); + } + + public void Process(int blockIndex, int blockLength, RollingHash hash, byte[] strongHash) { var checksum = hash.GetChecksum(); - var block = new PatchBlockInfo(blockIndex: blockIndex, hash: checksum, length: blockLength); + var block = new PatchBlockInfoWithLength( + blockIndex: blockIndex, + length: blockLength, + hash: checksum, + strongHash: strongHash + ); if (!_hashes.TryGetValue(checksum, out var blocks)) { _hashes[block.Hash] = blocks = []; @@ -18,17 +42,20 @@ public void Process(RollingHash hash, int blockIndex, int blockLength) blocks.Add(block); } - public PatchBlockInfo? Match(RollingHash hash) + public PatchBlockInfo? 
using System;
using System.Buffers;
using System.Security.Cryptography;

namespace BitSoft.BinaryTools.Patch;

/// <summary>
/// Computes MD5 digests that are used as the strong hash confirming a rolling-hash match.
/// Owns an <see cref="MD5"/> instance and a pooled scratch buffer; both are released by <see cref="Dispose"/>.
/// </summary>
internal sealed class HashCalculator : IDisposable
{
    // An MD5 digest is 128 bits long.
    private const int HashSizeInBytes = 16;

    private static ArrayPool<byte> Pool { get; } = ArrayPool<byte>.Shared;

    private readonly MD5 _md5 = MD5.Create();
    private readonly byte[] _buffer = Pool.Rent(minimumLength: HashSizeInBytes);

    private bool _disposed;

    /// <summary>Computes the hash of a segment of <paramref name="source"/> into a newly allocated array.</summary>
    /// <param name="source">Array that contains the data.</param>
    /// <param name="offset">Index of the first byte to hash.</param>
    /// <param name="count">Number of bytes to hash.</param>
    /// <returns>A new array holding the digest.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is <c>null</c>.</exception>
    public byte[] CalculatedHash(byte[] source, int offset, int count)
    {
        ArgumentNullException.ThrowIfNull(source);

        return _md5.ComputeHash(source, offset: offset, count: count);
    }

    /// <summary>Computes the hash of <paramref name="source"/> without allocating.</summary>
    /// <param name="source">Data to hash.</param>
    /// <returns>
    /// A span over this instance's internal scratch buffer. It is overwritten by the next call of
    /// this overload, so compare or copy it before hashing again.
    /// </returns>
    /// <exception cref="InvalidOperationException">The digest could not be written to the scratch buffer.</exception>
    public ReadOnlySpan<byte> CalculatedHash(ReadOnlySpan<byte> source)
    {
        if (_md5.TryComputeHash(source: source, destination: _buffer, out var bytesWritten))
        {
            return _buffer.AsSpan(start: 0, length: bytesWritten);
        }

        throw new InvalidOperationException("Hash calculation failed.");
    }

    /// <summary>Releases the hash algorithm and returns the scratch buffer to the pool. Safe to call repeatedly.</summary>
    public void Dispose()
    {
        // A buffer must be returned to the pool exactly once.
        if (_disposed)
        {
            return;
        }

        _disposed = true;

        _md5.Dispose();
        Pool.Return(_buffer);
    }
}
+45,19 @@ public ValueTask ReadAsync(CancellationToken cancellationToken) case ProtocolConst.SegmentTypes.EndPatchSegment: Segment = null; return ValueTask.FromResult(false); - case ProtocolConst.SegmentTypes.CopyPatchSegment: + case ProtocolConst.SegmentTypes.CopyBlock: + { + var blockIndex = _reader.ReadInt32(); + Segment = new CopyBlockSegment(blockIndex: blockIndex); + break; + } + case ProtocolConst.SegmentTypes.CopyBlockWithLength: + { var blockIndex = _reader.ReadInt32(); var blockLength = _reader.ReadInt32(); - Segment = new CopyPatchSegment(blockIndex: blockIndex, blockLength: blockLength); + Segment = new CopyBlockWithLengthSegment(blockIndex: blockIndex, blockLength: blockLength); break; + } case ProtocolConst.SegmentTypes.DataPatchSegment: var length = _reader.ReadInt32(); var span = _buffer.AsSpan(start: 0, length: length); diff --git a/src/BitSoft.BinaryTools/Patch/PatchWriter.cs b/src/BitSoft.BinaryTools/Patch/PatchWriter.cs index 723cbdb..3cdd4b6 100644 --- a/src/BitSoft.BinaryTools/Patch/PatchWriter.cs +++ b/src/BitSoft.BinaryTools/Patch/PatchWriter.cs @@ -33,9 +33,17 @@ public ValueTask WriteDataAsync(ReadOnlyMemory memory, CancellationToken c return ValueTask.CompletedTask; } - public ValueTask WriteCopyAsync(int blockIndex, int blockLength, CancellationToken cancellationToken) + public ValueTask WriteCopyBlockAsync(int blockIndex, CancellationToken cancellationToken) { - _writer.Write(ProtocolConst.SegmentTypes.CopyPatchSegment); + _writer.Write(ProtocolConst.SegmentTypes.CopyBlock); + _writer.Write(blockIndex); + + return ValueTask.CompletedTask; + } + + public ValueTask WriteCopyBlockWithLengthAsync(int blockIndex, int blockLength, CancellationToken cancellationToken) + { + _writer.Write(ProtocolConst.SegmentTypes.CopyBlockWithLength); _writer.Write(blockIndex); _writer.Write(blockLength); diff --git a/src/BitSoft.BinaryTools/Patch/ProtocolConst.cs b/src/BitSoft.BinaryTools/Patch/ProtocolConst.cs index 02bae57..144348b 100644 --- 
/// <summary>
/// Slides the hashed window forward by one byte.
/// </summary>
/// <param name="removed">Byte that leaves the window at its start.</param>
/// <param name="added">Byte that enters the window at its end.</param>
/// <remarks>
/// Applies <c>a' = a - removed + added</c> and <c>b' = b - length * removed + a' - 1</c>, both modulo <c>Base</c>.
/// The fields are unsigned, so every subtraction is biased by <c>Base</c> first: a wrap through 2^32
/// followed by <c>% Base</c> would produce a value that is not congruent to the intended result.
/// </remarks>
public void Update(byte removed, byte added)
{
    // _a is stored reduced modulo Base and Base exceeds any byte value, so the biased sum stays positive.
    _a = (_a + Base - removed + added) % Base;

    // Reduce the weighted term before subtracting it; widen to ulong so the product itself cannot overflow.
    var weightedRemoved = (uint)((ulong)_length * removed % Base);

    // "+ Base - 1" rather than "- 1" keeps the expression non-negative when _a is 0.
    _b = (_b + Base - weightedRemoved + _a + Base - 1) % Base;
}
b/src/BitSoft.BinaryTools/Patch/StreamWindowReader.cs
@@ -0,0 +1,238 @@
+using System;
+using System.Buffers;
+using System.IO;
+using System.Threading;
+using System.Threading.Tasks;
+
+namespace BitSoft.BinaryTools.Patch;
+
+/// <summary>
+/// Walks a stream one byte at a time through a pooled buffer of twice the window size and
+/// exposes the buffered bytes starting at the current position as a window.
+/// </summary>
+public class StreamWindowReader : IDisposable
+{
+    private const int NotDefined = -1;
+
+    private readonly Stream _stream;
+    private readonly ArrayPool<byte> _pool;
+    private readonly int _windowSize;
+    private readonly int _bufferSize;
+    private readonly byte[] _buffer;
+
+    private int _position = NotDefined;
+    private int _pinnedPosition = NotDefined;
+    private int _size = NotDefined;
+    private bool _continueRead = true;
+    private bool _disposed;
+
+    /// <summary>
+    /// Gets the buffered bytes starting at the current position, at most one window long.
+    /// </summary>
+    /// <exception cref="InvalidOperationException">Nothing has been read from the stream yet.</exception>
+    public ReadOnlyMemory<byte> Window
+    {
+        get
+        {
+            return _position >= 0
+                ? _buffer.AsMemory(start: _position, length: Math.Min(_windowSize, _size - _position))
+                : throw new InvalidOperationException("The stream does not contain the window.");
+        }
+    }
+
+    /// <summary>
+    /// Gets the bytes from the pinned position up to, but not including, the current position.
+    /// </summary>
+    /// <exception cref="InvalidOperationException">No position is pinned.</exception>
+    public ReadOnlyMemory<byte> PinnedWindow
+    {
+        get
+        {
+            return _pinnedPosition == NotDefined
+                ? throw new InvalidOperationException("Pinned position was not set.")
+                : _buffer.AsMemory(start: _pinnedPosition, length: _position - _pinnedPosition);
+        }
+    }
+
+    /// <summary>
+    /// Gets the bytes from the pinned position up to and including the current position.
+    /// </summary>
+    /// <exception cref="InvalidOperationException">No position is pinned.</exception>
+    public ReadOnlyMemory<byte> PinnedWindowWithCurrent
+    {
+        get
+        {
+            return _pinnedPosition == NotDefined
+                ? throw new InvalidOperationException("Pinned position was not set.")
+                : _buffer.AsMemory(start: _pinnedPosition, length: _position - _pinnedPosition + 1);
+        }
+    }
+
+    /// <summary>Gets a value indicating whether a position is currently pinned.</summary>
+    public bool IsPinned => _pinnedPosition != NotDefined;
+
+    /// <summary>Gets a value indicating whether the current position is the last buffered byte.</summary>
+    public bool Finished => _position == _size - 1;
+
+    /// <summary>
+    /// Creates a reader over <paramref name="stream"/> and rents a buffer of twice
+    /// <paramref name="windowSize"/> bytes from <paramref name="pool"/>.
+    /// </summary>
+    /// <exception cref="ArgumentNullException">
+    /// <paramref name="stream"/> or <paramref name="pool"/> is <c>null</c>.
+    /// </exception>
+    /// <exception cref="ArgumentOutOfRangeException">
+    /// <paramref name="windowSize"/> is not positive or is too large to be doubled.
+    /// </exception>
+    public StreamWindowReader(Stream stream, ArrayPool<byte> pool, int windowSize)
+    {
+        _stream = stream ?? throw new ArgumentNullException(nameof(stream));
+        _pool = pool ?? throw new ArgumentNullException(nameof(pool));
+
+        if (windowSize <= 0 || windowSize > int.MaxValue / 2)
+        {
+            throw new ArgumentOutOfRangeException(
+                nameof(windowSize),
+                windowSize,
+                "The window size must be positive and small enough to be doubled.");
+        }
+
+        _windowSize = windowSize;
+        _bufferSize = windowSize * 2;
+
+        _buffer = _pool.Rent(minimumLength: _bufferSize);
+    }
+
+    /// <summary>
+    /// Advances the position by one window, a byte at a time.
+    /// </summary>
+    /// <returns>
+    /// <c>true</c> when every step succeeded; <c>false</c> as soon as <see cref="MoveAsync"/> reports the end.
+    /// </returns>
+    public async ValueTask<bool> SlideWindowAsync(CancellationToken cancellationToken)
+    {
+        for (var i = 0; i < _windowSize; i++)
+        {
+            if (!await MoveAsync(cancellationToken).ConfigureAwait(false))
+            {
+                return false;
+            }
+        }
+
+        return true;
+    }
+
+    /// <summary>
+    /// Advances the position by one byte, shifting the buffer and reading more of the stream when the
+    /// position reaches the middle of the buffer.
+    /// </summary>
+    /// <returns><c>true</c> when the new position holds a byte; <c>false</c> at the end of the data.</returns>
+    /// <exception cref="InvalidOperationException">
+    /// The buffer has to be shifted while a position is still pinned.
+    /// </exception>
+    public async ValueTask<bool> MoveAsync(CancellationToken cancellationToken)
+    {
+        if (_position != NotDefined && _position == _size)
+            return false;
+
+        if (_position == NotDefined)
+        {
+            var count = await FillAsync(offset: 0, length: _bufferSize, cancellationToken).ConfigureAwait(false);
+            if (count == 0)
+                return false;
+            _position = 0;
+            _size = count;
+            return true;
+        }
+
+        _position += 1;
+
+        if (_continueRead && _position == _bufferSize - _windowSize)
+        {
+            if (_pinnedPosition != NotDefined)
+            {
+                throw new InvalidOperationException(
+                    $"Pinned position '{_pinnedPosition}' for buffer '{_size}/{_bufferSize}' with window '{_windowSize}' was not reset.");
+            }
+
+            // Move the unread tail to the front, then top the buffer back up to its full size.
+            var length = _size - _position;
+
+            Array.Copy(
+                sourceArray: _buffer,
+                sourceIndex: _position,
+                destinationArray: _buffer,
+                destinationIndex: 0,
+                length: length
+            );
+
+            var requested = _bufferSize - length;
+            var count = await FillAsync(offset: length, length: requested, cancellationToken).ConfigureAwait(false);
+
+            _position = 0;
+            _size = length + count;
+
+            // FillAsync only comes up short at the end of the stream.
+            if (count < requested)
+            {
+                _continueRead = false;
+            }
+        }
+
+        if (_position == _size)
+            return false;
+
+        return true;
+    }
+
+    /// <summary>Remembers the current position as the start of the pinned window.</summary>
+    public void PinPosition()
+    {
+        _pinnedPosition = _position;
+    }
+
+    /// <summary>Clears the pinned position.</summary>
+    public void ResetPinnedPosition()
+    {
+        _pinnedPosition = NotDefined;
+    }
+
+    /// <summary>Returns the rented buffer to the pool; later calls do nothing.</summary>
+    public void Dispose()
+    {
+        if (_disposed)
+        {
+            return;
+        }
+
+        _disposed = true;
+        _pool.Return(_buffer);
+    }
+
+    /// <summary>
+    /// Reads into the buffer until <paramref name="length"/> bytes arrived or the stream ended,
+    /// because a single <see cref="Stream.ReadAsync(Memory{byte}, CancellationToken)"/> call may
+    /// return fewer bytes than requested while more data is still to come.
+    /// </summary>
+    /// <returns>The number of bytes stored starting at <paramref name="offset"/>.</returns>
+    private async ValueTask<int> FillAsync(int offset, int length, CancellationToken cancellationToken)
+    {
+        var total = 0;
+
+        while (total < length)
+        {
+            var read = await _stream
+                .ReadAsync(_buffer.AsMemory(start: offset + total, length: length - total), cancellationToken)
+                .ConfigureAwait(false);
+
+            if (read == 0)
+            {
+                break;
+            }
+
+            total += read;
+        }
+
+        return total;
+    }
+}