diff --git a/src/Furnace.Data/Data.fs b/src/Furnace.Data/Data.fs
index f2f8790f..fab19037 100644
--- a/src/Furnace.Data/Data.fs
+++ b/src/Furnace.Data/Data.fs
@@ -190,7 +190,7 @@ type MNIST(path:string, ?urls:seq<string>, ?train:bool, ?transform:Tensor->Tenso
     let files = [for url in urls do Path.Combine(path, Path.GetFileName(url))]
 
     let loadMNISTImages(filename:string) =
-        let r = new BinaryReader(new GZipStream(File.OpenRead(filename), CompressionMode.Decompress))
+        use r = new BinaryReader(new GZipStream(File.OpenRead(filename), CompressionMode.Decompress))
         let magicnumber = r.ReadInt32() |> IPAddress.NetworkToHostOrder
         match magicnumber with
         | 2051 -> // Images
@@ -205,7 +205,7 @@ type MNIST(path:string, ?urls:seq<string>, ?train:bool, ?transform:Tensor->Tenso
             |> fun t -> t / 255
         | _ -> failwith "Given file is not in the MNIST format."
     let loadMNISTLabels(filename:string) =
-        let r = new BinaryReader(new GZipStream(File.OpenRead(filename), CompressionMode.Decompress))
+        use r = new BinaryReader(new GZipStream(File.OpenRead(filename), CompressionMode.Decompress))
         let magicnumber = r.ReadInt32() |> IPAddress.NetworkToHostOrder
         match magicnumber with
         | 2049 -> // Labels
diff --git a/tests/Furnace.Tests/Furnace.Tests.fsproj b/tests/Furnace.Tests/Furnace.Tests.fsproj
index 8c2b6b0c..40569a6f 100644
--- a/tests/Furnace.Tests/Furnace.Tests.fsproj
+++ b/tests/Furnace.Tests/Furnace.Tests.fsproj
@@ -36,6 +36,8 @@
+    <Compile Include="TestBooleanOperations.fs" />
+    <Compile Include="TestMNISTOperations.fs" />
diff --git a/tests/Furnace.Tests/TestBooleanOperations.fs b/tests/Furnace.Tests/TestBooleanOperations.fs
new file mode 100644
index 00000000..1b771327
--- /dev/null
+++ b/tests/Furnace.Tests/TestBooleanOperations.fs
@@ -0,0 +1,308 @@
+// Copyright (c) 2016- University of Oxford (Atılım Güneş Baydin )
+// and other contributors, see LICENSE in root of repository.
+//
+// BSD 2-Clause License. See LICENSE in root of repository.
+
+namespace Tests
+
+open System
+open NUnit.Framework
+open Furnace
+open Tests.TestUtils
+
+[<TestFixture>]
+type TestBooleanOperations () =
+
+    [<Test>]
+    member _.TestBooleanTensorLogicalOperations() =
+        let combo = ComboInfo(Backend.Reference, Device.CPU, Dtype.Bool)
+
+        // Test logical AND and OR operations (AddTT as OR, MulTT as AND)
+        let t1 = combo.tensor([true; false; true; false])
+        let t2 = combo.tensor([true; true; false; false])
+
+        // Test logical OR (AddTT operation in boolean context)
+        let orResult = t1 + t2
+        let expectedOr = combo.tensor([true; true; true; false])
+        Assert.CheckEqual(expectedOr, orResult)
+
+        // Test logical AND (MulTT operation in boolean context)
+        let andResult = t1 * t2
+        let expectedAnd = combo.tensor([true; false; false; false])
+        Assert.CheckEqual(expectedAnd, andResult)
+
+    [<Test>]
+    member _.TestBooleanTensorWithScalars() =
+        let combo = ComboInfo(Backend.Reference, Device.CPU, Dtype.Bool)
+
+        // Test scalar operations
+        let t = combo.tensor([true; false; true])
+        let trueScalar = FurnaceImage.tensor(true, dtype=Dtype.Bool, backend=combo.backend, device=combo.device)
+        let falseScalar = FurnaceImage.tensor(false, dtype=Dtype.Bool, backend=combo.backend, device=combo.device)
+
+        // Test AddTT0 (OR with scalar)
+        let orWithTrue = t + trueScalar
+        let expectedOrTrue = combo.tensor([true; true; true])
+        Assert.CheckEqual(expectedOrTrue, orWithTrue)
+
+        let orWithFalse = t + falseScalar
+        Assert.CheckEqual(t, orWithFalse)
+
+        // Test MulTT0 (AND with scalar)
+        let andWithTrue = t * trueScalar
+        Assert.CheckEqual(t, andWithTrue)
+
+        let andWithFalse = t * falseScalar
+        let expectedAndFalse = combo.tensor([false; false; false])
+        Assert.CheckEqual(expectedAndFalse, andWithFalse)
+
+    [<Test>]
+    member _.TestBooleanTensorComparisons() =
+        let combo = ComboInfo(Backend.Reference, Device.CPU, Dtype.Bool)
+
+        let t1 = combo.tensor([true; false; true; false])
+        let t2 = combo.tensor([false; false; true; true])
+
+        // Test comparison operations specific to boolean tensors
+        let ltResult = t1.lt(t2)
+        let expectedLt = combo.tensor([false; false; false; true])
+        Assert.CheckEqual(expectedLt, ltResult)
+
+        let gtResult = t1.gt(t2)
+        let expectedGt = combo.tensor([true; false; false; false])
+        Assert.CheckEqual(expectedGt, gtResult)
+
+        let leResult = t1.le(t2)
+        let expectedLe = combo.tensor([false; true; true; true])
+        Assert.CheckEqual(expectedLe, leResult)
+
+        let geResult = t1.ge(t2)
+        let expectedGe = combo.tensor([true; true; true; false])
+        Assert.CheckEqual(expectedGe, geResult)
+
+        let eqResult = t1.eq(t2)
+        let expectedEq = combo.tensor([false; true; true; false])
+        Assert.CheckEqual(expectedEq, eqResult)
+
+        let neResult = t1.ne(t2)
+        let expectedNe = combo.tensor([true; false; false; true])
+        Assert.CheckEqual(expectedNe, neResult)
+
+    [<Test>]
+    member _.TestBooleanTensorReductionOperations() =
+        let combo = ComboInfo(Backend.Reference, Device.CPU, Dtype.Bool)
+
+        // Test reduction operations on boolean tensors
+        let t = combo.tensor([[true; false; true]; [false; true; false]])
+
+        // Test sum (converts to int64 then sums)
+        let sumResult = t.sum()
+        Assert.AreEqual(3.0, sumResult.toScalar().toDouble()) // 3 true values
+
+        // Test sum with dimensions
+        let sumDim0 = t.sum(0)
+        let expectedSumDim0 = FurnaceImage.tensor([1; 1; 1], dtype=Dtype.Int64, backend=combo.backend)
+        Assert.CheckEqual(expectedSumDim0, sumDim0)
+
+        let sumDim1 = t.sum(1)
+        let expectedSumDim1 = FurnaceImage.tensor([2; 1], dtype=Dtype.Int64, backend=combo.backend)
+        Assert.CheckEqual(expectedSumDim1, sumDim1)
+
+        // Test min/max operations
+        let maxResult = t.max()
+        Assert.AreEqual(1.0, maxResult.toScalar().toDouble()) // true as 1.0
+
+        let minResult = t.min()
+        Assert.AreEqual(0.0, minResult.toScalar().toDouble()) // false as 0.0
+
+    [<Test>]
+    member _.TestBooleanTensorMinMaxReduction() =
+        let combo = ComboInfo(Backend.Reference, Device.CPU, Dtype.Bool)
+
+        // Test dimensional min/max reduction for boolean tensors
+        let t = combo.tensor([[true; false]; [false; true]])
+
+        // Max reduction by dimension
+        let maxDim0 = t.max(0)
+        let expectedMaxDim0 = combo.tensor([true; true])
+        Assert.CheckEqual(expectedMaxDim0, maxDim0)
+
+        let maxDim1 = t.max(1)
+        let expectedMaxDim1 = combo.tensor([true; true])
+        Assert.CheckEqual(expectedMaxDim1, maxDim1)
+
+        // Min reduction by dimension
+        let minDim0 = t.min(0)
+        let expectedMinDim0 = combo.tensor([false; false])
+        Assert.CheckEqual(expectedMinDim0, minDim0)
+
+        let minDim1 = t.min(1)
+        let expectedMinDim1 = combo.tensor([false; false])
+        Assert.CheckEqual(expectedMinDim1, minDim1)
+
+    [<Test>]
+    member _.TestBooleanTensorBasicProperties() =
+        let combo = ComboInfo(Backend.Reference, Device.CPU, Dtype.Bool)
+
+        // Test basic properties and structure
+        let t = combo.tensor([true; false; true; false; true])
+
+        // Test basic properties
+        Assert.AreEqual([|5|], t.shape)
+        Assert.AreEqual(5, t.nelement)
+        Assert.AreEqual(Dtype.Bool, t.dtype)
+
+    [<Test>]
+    member _.TestBooleanTensorSliceOperations() =
+        let combo = ComboInfo(Backend.Reference, Device.CPU, Dtype.Bool)
+
+        // Test slice operations that may trigger GetTypedValues usage
+        let source = combo.tensor([[true; false]; [false; true]])
+
+        // Test slice access
+        let row0 = source[0]
+        let expectedRow0 = combo.tensor([true; false])
+        Assert.CheckEqual(expectedRow0, row0)
+
+        let row1 = source[1]
+        let expectedRow1 = combo.tensor([false; true])
+        Assert.CheckEqual(expectedRow1, row1)
+
+    [<Test>]
+    member _.TestBooleanTensorCastingOperations() =
+        let combo = ComboInfo(Backend.Reference, Device.CPU, Dtype.Bool)
+
+        // Test casting boolean tensors to other types and back
+        let t = combo.tensor([true; false; true; false])
+
+        // Cast to int64 (used internally by SumT)
+        let asInt64 = t.cast(Dtype.Int64)
+        let expectedInt64 = FurnaceImage.tensor([1L; 0L; 1L; 0L], dtype=Dtype.Int64, backend=combo.backend)
+        Assert.CheckEqual(expectedInt64, asInt64)
+
+        // Cast back to bool
+        let backToBool = asInt64.cast(Dtype.Bool)
+        Assert.CheckEqual(t, backToBool)
+
+    [<Test>]
+    member _.TestBooleanTensorSignOperation() =
+        let combo = ComboInfo(Backend.Reference, Device.CPU, Dtype.Bool)
+
+        // Test SignT operation (should return self for boolean tensors)
+        let t = combo.tensor([true; false; true])
+        let signResult = t.sign()
+
+        // SignT for boolean should return the same tensor
+        Assert.CheckEqual(t, signResult)
+
+    [<Test>]
+    member _.TestBooleanTensorEqualsAndAllClose() =
+        let combo = ComboInfo(Backend.Reference, Device.CPU, Dtype.Bool)
+
+        // Test Equals and AllClose operations
+        let t1 = combo.tensor([true; false; true])
+        let t2 = combo.tensor([true; false; true])
+        let t3 = combo.tensor([false; true; false])
+
+        // Test tensor equality using allclose
+        Assert.True(t1.allclose(t2))
+        Assert.False(t1.allclose(t3))
+
+        // Test AllClose with tolerance parameters (should be same as Equals for boolean tensors)
+        Assert.True(t1.allclose(t2, 0.0, 0.0))
+        Assert.False(t1.allclose(t3, 0.0, 0.0))
+
+    [<Test>]
+    member _.TestBooleanTensorAlphaOperations() =
+        let combo = ComboInfo(Backend.Reference, Device.CPU, Dtype.Bool)
+
+        // NOTE: no alpha parameter is passed here; only plain AddTT (OR) and MulTT (AND) are exercised
+        let t1 = combo.tensor([true; false; true])
+        let t2 = combo.tensor([false; true; false])
+
+        // Test basic AddTT operation (boolean OR)
+        let resultBasic = t1 + t2
+        let expectedBasic = combo.tensor([true; true; true]) // true OR false, false OR true, true OR false
+        Assert.CheckEqual(expectedBasic, resultBasic)
+
+        // Test MulTT operation (boolean AND)
+        let andResult = t1 * t2
+        let expectedAnd = combo.tensor([false; false; false]) // true AND false, false AND true, true AND false
+        Assert.CheckEqual(expectedAnd, andResult)
+
+    [<Test>]
+    member _.TestBooleanTensorUnsupportedOperations() =
+        let combo = ComboInfo(Backend.Reference, Device.CPU, Dtype.Bool)
+
+        // Test that specific unsupported operations throw appropriate exceptions
+        let t1 = combo.tensor([true; false])
+        let t2 = combo.tensor([false; true])
+
+        // Test subtraction - this should not be supported for boolean tensors
+        isInvalidOp (fun () -> t1 - t2) // SubTT not supported for Bool
+
+        // Test division - boolean division may not be supported or may convert to float
+        try
+            let div_result = t1 / t2
+            // If division works, it's okay if it converts to float (this is implementation-dependent behavior)
+            Assert.AreEqual(t1.shape, div_result.shape)
+            // Division result might be Float32 rather than Bool - this is acceptable
+            Assert.IsTrue(div_result.dtype = Dtype.Bool || div_result.dtype = Dtype.Float32,
+                $"Division result should be Bool or Float32, but got {div_result.dtype}")
+        with
+        | :? System.InvalidOperationException ->
+            // Division not supported - this is also acceptable behavior
+            ()
+        | ex ->
+            // Any other exception type should fail the test
+            Assert.Fail($"Unexpected exception type for boolean division: {ex.GetType().Name}, Message: {ex.Message}")
+
+        // Test operations that are actually unsupported on boolean tensors
+        // abs(), neg(), relu() operations throw InvalidOperationException for bool tensors
+        isInvalidOp (fun () -> t1.abs()) // AbsT not permitted on Bool
+        isInvalidOp (fun () -> t1.neg()) // NegT not permitted on Bool
+        isInvalidOp (fun () -> t1.relu()) // ReluT not permitted on Bool
+
+    [<Test>]
+    member _.TestBooleanTensorEdgeCases() =
+        let combo = ComboInfo(Backend.Reference, Device.CPU, Dtype.Bool)
+
+        // Test edge cases that might trigger different code paths
+
+        // Empty boolean tensor
+        let empty = FurnaceImage.zeros([0], dtype=Dtype.Bool, backend=combo.backend)
+        Assert.AreEqual(0, empty.nelement)
+
+        // Scalar boolean tensor
+        let scalar = combo.tensor(true)
+        Assert.AreEqual(1.0, scalar.toScalar().toDouble())
+
+        // Large boolean tensor to stress test operations
+        let large = combo.zeros([100; 50])
+        Assert.AreEqual(5000, large.nelement)
+
+        // Mixed operations with large tensors
+        let ones = combo.ones([100; 50])
+        let mixed = large + ones // Should result in all true values
+        Assert.AreEqual(5000.0, mixed.sum().toScalar().toDouble())
+
+    [<Test>]
+    member _.TestBooleanTensorMakeLikeOperations() =
+        let combo = ComboInfo(Backend.Reference, Device.CPU, Dtype.Bool)
+
+        // Test operations that use MakeLike method
+        let t1 = combo.tensor([[true; false]; [true; false]])
+        let t2 = combo.tensor([[false; true]; [false; true]])
+
+        // All comparison operations should use MakeLike internally
+        let results = [
+            t1.lt(t2); t1.gt(t2); t1.le(t2); t1.ge(t2);
+            t1.eq(t2); t1.ne(t2); t1 + t2; t1 * t2
+        ]
+
+        // All results should have same shape as input
+        results |> List.iter (fun r ->
+            Assert.AreEqual([|2; 2|], r.shape)
+            Assert.AreEqual(Dtype.Bool, r.dtype)
+            Assert.AreEqual(Backend.Reference, r.backend)
+        )
\ No newline at end of file
diff --git a/tests/Furnace.Tests/TestMNISTOperations.fs b/tests/Furnace.Tests/TestMNISTOperations.fs
new file mode 100644
index 00000000..158a6525
--- /dev/null
+++ b/tests/Furnace.Tests/TestMNISTOperations.fs
@@ -0,0 +1,247 @@
+// Copyright (c) 2016- University of Oxford (Atılım Güneş Baydin )
+// and other contributors, see LICENSE in root of repository.
+//
+// BSD 2-Clause License. See LICENSE in root of repository.
+
+namespace Tests
+
+open System
+open System.IO
+open System.IO.Compression
+open System.Net
+open NUnit.Framework
+open Furnace
+open Furnace.Data
+open Tests.TestUtils
+
+[<TestFixture>]
+type TestMNISTOperations () =
+
+    /// Writes a gzip-compressed file by handing a BinaryWriter to `write`.
+    /// The `use` bindings dispose (flush and close) every stream before this returns.
+    let writeGZipFile (filename: string) (write: BinaryWriter -> unit) =
+        use stream = new FileStream(filename, FileMode.Create)
+        use gzip = new GZipStream(stream, CompressionMode.Compress)
+        use writer = new BinaryWriter(gzip)
+        write writer
+
+    /// Writes the image-file header in network byte order: magic number 2051, image count, 28x28 size.
+    let writeImageHeader (writer: BinaryWriter) (numImages: int) =
+        writer.Write(IPAddress.HostToNetworkOrder(2051)) // Magic number for images
+        writer.Write(IPAddress.HostToNetworkOrder(numImages)) // Number of images
+        writer.Write(IPAddress.HostToNetworkOrder(28)) // Height
+        writer.Write(IPAddress.HostToNetworkOrder(28)) // Width
+
+    /// Creates a mock image file in which every pixel of image i has the value i % 256.
+    let createMockMNISTImageFile (filename: string) (numImages: int) =
+        let writeImages (writer: BinaryWriter) =
+            writeImageHeader writer numImages
+            for i in 0..numImages-1 do
+                for _pixel in 0..783 do // 28*28 = 784 pixels per image
+                    writer.Write(byte (i % 256))
+        writeGZipFile filename writeImages
+
+    /// Creates a mock label file whose labels cycle through 0-9.
+    let createMockMNISTLabelFile (filename: string) (numLabels: int) =
+        let writeLabels (writer: BinaryWriter) =
+            writer.Write(IPAddress.HostToNetworkOrder(2049)) // Magic number for labels
+            writer.Write(IPAddress.HostToNetworkOrder(numLabels)) // Number of labels
+            for i in 0..numLabels-1 do
+                writer.Write(byte (i % 10))
+        writeGZipFile filename writeLabels
+
+    /// Creates matching mock training image/label files with `count` items each.
+    let createMockTrainFiles (dataDir: string) (count: int) =
+        createMockMNISTImageFile (Path.Combine(dataDir, "train-images-idx3-ubyte.gz")) count
+        createMockMNISTLabelFile (Path.Combine(dataDir, "train-labels-idx1-ubyte.gz")) count
+
+    /// Best-effort recursive delete: retries a few times on IOException and never throws,
+    /// so a cleanup problem cannot fail (or mask the real failure of) a test.
+    let deleteDirectoryBestEffort (dir: string) =
+        let rec attempt (retriesLeft: int) =
+            try
+                if Directory.Exists(dir) then Directory.Delete(dir, true)
+            with
+            | :? IOException when retriesLeft > 0 ->
+                System.Threading.Thread.Sleep(100)
+                attempt (retriesLeft - 1)
+            | _ -> ()
+        attempt 3
+
+    /// Runs `test mnistDir dataDir` against a fresh, uniquely named scratch directory, where
+    /// dataDir is the "mnist" subdirectory holding the data files, then removes the directory.
+    let withScratchMnistDir (prefix: string) (test: string -> string -> unit) =
+        let uniqueId = Guid.NewGuid().ToString("N")
+        let mnistDir = Path.Combine(Path.GetTempPath(), $"{prefix}-{uniqueId}")
+        let dataDir = Path.Combine(mnistDir, "mnist")
+        Directory.CreateDirectory(dataDir) |> ignore
+        try
+            test mnistDir dataDir
+        finally
+            deleteDirectoryBestEffort mnistDir
+
+    [<Test>]
+    member _.TestMNISTClassProperties() =
+        // Mock files are created up front so no network download is needed
+        withScratchMnistDir "test-mnist-props" (fun mnistDir dataDir ->
+            createMockTrainFiles dataDir 10
+
+            // Limit the dataset to the first 5 of the 10 items
+            let mnist = MNIST(mnistDir, train=true, n=5)
+
+            Assert.AreEqual(10, mnist.classes)
+            Assert.AreEqual(10, mnist.classNames.Length)
+
+            // Class names are the string representations of 0-9
+            for i in 0..9 do
+                Assert.AreEqual(string i, mnist.classNames[i])
+
+            Assert.AreEqual(5, mnist.length)
+        )
+
+    [<Test>]
+    member _.TestMNISTItemAccess() =
+        withScratchMnistDir "test-mnist-items" (fun mnistDir dataDir ->
+            createMockTrainFiles dataDir 3
+
+            // Custom transforms: scale the images, offset the labels by 1
+            let imageTransform = fun (t:Tensor) -> t * 2.0f
+            let targetTransform = fun (t:Tensor) -> t + 1.0f
+            let mnist = MNIST(mnistDir, train=true, n=3, transform=imageTransform, targetTransform=targetTransform)
+
+            for i in 0..2 do
+                let data, target = mnist[i]
+
+                Assert.AreEqual([|1; 28; 28|], data.shape)
+                Assert.AreEqual([||], target.shape) // Scalar tensor
+
+                // Mock label i is i % 10; targetTransform adds 1
+                let expectedTarget = float32 ((i % 10) + 1)
+                let actualTarget = target.toScalar().toSingle()
+                Assert.AreEqual(double expectedTarget, double actualTarget, 0.001)
+        )
+
+    [<Test>]
+    member _.TestMNISTTrainVsTest() =
+        withScratchMnistDir "test-mnist-train-test" (fun mnistDir dataDir ->
+            // Mock files for both splits; the training split is the larger one
+            createMockTrainFiles dataDir 100
+            createMockMNISTImageFile (Path.Combine(dataDir, "t10k-images-idx3-ubyte.gz")) 20
+            createMockMNISTLabelFile (Path.Combine(dataDir, "t10k-labels-idx1-ubyte.gz")) 20
+
+            let trainDataset = MNIST(mnistDir, train=true, n=50)
+            Assert.AreEqual(50, trainDataset.length)
+
+            let testDataset = MNIST(mnistDir, train=false, n=15)
+            Assert.AreEqual(15, testDataset.length)
+
+            // Both splits must yield items of the same shape
+            let trainItem, _ = trainDataset[0]
+            let testItem, _ = testDataset[0]
+            Assert.AreEqual([|1; 28; 28|], trainItem.shape)
+            Assert.AreEqual([|1; 28; 28|], testItem.shape)
+        )
+
+    [<Test>]
+    member _.TestMNISTDefaultTransforms() =
+        withScratchMnistDir "test-mnist-defaults" (fun mnistDir dataDir ->
+            createMockTrainFiles dataDir 2
+
+            // No transform argument, so the default transform is used
+            let mnist = MNIST(mnistDir, train=true, n=2)
+            let data, _ = mnist[0]
+
+            Assert.AreEqual([|1; 28; 28|], data.shape)
+            Assert.AreEqual(Dtype.Float32, data.dtype)
+            Assert.AreEqual(784, data.flatten().nelement) // 28*28 pixels
+
+            // Default transform is (t - 0.1307) / 0.3081. Mock image 0 is all zeros, so every
+            // transformed value must be negative, i.e. outside the normalized [0,1] range
+            let maxVal = data.max().toScalar().toSingle()
+            Assert.Less(maxVal, 0.0f)
+        )
+
+    [<Test>]
+    member _.TestMNISTErrorHandling() =
+        withScratchMnistDir "test-mnist-errors" (fun mnistDir dataDir ->
+            // Image file whose header carries the wrong magic number
+            let writeBadHeader (writer: BinaryWriter) =
+                writer.Write(IPAddress.HostToNetworkOrder(9999))
+            writeGZipFile (Path.Combine(dataDir, "train-images-idx3-ubyte.gz")) writeBadHeader
+
+            // A valid label file keeps the bad image file the only possible cause of failure
+            // NOTE(review): presumably a missing label file would trigger a download attempt - confirm in Data.fs
+            createMockMNISTLabelFile (Path.Combine(dataDir, "train-labels-idx1-ubyte.gz")) 1
+
+            // Loading must fail because of the invalid format
+            isException (fun () ->
+                let mnist = MNIST(mnistDir, train=true, n=1)
+                mnist[0] |> ignore
+            )
+        )
+
+    [<Test>]
+    member _.TestMNISTWithCustomURLs() =
+        withScratchMnistDir "test-mnist-urls" (fun mnistDir dataDir ->
+            // Pre-create the files named by the URLs so nothing is downloaded
+            createMockMNISTImageFile (Path.Combine(dataDir, "custom-train-images.gz")) 10
+            createMockMNISTLabelFile (Path.Combine(dataDir, "custom-train-labels.gz")) 10
+            createMockMNISTImageFile (Path.Combine(dataDir, "custom-test-images.gz")) 5
+            createMockMNISTLabelFile (Path.Combine(dataDir, "custom-test-labels.gz")) 5
+
+            let customUrls = [
+                "http://example.com/custom-train-images.gz"
+                "http://example.com/custom-train-labels.gz"
+                "http://example.com/custom-test-images.gz"
+                "http://example.com/custom-test-labels.gz"
+            ]
+
+            // This should work because the files already exist
+            let mnist = MNIST(mnistDir, urls=customUrls, train=true, n=5)
+            Assert.AreEqual(5, mnist.length)
+        )
+
+    [<Test>]
+    member _.TestMNISTDerivedFromDataset() =
+        withScratchMnistDir "test-mnist-inheritance" (fun mnistDir dataDir ->
+            createMockTrainFiles dataDir 3
+
+            let mnist = MNIST(mnistDir, train=true, n=3)
+
+            // MNIST must be usable through its Dataset base class
+            let dataset : Dataset = upcast mnist
+            Assert.AreEqual(3, dataset.length)
+
+            // The indexer works through the base class
+            let data, _ = dataset[1]
+            Assert.AreEqual([|1; 28; 28|], data.shape)
+        )
+
+    [<Test>]
+    member _.TestMNISTDataNormalization() =
+        withScratchMnistDir "test-mnist-norm" (fun mnistDir dataDir ->
+            // One image with a known byte pattern: 0, 127, 255 repeated
+            let writePatternImage (writer: BinaryWriter) =
+                writeImageHeader writer 1
+                for i in 0..783 do
+                    let value =
+                        match i % 3 with
+                        | 0 -> 0uy
+                        | 1 -> 127uy
+                        | _ -> 255uy
+                    writer.Write(value)
+            writeGZipFile (Path.Combine(dataDir, "train-images-idx3-ubyte.gz")) writePatternImage
+            createMockMNISTLabelFile (Path.Combine(dataDir, "train-labels-idx1-ubyte.gz")) 1
+
+            // Identity transform exposes the raw normalized data
+            let mnist = MNIST(mnistDir, train=true, n=1, transform=id)
+            let data, _ = mnist[0]
+
+            // Bytes are divided by 255 on load; the pattern contains both 0 and 255,
+            // so the extremes must be exactly 0/255 = 0 and 255/255 = 1
+            let flatData = data.flatten()
+            let minVal = flatData.min().toScalar().toSingle()
+            let maxVal = flatData.max().toScalar().toSingle()
+            Assert.AreEqual(0.0, double minVal, 1e-6)
+            Assert.AreEqual(1.0, double maxVal, 1e-6)
+        )