diff --git a/.gitattributes b/.gitattributes
index ba05560..0d9f7ef 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -24,6 +24,7 @@ Dockerfile text eol=lf
*.f03 filter=tabs2spaces
*.sql filter=tabs2spaces
*.ino filter=tabs2spaces
+*.cu filter=tabs2spaces
# Project files
*.csproj filter=tabs2spaces
diff --git a/gfoidl.Stochastics.sln b/gfoidl.Stochastics.sln
index f9609a9..93a5f9b 100644
--- a/gfoidl.Stochastics.sln
+++ b/gfoidl.Stochastics.sln
@@ -72,6 +72,14 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "images", "images", "{A741E7
doc\release-notes\images\perf-avg-var-v1.1.0-preview-3.png = doc\release-notes\images\perf-avg-var-v1.1.0-preview-3.png
EndProjectSection
EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gfoidl.Stochastics.Gpu-win-64", "source\gfoidl.Stochastics.Gpu-win-64\gfoidl.Stochastics.Gpu-win-64.vcxproj", "{60D3CEC5-E8DE-45E4-8834-86C9940D942A}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gfoidl.Stochastics.Gpu-win-64.Tests", "tests\gfoidl.Stochastics.Gpu-win-64.Tests\gfoidl.Stochastics.Gpu-win-64.Tests.vcxproj", "{90DA6C71-5A2B-4D26-9EA4-B6BF60A7B7E3}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gfoidl.Stochastics.Gpu-win-64.Tests.Console", "tests\gfoidl.Stochastics.Gpu-win-64.Tests.Console\gfoidl.Stochastics.Gpu-win-64.Tests.Console.vcxproj", "{1C7AA347-76CE-4C7F-A468-B46ADDD93215}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gfoidl.Stochastics.Gpu-linux-64", "source\gfoidl.Stochastics.Gpu-linux-64\gfoidl.Stochastics.Gpu-linux-64.vcxproj", "{0729F977-87F1-4139-A9B1-569BFB4A352E}"
+EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
@@ -83,27 +91,21 @@ Global
{B071CBBC-D096-4E62-A7D1-146FFC9B0431}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{B071CBBC-D096-4E62-A7D1-146FFC9B0431}.Debug|Any CPU.Build.0 = Debug|Any CPU
{B071CBBC-D096-4E62-A7D1-146FFC9B0431}.Debug|x64.ActiveCfg = Debug|Any CPU
- {B071CBBC-D096-4E62-A7D1-146FFC9B0431}.Debug|x64.Build.0 = Debug|Any CPU
{B071CBBC-D096-4E62-A7D1-146FFC9B0431}.Release|Any CPU.ActiveCfg = Release|Any CPU
{B071CBBC-D096-4E62-A7D1-146FFC9B0431}.Release|Any CPU.Build.0 = Release|Any CPU
{B071CBBC-D096-4E62-A7D1-146FFC9B0431}.Release|x64.ActiveCfg = Release|Any CPU
- {B071CBBC-D096-4E62-A7D1-146FFC9B0431}.Release|x64.Build.0 = Release|Any CPU
{902CB41A-DDB5-4AEB-B5E8-AD6494EF4DB3}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{902CB41A-DDB5-4AEB-B5E8-AD6494EF4DB3}.Debug|Any CPU.Build.0 = Debug|Any CPU
{902CB41A-DDB5-4AEB-B5E8-AD6494EF4DB3}.Debug|x64.ActiveCfg = Debug|Any CPU
- {902CB41A-DDB5-4AEB-B5E8-AD6494EF4DB3}.Debug|x64.Build.0 = Debug|Any CPU
{902CB41A-DDB5-4AEB-B5E8-AD6494EF4DB3}.Release|Any CPU.ActiveCfg = Release|Any CPU
{902CB41A-DDB5-4AEB-B5E8-AD6494EF4DB3}.Release|Any CPU.Build.0 = Release|Any CPU
{902CB41A-DDB5-4AEB-B5E8-AD6494EF4DB3}.Release|x64.ActiveCfg = Release|Any CPU
- {902CB41A-DDB5-4AEB-B5E8-AD6494EF4DB3}.Release|x64.Build.0 = Release|Any CPU
{1B138AD4-AB52-4011-A0CB-C9AD6BB10D7B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{1B138AD4-AB52-4011-A0CB-C9AD6BB10D7B}.Debug|Any CPU.Build.0 = Debug|Any CPU
{1B138AD4-AB52-4011-A0CB-C9AD6BB10D7B}.Debug|x64.ActiveCfg = Debug|Any CPU
- {1B138AD4-AB52-4011-A0CB-C9AD6BB10D7B}.Debug|x64.Build.0 = Debug|Any CPU
{1B138AD4-AB52-4011-A0CB-C9AD6BB10D7B}.Release|Any CPU.ActiveCfg = Release|Any CPU
{1B138AD4-AB52-4011-A0CB-C9AD6BB10D7B}.Release|Any CPU.Build.0 = Release|Any CPU
{1B138AD4-AB52-4011-A0CB-C9AD6BB10D7B}.Release|x64.ActiveCfg = Release|Any CPU
- {1B138AD4-AB52-4011-A0CB-C9AD6BB10D7B}.Release|x64.Build.0 = Release|Any CPU
{813695B2-09E2-4111-BD6B-18EEF40299C0}.Debug|Any CPU.ActiveCfg = Debug|x64
{813695B2-09E2-4111-BD6B-18EEF40299C0}.Debug|x64.ActiveCfg = Debug|x64
{813695B2-09E2-4111-BD6B-18EEF40299C0}.Release|Any CPU.ActiveCfg = Release|x64
@@ -120,6 +122,28 @@ Global
{7A4AAA49-6071-4FEA-8920-D8EFC6B8A019}.Release|Any CPU.ActiveCfg = Release|x64
{7A4AAA49-6071-4FEA-8920-D8EFC6B8A019}.Release|x64.ActiveCfg = Release|x64
{7A4AAA49-6071-4FEA-8920-D8EFC6B8A019}.Release|x64.Build.0 = Release|x64
+ {60D3CEC5-E8DE-45E4-8834-86C9940D942A}.Debug|Any CPU.ActiveCfg = Debug|x64
+ {60D3CEC5-E8DE-45E4-8834-86C9940D942A}.Debug|x64.ActiveCfg = Debug|x64
+ {60D3CEC5-E8DE-45E4-8834-86C9940D942A}.Debug|x64.Build.0 = Debug|x64
+ {60D3CEC5-E8DE-45E4-8834-86C9940D942A}.Release|Any CPU.ActiveCfg = Release|x64
+ {60D3CEC5-E8DE-45E4-8834-86C9940D942A}.Release|x64.ActiveCfg = Release|x64
+ {60D3CEC5-E8DE-45E4-8834-86C9940D942A}.Release|x64.Build.0 = Release|x64
+ {90DA6C71-5A2B-4D26-9EA4-B6BF60A7B7E3}.Debug|Any CPU.ActiveCfg = Debug|x64
+ {90DA6C71-5A2B-4D26-9EA4-B6BF60A7B7E3}.Debug|x64.ActiveCfg = Debug|x64
+ {90DA6C71-5A2B-4D26-9EA4-B6BF60A7B7E3}.Debug|x64.Build.0 = Debug|x64
+ {90DA6C71-5A2B-4D26-9EA4-B6BF60A7B7E3}.Release|Any CPU.ActiveCfg = Release|x64
+ {90DA6C71-5A2B-4D26-9EA4-B6BF60A7B7E3}.Release|x64.ActiveCfg = Release|x64
+ {90DA6C71-5A2B-4D26-9EA4-B6BF60A7B7E3}.Release|x64.Build.0 = Release|x64
+ {1C7AA347-76CE-4C7F-A468-B46ADDD93215}.Debug|Any CPU.ActiveCfg = Debug|x64
+ {1C7AA347-76CE-4C7F-A468-B46ADDD93215}.Debug|x64.ActiveCfg = Debug|x64
+ {1C7AA347-76CE-4C7F-A468-B46ADDD93215}.Debug|x64.Build.0 = Debug|x64
+ {1C7AA347-76CE-4C7F-A468-B46ADDD93215}.Release|Any CPU.ActiveCfg = Release|x64
+ {1C7AA347-76CE-4C7F-A468-B46ADDD93215}.Release|x64.ActiveCfg = Release|x64
+ {1C7AA347-76CE-4C7F-A468-B46ADDD93215}.Release|x64.Build.0 = Release|x64
+ {0729F977-87F1-4139-A9B1-569BFB4A352E}.Debug|Any CPU.ActiveCfg = Debug|x64
+ {0729F977-87F1-4139-A9B1-569BFB4A352E}.Debug|x64.ActiveCfg = Debug|x64
+ {0729F977-87F1-4139-A9B1-569BFB4A352E}.Release|Any CPU.ActiveCfg = Release|x64
+ {0729F977-87F1-4139-A9B1-569BFB4A352E}.Release|x64.ActiveCfg = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
@@ -137,6 +161,10 @@ Global
{7A4AAA49-6071-4FEA-8920-D8EFC6B8A019} = {EFBEAA6C-2E09-4A10-898C-D20897F3609F}
{2EE5949B-0F90-49E5-B487-CC04C0940BC4} = {7917B2B4-78F0-4BDE-86FE-316549A84E4E}
{A741E7D0-1788-4145-BA6C-2C4698CCECF1} = {2EE5949B-0F90-49E5-B487-CC04C0940BC4}
+ {60D3CEC5-E8DE-45E4-8834-86C9940D942A} = {0B4051AE-E1F2-4601-B0A8-B4CA660CC4B2}
+ {90DA6C71-5A2B-4D26-9EA4-B6BF60A7B7E3} = {EFBEAA6C-2E09-4A10-898C-D20897F3609F}
+ {1C7AA347-76CE-4C7F-A468-B46ADDD93215} = {EFBEAA6C-2E09-4A10-898C-D20897F3609F}
+ {0729F977-87F1-4139-A9B1-569BFB4A352E} = {0B4051AE-E1F2-4601-B0A8-B4CA660CC4B2}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {A594189A-069D-4FEE-8CD8-1A5786CF97AA}
diff --git a/native-out/gfoidl-Stochastics-gpu.dll b/native-out/gfoidl-Stochastics-gpu.dll
new file mode 100644
index 0000000..df5b6cf
Binary files /dev/null and b/native-out/gfoidl-Stochastics-gpu.dll differ
diff --git a/native-out/libgfoidl-Stochastics-gpu.so b/native-out/libgfoidl-Stochastics-gpu.so
new file mode 100644
index 0000000..5394c54
Binary files /dev/null and b/native-out/libgfoidl-Stochastics-gpu.so differ
diff --git a/source/gfoidl.Stochastics.Gpu-linux-64/SampleStats.h b/source/gfoidl.Stochastics.Gpu-linux-64/SampleStats.h
new file mode 100644
index 0000000..d43a23c
--- /dev/null
+++ b/source/gfoidl.Stochastics.Gpu-linux-64/SampleStats.h
@@ -0,0 +1,12 @@
+#pragma once
+//-----------------------------------------------------------------------------
+struct SampleStats
+{
+ double Mean;
+ double Max;
+ double Min;
+ double Delta;
+ double VarianceCore;
+ double Skewness;
+ double Kurtosis;
+};
diff --git a/source/gfoidl.Stochastics.Gpu-linux-64/build.sh b/source/gfoidl.Stochastics.Gpu-linux-64/build.sh
new file mode 100644
index 0000000..ed73bbe
--- /dev/null
+++ b/source/gfoidl.Stochastics.Gpu-linux-64/build.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+set -e
+
+libName=libgfoidl-Stochastics-gpu.so
+
+# https://docs.nvidia.com/cuda/pdf/CUDA_Compiler_Driver_NVCC.pdf
+nvcc -std=c++14 \
+ --disable-warnings \
+ -Wno-deprecated-declarations \
+ -o "$libName" \
+ -O3 \
+ --ptxas-options=-v \
+ --machine 64 \
+ -x cu \
+ -cudart static \
+ -shared -rdc=true \
+ -Xcompiler -fPIC,-fvisibility=hidden \
+ -gencode=arch=compute_60,code=sm_60 \
+ -gencode=arch=compute_35,code=sm_35 \
+ kernel_utils.cu kernel.cu gpu_core.cu
+
+nm -DC "$libName" | grep " T "
diff --git a/source/gfoidl.Stochastics.Gpu-linux-64/dll.h b/source/gfoidl.Stochastics.Gpu-linux-64/dll.h
new file mode 100644
index 0000000..fc8d4b3
--- /dev/null
+++ b/source/gfoidl.Stochastics.Gpu-linux-64/dll.h
@@ -0,0 +1,23 @@
+#pragma once
+//-----------------------------------------------------------------------------
+#if defined _WIN32 || defined __CYGWIN__
+ #ifdef GFOIDL_STOCHASTICS_GPU_EXPORTS
+ #define GPU_API __declspec(dllexport)
+ #else
+ #define GPU_API __declspec(dllimport)
+ #endif
+#else
+ #if __GNUC__ >= 4
+ #define GPU_API __attribute__ ((visibility ("default")))
+ #else
+ #define GPU_API
+ #endif
+#endif
+//-----------------------------------------------------------------------------
+#ifdef __cplusplus
+ #define BEGIN_EXTERN_C extern "C" {
+ #define END_EXTERN_C }
+#else
+ #define BEGIN_EXTERN_C
+ #define END_EXTERN_C
+#endif
diff --git a/source/gfoidl.Stochastics.Gpu-linux-64/gfoidl.Stochastics.Gpu-linux-64.vcxproj b/source/gfoidl.Stochastics.Gpu-linux-64/gfoidl.Stochastics.Gpu-linux-64.vcxproj
new file mode 100644
index 0000000..af8f40a
--- /dev/null
+++ b/source/gfoidl.Stochastics.Gpu-linux-64/gfoidl.Stochastics.Gpu-linux-64.vcxproj
@@ -0,0 +1,51 @@
+
+
+
+
+ Debug
+ x64
+
+
+ Release
+ x64
+
+
+
+ {0729f977-87f1-4139-a9b1-569bfb4a352e}
+ Linux
+ gfoidl_Stochastics_Gpu_linux_64
+ 15.0
+ Linux
+ 1.0
+ Generic
+ {2238F9CD-F817-4ECC-BD14-2524D2669B35}
+
+
+
+ true
+
+
+ false
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/source/gfoidl.Stochastics.Gpu-linux-64/gpu_core.cu b/source/gfoidl.Stochastics.Gpu-linux-64/gpu_core.cu
new file mode 100644
index 0000000..30f5c2b
--- /dev/null
+++ b/source/gfoidl.Stochastics.Gpu-linux-64/gpu_core.cu
@@ -0,0 +1,90 @@
+//#define SINGLE_THREAD
+//-----------------------------------------------------------------------------
+#include "gpu_core.h"
+#include
+#include "kernel.h"
+
+#if defined(DEBUG) || defined(_DEBUG)
+ #include
+ #include
+#endif
+//-----------------------------------------------------------------------------
+// Forward declarations
+inline cudaError_t checkCuda(cudaError_t result);
+//-----------------------------------------------------------------------------
+bool gpu_available()
+{
+ int deviceCount;
+ cudaError_t errorId = cudaGetDeviceCount(&deviceCount);
+
+ return errorId == cudaSuccess
+ && deviceCount > 0;
+}
+//-----------------------------------------------------------------------------
+const char* gpu_get_error_string(const int errorCode)
+{
+ return cudaGetErrorString(static_cast(errorCode));
+}
+//-----------------------------------------------------------------------------
+int gpu_sample_calc_stats(double* sample, const int sampleSize, SampleStats* sampleStats)
+{
+ double* deviceSample;
+ SampleStats* deviceSampleStats;
+
+ try
+ {
+ checkCuda(cudaMalloc(&deviceSample, sizeof(double) * sampleSize));
+ checkCuda(cudaMalloc(&deviceSampleStats, sizeof(SampleStats)));
+
+ checkCuda(cudaMemcpy(deviceSample, sample, sizeof(double) * sampleSize, cudaMemcpyHostToDevice));
+ checkCuda(cudaMemset(deviceSampleStats, 0, sizeof(SampleStats)));
+
+#ifndef SINGLE_THREAD
+ const int blockSize = 256;
+ int numBlocks = (sampleSize + blockSize - 1) / blockSize;
+#else
+ const int blockSize = 1;
+ const int numBlocks = 1;
+#endif
+
+#if defined(DEBUG) || defined(_DEBUG)
+ printf("blockSize: %d\nnumBlocks: %d\n", blockSize, numBlocks);
+#endif
+
+ // For final fixup of values a separate kernel is queued to the device.
+ // Otherwise there's no way of syncing all threads in the grid.
+ Kernel::CalculateAverageAndVarianceCore<<>>(deviceSample, sampleSize, deviceSampleStats);
+ Kernel::CalculateAverageAndVarianceCoreFinal<<<1, 1>>>(deviceSampleStats, sampleSize);
+
+ Kernel::CalculateDeltaSkewnessKurtosis<<>>(deviceSample, sampleSize, deviceSampleStats);
+
+ //checkCuda(cudaDeviceSynchronize()); // not necessary
+ checkCuda(cudaMemcpy(sampleStats, deviceSampleStats, sizeof(SampleStats), cudaMemcpyDeviceToHost));
+
+ checkCuda(cudaFree(deviceSample));
+ checkCuda(cudaFree(deviceSampleStats));
+
+ // Final fixup of values, not done yet
+ // No need to launch a kernel for a simple division.
+ sampleStats->Delta /= sampleSize;
+
+ double sigma = sqrt(sampleStats->VarianceCore / sampleSize);
+ double t = sampleSize * sigma * sigma * sigma;
+ sampleStats->Skewness /= t;
+ sampleStats->Kurtosis /= t * sigma;
+ }
+ catch (const int e)
+ {
+ return e;
+ }
+
+ return 0;
+}
+//-----------------------------------------------------------------------------
+cudaError_t checkCuda(cudaError_t result)
+{
+ if (result != cudaSuccess)
+ throw static_cast(result);
+
+ return result;
+}
diff --git a/source/gfoidl.Stochastics.Gpu-linux-64/gpu_core.h b/source/gfoidl.Stochastics.Gpu-linux-64/gpu_core.h
new file mode 100644
index 0000000..9e887b4
--- /dev/null
+++ b/source/gfoidl.Stochastics.Gpu-linux-64/gpu_core.h
@@ -0,0 +1,13 @@
+#pragma once
+//-----------------------------------------------------------------------------
+#include "dll.h"
+#include "SampleStats.h"
+//-----------------------------------------------------------------------------
+BEGIN_EXTERN_C
+
+GPU_API bool gpu_available();
+GPU_API const char* gpu_get_error_string(const int errorCode);
+
+GPU_API int gpu_sample_calc_stats(double* sample, const int sampleSize, SampleStats* sampleStats);
+
+END_EXTERN_C
diff --git a/source/gfoidl.Stochastics.Gpu-linux-64/kernel.cu b/source/gfoidl.Stochastics.Gpu-linux-64/kernel.cu
new file mode 100644
index 0000000..d3240e9
--- /dev/null
+++ b/source/gfoidl.Stochastics.Gpu-linux-64/kernel.cu
@@ -0,0 +1,88 @@
+#include "kernel.h"
+#include "kernel_utils.h"
+//-----------------------------------------------------------------------------
+namespace Kernel
+{
+ __global__
+ void CalculateAverageAndVarianceCore(const double* sample, const int n, SampleStats* sampleStats)
+ {
+ const int index = blockDim.x * blockIdx.x + threadIdx.x;
+ const int stride = gridDim.x * blockDim.x;
+
+ double avg = 0;
+ double variance = 0;
+
+ for (int i = index; i < n; i += stride)
+ {
+ avg += sample[i];
+ variance += sample[i] * sample[i];
+ }
+
+ Utils::TwoDoubles twoDoubles {avg, variance};
+ twoDoubles = Utils::BlockReduceSum(twoDoubles);
+
+ // Final sum in first thread of each block
+ if (threadIdx.x == 0)
+ {
+#if __CUDA_ARCH__ < 600
+ Utils::atomicAdd(&sampleStats->Mean , twoDoubles.A);
+ Utils::atomicAdd(&sampleStats->VarianceCore, twoDoubles.B);
+#else
+ atomicAdd(&sampleStats->Mean , twoDoubles.A);
+ atomicAdd(&sampleStats->VarianceCore, twoDoubles.B);
+#endif
+ }
+ }
+ //-----------------------------------------------------------------------------
+ __global__
+ void CalculateAverageAndVarianceCoreFinal(SampleStats* sampleStats, const int n)
+ {
+ const int index = blockDim.x * blockIdx.x + threadIdx.x;
+
+ if (index == 0)
+ {
+ double avg = sampleStats->Mean / n;
+ sampleStats->Mean = avg;
+ sampleStats->VarianceCore -= n * avg*avg;
+ }
+ }
+ //-----------------------------------------------------------------------------
+ __global__
+ void CalculateDeltaSkewnessKurtosis(const double* sample, const int n, SampleStats* sampleStats)
+ {
+ const int index = blockDim.x * blockIdx.x + threadIdx.x;
+ const int stride = gridDim.x * blockDim.x;
+
+ double avg = sampleStats->Mean;
+ double delta = 0;
+ double skewness = 0;
+ double kurtosis = 0;
+
+ for (int i = index; i < n; i += stride)
+ {
+ double t = sample[i] - avg;
+ double t1 = t * t*t;
+
+ delta += abs(t);
+ skewness += t1;
+ kurtosis += t1 * t;
+ }
+
+ Utils::ThreeDoubles threeDoubles {delta, skewness,kurtosis};
+ threeDoubles = Utils::BlockReduceSum(threeDoubles);
+
+ // Final sum in first thread of each block
+ if (threadIdx.x == 0)
+ {
+#if __CUDA_ARCH__ < 600
+ Utils::atomicAdd(&sampleStats->Delta , threeDoubles.A);
+ Utils::atomicAdd(&sampleStats->Skewness, threeDoubles.B);
+ Utils::atomicAdd(&sampleStats->Kurtosis, threeDoubles.C);
+#else
+ atomicAdd(&sampleStats->Delta , threeDoubles.A);
+ atomicAdd(&sampleStats->Skewness, threeDoubles.B);
+ atomicAdd(&sampleStats->Kurtosis, threeDoubles.C);
+#endif
+ }
+ }
+}
diff --git a/source/gfoidl.Stochastics.Gpu-linux-64/kernel.h b/source/gfoidl.Stochastics.Gpu-linux-64/kernel.h
new file mode 100644
index 0000000..64fd921
--- /dev/null
+++ b/source/gfoidl.Stochastics.Gpu-linux-64/kernel.h
@@ -0,0 +1,13 @@
+#pragma once
+//-----------------------------------------------------------------------------
+#include
+#include
+#include "SampleStats.h"
+//-----------------------------------------------------------------------------
+namespace Kernel
+{
+ __global__ void CalculateAverageAndVarianceCore(const double* sample, const int n, SampleStats* sampleStats);
+ __global__ void CalculateAverageAndVarianceCoreFinal(SampleStats* sampleStats, const int n);
+
+ __global__ void CalculateDeltaSkewnessKurtosis(const double* sample, const int n, SampleStats* sampleStats);
+}
diff --git a/source/gfoidl.Stochastics.Gpu-linux-64/kernel_utils.cu b/source/gfoidl.Stochastics.Gpu-linux-64/kernel_utils.cu
new file mode 100644
index 0000000..f3fffae
--- /dev/null
+++ b/source/gfoidl.Stochastics.Gpu-linux-64/kernel_utils.cu
@@ -0,0 +1,193 @@
+#include "kernel_utils.h"
+#include
+
+#ifdef _DEBUG
+ #include
+#endif
+//-----------------------------------------------------------------------------
+// Code for reduction taken from https://devblogs.nvidia.com/faster-parallel-reductions-kepler/
+//-----------------------------------------------------------------------------
+namespace Kernel
+{
+ namespace Utils
+ {
+ using uint = unsigned int;
+ const uint FULL_MASK = 0xffffffff;
+ //---------------------------------------------------------------------
+#if __CUDA_ARCH__ < 600
+ __device__
+ double atomicAdd(double* address, double val)
+ {
+ using ulli = unsigned long long int;
+
+ ulli* tmp = reinterpret_cast(address);
+ ulli old = *tmp;
+ ulli assumed;
+
+ do
+ {
+ assumed = old;
+
+ old = atomicCAS(tmp, assumed, __double_as_longlong(val + __longlong_as_double(assumed)));
+ } while (assumed != old);
+
+ return __longlong_as_double(old);
+ }
+#endif
+ //---------------------------------------------------------------------
+ __device__
+ double WarpReduceSum(double value)
+ {
+ for (int offset = warpSize / 2; offset > 0; offset /= 2)
+ value += __shfl_down_sync(FULL_MASK, value, offset);
+
+ return value;
+ }
+ //---------------------------------------------------------------------
+ __device__
+ TwoDoubles WarpReduceSum(TwoDoubles twoDoubles)
+ {
+ for (int offset = warpSize / 2; offset > 0; offset /= 2)
+ {
+ twoDoubles.A += __shfl_down_sync(FULL_MASK, twoDoubles.A, offset);
+ twoDoubles.B += __shfl_down_sync(FULL_MASK, twoDoubles.B, offset);
+ }
+
+ return twoDoubles;
+ }
+ //---------------------------------------------------------------------
+ __device__
+ ThreeDoubles WarpReduceSum(ThreeDoubles threeDoubles)
+ {
+ for (int offset = warpSize / 2; offset > 0; offset /= 2)
+ {
+ threeDoubles.A += __shfl_down_sync(FULL_MASK, threeDoubles.A, offset);
+ threeDoubles.B += __shfl_down_sync(FULL_MASK, threeDoubles.B, offset);
+ threeDoubles.C += __shfl_down_sync(FULL_MASK, threeDoubles.C, offset);
+ }
+
+ return threeDoubles;
+ }
+ //---------------------------------------------------------------------
+ __device__
+ double BlockReduceSum(double value)
+ {
+#ifdef _DEBUG
+ assert(warpSize == 32);
+#endif
+ static __shared__ double shared[32];
+ const int lane = threadIdx.x & (warpSize - 1); // threadIdx.x % warpSize
+ const int warpId = threadIdx.x / warpSize;
+
+ value = WarpReduceSum(value);
+
+ if (lane == 0)
+ shared[warpId] = value;
+
+ __syncthreads();
+
+ // Read from shared memory only if that warp existed
+ bool warpExisted = (threadIdx.x < blockDim.x / warpSize) || (threadIdx.x == 0 && blockDim.x == 1);
+ value = warpExisted ? shared[lane] : 0;
+
+ // Final reduce within first warp
+ if (warpId == 0)
+ value = WarpReduceSum(value);
+
+ return value;
+ }
+ //---------------------------------------------------------------------
+ __device__
+ TwoDoubles BlockReduceSum(TwoDoubles twoDoubles)
+ {
+#ifdef _DEBUG
+ assert(warpSize == 32);
+#endif
+ static __shared__ TwoDoubles shared[32];
+ const int lane = threadIdx.x & (warpSize - 1); // threadIdx.x % warpSize
+ const int warpId = threadIdx.x / warpSize;
+
+ twoDoubles = WarpReduceSum(twoDoubles);
+
+ if (lane == 0)
+ shared[warpId] = twoDoubles;
+
+ __syncthreads();
+
+ // Read from shared memory only if that warp existed
+ bool warpExisted = (threadIdx.x < blockDim.x / warpSize) || (threadIdx.x == 0 && blockDim.x == 1);
+ twoDoubles = warpExisted ? shared[lane] : TwoDoubles {0,0};
+
+ // Final reduce within first warp
+ if (warpId == 0)
+ twoDoubles = WarpReduceSum(twoDoubles);
+
+ return twoDoubles;
+ }
+ //---------------------------------------------------------------------
+ __device__
+ ThreeDoubles BlockReduceSum(ThreeDoubles threeDoubles)
+ {
+#ifdef _DEBUG
+ assert(warpSize == 32);
+#endif
+ static __shared__ ThreeDoubles shared[32];
+ const int lane = threadIdx.x & (warpSize - 1); // threadIdx.x % warpSize
+ const int warpId = threadIdx.x / warpSize;
+
+ threeDoubles = WarpReduceSum(threeDoubles);
+
+ if (lane == 0)
+ shared[warpId] = threeDoubles;
+
+ __syncthreads();
+
+ // Read from shared memory only if that warp existed
+ bool warpExisted = (threadIdx.x < blockDim.x / warpSize) || (threadIdx.x == 0 && blockDim.x == 1);
+ threeDoubles = warpExisted ? shared[lane] : ThreeDoubles {0,0,0};
+
+ // Final reduce within first warp
+ if (warpId == 0)
+ threeDoubles = WarpReduceSum(threeDoubles);
+
+ return threeDoubles;
+ }
+ //---------------------------------------------------------------------
+ __device__
+ void ReduceSum(double value, double* result)
+ {
+ value = BlockReduceSum(value);
+
+ // Final sum in first thread of each block
+ if (threadIdx.x == 0)
+ atomicAdd(result, value);
+ }
+ //---------------------------------------------------------------------
+ __device__
+ void ReduceSum(TwoDoubles twoDoubles, TwoDoubles* result)
+ {
+ twoDoubles = BlockReduceSum(twoDoubles);
+
+ // Final sum in first thread of each block
+ if (threadIdx.x == 0)
+ {
+ atomicAdd(&result->A, twoDoubles.A);
+ atomicAdd(&result->B, twoDoubles.B);
+ }
+ }
+ //---------------------------------------------------------------------
+ __device__
+ void ReduceSum(const double* in, const int n, double* result)
+ {
+ double blockSum = 0;
+
+ const int index = blockDim.x * blockIdx.x + threadIdx.x;
+ const int stride = gridDim.x * blockDim.x;
+
+ for (int i = index; i < n; i += stride)
+ blockSum += in[i];
+
+ ReduceSum(blockSum, result);
+ }
+ }
+}
diff --git a/source/gfoidl.Stochastics.Gpu-linux-64/kernel_utils.h b/source/gfoidl.Stochastics.Gpu-linux-64/kernel_utils.h
new file mode 100644
index 0000000..1cbf5fc
--- /dev/null
+++ b/source/gfoidl.Stochastics.Gpu-linux-64/kernel_utils.h
@@ -0,0 +1,53 @@
+#pragma once
+//-----------------------------------------------------------------------------
+#include
+//-----------------------------------------------------------------------------
+namespace Kernel
+{
+ namespace Utils
+ {
+ struct TwoDoubles
+ {
+ double A;
+ double B;
+
+ __device__ TwoDoubles() {}
+ __device__ TwoDoubles(const double a, const double b)
+ {
+ this->A = a;
+ this->B = b;
+ }
+ };
+ //---------------------------------------------------------------------
+ struct ThreeDoubles
+ {
+ double A;
+ double B;
+ double C;
+
+ __device__ ThreeDoubles() {}
+ __device__ ThreeDoubles(const double a, const double b, const double c)
+ {
+ this->A = a;
+ this->B = b;
+ this->C = c;
+ }
+ };
+ //---------------------------------------------------------------------
+ __device__ double WarpReduceSum(double value);
+ __device__ TwoDoubles WarpReduceSum(TwoDoubles twoDoubles);
+ __device__ ThreeDoubles WarpReduceSum(ThreeDoubles threeDoubles);
+
+ __device__ double BlockReduceSum(double value);
+ __device__ TwoDoubles BlockReduceSum(TwoDoubles twoDoubles);
+ __device__ ThreeDoubles BlockReduceSum(ThreeDoubles threeDoubles);
+
+ __device__ void ReduceSum(double value, double* result);
+ __device__ void ReduceSum(TwoDoubles twoDoubles, TwoDoubles* result);
+ __device__ void ReduceSum(const double* in, const int n, double* result);
+
+#if __CUDA_ARCH__ < 600
+ __device__ double atomicAdd(double* address, double val);
+#endif
+ }
+}
diff --git a/source/gfoidl.Stochastics.Gpu-win-64/cpp.hint b/source/gfoidl.Stochastics.Gpu-win-64/cpp.hint
new file mode 100644
index 0000000..f02e3a1
--- /dev/null
+++ b/source/gfoidl.Stochastics.Gpu-win-64/cpp.hint
@@ -0,0 +1,5 @@
+// Hint files help the Visual Studio IDE interpret Visual C++ identifiers
+// such as names of functions and macros.
+// For more information see https://go.microsoft.com/fwlink/?linkid=865984
+#define GPU_API __declspec(dllexport)
+#define GPU_API __declspec(dllimport)
diff --git a/source/gfoidl.Stochastics.Gpu-win-64/dummy.cpp b/source/gfoidl.Stochastics.Gpu-win-64/dummy.cpp
new file mode 100644
index 0000000..21c8c45
--- /dev/null
+++ b/source/gfoidl.Stochastics.Gpu-win-64/dummy.cpp
@@ -0,0 +1 @@
+// dummy.cpp needed for VS to show C++ compiler properties
diff --git a/source/gfoidl.Stochastics.Gpu-win-64/gfoidl.Stochastics.Gpu-win-64.vcxproj b/source/gfoidl.Stochastics.Gpu-win-64/gfoidl.Stochastics.Gpu-win-64.vcxproj
new file mode 100644
index 0000000..e425d55
--- /dev/null
+++ b/source/gfoidl.Stochastics.Gpu-win-64/gfoidl.Stochastics.Gpu-win-64.vcxproj
@@ -0,0 +1,114 @@
+
+
+
+
+ Debug
+ x64
+
+
+ Release
+ x64
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ {60D3CEC5-E8DE-45E4-8834-86C9940D942A}
+ gfoidl_Stochastics_Gpu_win_64
+ 10.0.16299.0
+
+
+
+ DynamicLibrary
+ true
+ MultiByte
+ v141
+
+
+ DynamicLibrary
+ false
+ true
+ MultiByte
+ v141
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+ gfoidl-Stochastics-gpu
+
+
+ gfoidl-Stochastics-gpu
+
+
+
+ Level3
+ Disabled
+ GFOIDL_STOCHASTICS_GPU_EXPORTS;WIN32;WIN64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ ..\gfoidl.Stochastics.Gpu-linux-64;%(AdditionalIncludeDirectories)
+
+
+ true
+ Console
+ cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)
+
+
+ 64
+ true
+ true
+ compute_60,sm_60;%(CodeGeneration)
+
+
+
+
+ Level3
+ MaxSpeed
+ true
+ true
+ GFOIDL_STOCHASTICS_GPU_EXPORTS;WIN32;WIN64;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ ..\gfoidl.Stochastics.Gpu-linux-64;%(AdditionalIncludeDirectories)
+
+
+ true
+ true
+ true
+ Console
+ cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)
+
+
+ 64
+ true
+ true
+ compute_60,sm_60;%(CodeGeneration)
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/source/gfoidl.Stochastics.Native-win-x64/dll.h b/source/gfoidl.Stochastics.Native-win-x64/dll.h
index 6ede07c..63f4194 100644
--- a/source/gfoidl.Stochastics.Native-win-x64/dll.h
+++ b/source/gfoidl.Stochastics.Native-win-x64/dll.h
@@ -1,12 +1,6 @@
#pragma once
//-----------------------------------------------------------------------------
-// The following ifdef block is the standard way of creating macros which make exporting
-// from a DLL simpler. All files within this DLL are compiled with the GFOIDLSTOCHASTICSNATIVE_EXPORTS
-// symbol defined on the command line. This symbol should not be defined on any project
-// that uses this DLL. This way any other project whose source files include this file see
-// DLL_API functions as being imported from a DLL, whereas this DLL sees symbols
-// defined with this macro as being exported.
-#ifdef GFOIDLSTOCHASTICSNATIVE_EXPORTS
+#ifdef GFOIDL_STOCHASTICS_NATIVE_EXPORTS
#define DLL_API __declspec(dllexport)
#else
#define DLL_API __declspec(dllimport)
diff --git a/source/gfoidl.Stochastics.Native-win-x64/gfoidl.Stochastics.Native-win-x64.vcxproj b/source/gfoidl.Stochastics.Native-win-x64/gfoidl.Stochastics.Native-win-x64.vcxproj
index 5791532..bf5f842 100644
--- a/source/gfoidl.Stochastics.Native-win-x64/gfoidl.Stochastics.Native-win-x64.vcxproj
+++ b/source/gfoidl.Stochastics.Native-win-x64/gfoidl.Stochastics.Native-win-x64.vcxproj
@@ -57,7 +57,7 @@
Use
Level3
Disabled
- _DEBUG;GFOIDLSTOCHASTICSNATIVE_EXPORTS;_WINDOWS;_USRDLL;%(PreprocessorDefinitions)
+ _DEBUG;GFOIDL_STOCHASTICS_NATIVE_EXPORTS;_WINDOWS;_USRDLL;%(PreprocessorDefinitions)
true
@@ -72,7 +72,7 @@
MaxSpeed
true
true
- NDEBUG;GFOIDLSTOCHASTICSNATIVE_EXPORTS;_WINDOWS;_USRDLL;%(PreprocessorDefinitions)
+ NDEBUG;GFOIDL_STOCHASTICS_NATIVE_EXPORTS;_WINDOWS;_USRDLL;%(PreprocessorDefinitions)
true
diff --git a/source/gfoidl.Stochastics/Enumerators/ArrayEnumerator.cs b/source/gfoidl.Stochastics/Enumerators/ArrayEnumerator.cs
index f495a69..7bf7147 100644
--- a/source/gfoidl.Stochastics/Enumerators/ArrayEnumerator.cs
+++ b/source/gfoidl.Stochastics/Enumerators/ArrayEnumerator.cs
@@ -21,6 +21,8 @@ public ArrayEnumerable(T[] array)
//---------------------------------------------------------------------
public static implicit operator ArrayEnumerable(T[] array) => new ArrayEnumerable(array);
public static implicit operator T[] (ArrayEnumerable enumerable) => enumerable._array;
+ //---------------------------------------------------------------------
+ public ref T GetPinnableReference() => ref _array[0];
}
//---------------------------------------------------------------------
public struct ArrayEnumerator : IEnumerator
diff --git a/source/gfoidl.Stochastics/Native/Gpu.cs b/source/gfoidl.Stochastics/Native/Gpu.cs
new file mode 100644
index 0000000..5394a59
--- /dev/null
+++ b/source/gfoidl.Stochastics/Native/Gpu.cs
@@ -0,0 +1,114 @@
+using System;
+using System.Runtime.InteropServices;
+using gfoidl.Stochastics.Statistics;
+
+namespace gfoidl.Stochastics.Native
+{
+ internal static class Gpu
+ {
+ public const string EnvVariableName = "GFOIDL_STOCHASTICS_USE_GPU";
+ //---------------------------------------------------------------------
+ private static readonly bool s_isAvailable;
+ private static readonly bool s_isUseOfGpuForced;
+ //---------------------------------------------------------------------
+ static Gpu()
+ {
+ string env = Environment.GetEnvironmentVariable(EnvVariableName);
+
+ s_isAvailable = RuntimeInformation.OSArchitecture == Architecture.X64
+ && RuntimeHelper.IsRunningOnDotNetCore()
+ && GpuMethods.gpu_available()
+ && env != "0";
+
+ s_isUseOfGpuForced = string.Equals(env, "force", StringComparison.OrdinalIgnoreCase);
+ }
+ //---------------------------------------------------------------------
+ public static bool IsAvailable => s_isAvailable;
+ public static bool IsUseOfGpuForced => s_isUseOfGpuForced;
+ //---------------------------------------------------------------------
+ public static unsafe void CalculateSampleStats(Sample sample)
+ {
+ if (!s_isAvailable)
+ throw new InvalidOperationException(Strings.Gpu_not_available);
+
+ fixed (double* ptr = sample.Values)
+ {
+ SampleStats sampleStats = default;
+ int errorCode = GpuMethods.gpu_sample_calc_stats(ptr, sample.Count, &sampleStats);
+
+ if (errorCode != 0) ThrowGpuException(errorCode);
+
+ sample.Mean = sampleStats.Mean;
+ sample.Max = sampleStats.Max;
+ sample.Min = sampleStats.Min;
+ sample.Delta = sampleStats.Delta;
+ sample.VarianceCore = sampleStats.VarianceCore;
+ sample.Skewness = sampleStats.Skewness;
+ sample.Kurtosis = sampleStats.Kurtosis;
+ }
+ }
+ //---------------------------------------------------------------------
+ private static void ThrowGpuException(int errorCode)
+ {
+ IntPtr ptr = GpuMethods.gpu_get_error_string(errorCode);
+ string msg = $"CUDA runtime error: {Marshal.PtrToStringAnsi(ptr)}";
+ throw new GpuException(msg);
+ }
+ //---------------------------------------------------------------------
+ private static class GpuMethods
+ {
+ private const string LibName = "gfoidl-Stochastics-gpu";
+ //---------------------------------------------------------------------
+ [DllImport(LibName)]
+ public static extern bool gpu_available();
+ //---------------------------------------------------------------------
+ [DllImport(LibName)]
+ public static extern IntPtr gpu_get_error_string(int errorCode);
+ //---------------------------------------------------------------------
+ [DllImport(LibName)]
+ public static extern unsafe int gpu_sample_calc_stats(double* sample, int sampleSize, SampleStats* sampleStats);
+ }
+ //---------------------------------------------------------------------
+ [StructLayout(LayoutKind.Sequential)]
+ private struct SampleStats
+ {
+ public double Mean;
+ public double Max;
+ public double Min;
+ public double Delta;
+ public double VarianceCore;
+ public double Skewness;
+ public double Kurtosis;
+ }
+ }
+ //-------------------------------------------------------------------------
+ ///
+ /// Represents an error that is caused by the GPU.
+ ///
+ public class GpuException : Exception
+ {
+ ///
+ /// Initializes a new instance of the class.
+ ///
+ public GpuException() : base() { }
+
+ ///
+ /// Initializes a new instance of the class
+ /// with a specified error message.
+ ///
+ /// The message that describes the error.
+ public GpuException(string message) : base(message) { }
+
+ ///
+ /// Initializes a new instance of the
+ /// class with a specified error message and a reference to the inner
+ /// exception that is the cause of this exception.
+ ///
+ /// The error message that explains the reason for the exception.
+ ///
+ /// The exception that is the cause of the current exception, or a null reference (Nothing in Visual Basic)
+ /// if no inner exception is specified.
+ ///
+ public GpuException(string message, Exception innerException) : base(message, innerException) { }
+ }
+}
diff --git a/source/gfoidl.Stochastics/RuntimeHelper.cs b/source/gfoidl.Stochastics/RuntimeHelper.cs
index 1e6d1b9..c651fd7 100644
--- a/source/gfoidl.Stochastics/RuntimeHelper.cs
+++ b/source/gfoidl.Stochastics/RuntimeHelper.cs
@@ -12,7 +12,7 @@ public static bool IsRunningOnDotNetCore()
if (!_isRunningOnDotNetCore.HasValue)
{
string frameworkName = Assembly.GetEntryAssembly()?.GetCustomAttribute()?.FrameworkName;
- _isRunningOnDotNetCore = frameworkName.Contains("NETCoreApp");
+ _isRunningOnDotNetCore = frameworkName?.Contains("NETCoreApp") ?? false;
}
return _isRunningOnDotNetCore.Value;
diff --git a/source/gfoidl.Stochastics/SpecialFunctions.cs b/source/gfoidl.Stochastics/SpecialFunctions.cs
index fd83902..dbff91f 100644
--- a/source/gfoidl.Stochastics/SpecialFunctions.cs
+++ b/source/gfoidl.Stochastics/SpecialFunctions.cs
@@ -10,19 +10,19 @@ namespace gfoidl.Stochastics
///
public static class SpecialFunctions
{
- private static readonly bool _isDotNetCore;
- private static readonly bool _erfNative;
- private static readonly bool _erfNativeLinux;
+ private static readonly bool s_isDotNetCore;
+ private static readonly bool s_erfNative;
+ private static readonly bool s_erfNativeLinux;
//---------------------------------------------------------------------
static SpecialFunctions()
{
- _isDotNetCore = RuntimeHelper.IsRunningOnDotNetCore();
+ s_isDotNetCore = RuntimeHelper.IsRunningOnDotNetCore();
- _erfNative =
+ s_erfNative =
RuntimeInformation.OSArchitecture == Architecture.X64
&& (RuntimeInformation.IsOSPlatform(OSPlatform.Windows) || RuntimeInformation.IsOSPlatform(OSPlatform.Linux));
- _erfNativeLinux = _erfNative && RuntimeInformation.IsOSPlatform(OSPlatform.Linux);
+ s_erfNativeLinux = s_erfNative && RuntimeInformation.IsOSPlatform(OSPlatform.Linux);
}
//---------------------------------------------------------------------
///
@@ -34,7 +34,7 @@ static SpecialFunctions()
// https://math.stackexchange.com/questions/263216/error-function-erf-with-better-precision/1889960#1889960
public static double Erf(double x)
{
- if (_isDotNetCore && _erfNativeLinux)
+ if (s_isDotNetCore && s_erfNativeLinux)
return NativeMethods.gaussian_error_function(x);
/*
@@ -197,7 +197,7 @@ public static double Erf(double x)
///
public static double Erfc(double x)
{
- if (_isDotNetCore && _erfNativeLinux)
+ if (s_isDotNetCore && s_erfNativeLinux)
return NativeMethods.gaussian_error_function_complementary(x);
/*
@@ -412,7 +412,7 @@ internal static unsafe void Erf(double* values, double* result, int size)
// Is a JIT compile-time constant, due the cctor. Note only a static readonly field may not be
// sufficient (on the first access). See https://github.com/dotnet/coreclr/issues/1193
// So the not taken branch(es) will be removed.
- if (_isDotNetCore && _erfNative)
+ if (s_isDotNetCore && s_erfNative)
NativeMethods.gaussian_error_function_vector(values, result, size);
else
{
@@ -423,7 +423,7 @@ internal static unsafe void Erf(double* values, double* result, int size)
//---------------------------------------------------------------------
internal static unsafe void Erfc(double* values, double* result, int size)
{
- if (_isDotNetCore && _erfNative)
+ if (s_isDotNetCore && s_erfNative)
NativeMethods.gaussian_error_function_complementary_vector(values, result, size);
else
{
diff --git a/source/gfoidl.Stochastics/Statistics/Sample.Calculations.cs b/source/gfoidl.Stochastics/Statistics/Sample.Calculations.cs
index 546a37b..2ca98d5 100644
--- a/source/gfoidl.Stochastics/Statistics/Sample.Calculations.cs
+++ b/source/gfoidl.Stochastics/Statistics/Sample.Calculations.cs
@@ -1,5 +1,6 @@
using System;
using System.Threading.Tasks;
+using gfoidl.Stochastics.Native;
namespace gfoidl.Stochastics.Statistics
{
@@ -8,11 +9,36 @@ partial class Sample
private static ParallelOptions GetParallelOptions()
=> new ParallelOptions { MaxDegreeOfParallelism = Environment.ProcessorCount };
//---------------------------------------------------------------------
+ ///
+ /// Calculates the statistics for .
+ ///
+ ///
+ /// The statistical properties of are lazy-evaluated.
+ /// With this method these properties are instantly evalualted / calculated.
+ ///
+ public void CalculateStats()
+ {
+ // is threadsafe, because from shared state is just read
+ Task medianTask = Task.Run(() => this.CalculateMedian());
+
+ if (Gpu.IsAvailable && (Gpu.IsUseOfGpuForced || this.Count > SampleThresholds.ThresholdForGpu))
+ Gpu.CalculateSampleStats(this);
+ else
+ {
+ this.CalculateAverageAndVarianceCore();
+ this.GetMinMax();
+ this.CalculateDelta();
+ this.CalculateSkewnessAndKurtosis();
+ }
+
+ medianTask.GetAwaiter().GetResult();
+ }
+ //---------------------------------------------------------------------
private double CalculateMedian()
{
int n = this.SortedValues.Count;
- if (n % 2 == 0)
+ if ((n & (2 - 1)) == 0) // n % 2 == 0
return (_sortedValues[(n >> 1) - 1] + _sortedValues[n >> 1]) * 0.5;
else
// this is correct, but n is an int, so the next line is
@@ -25,7 +51,7 @@ private double CalculateMedian()
private double CalculateSampleVariance() => this.VarianceCore / (this.Count - 1d);
//---------------------------------------------------------------------
private double _varianceCore = double.NaN;
- private double VarianceCore
+ internal double VarianceCore
{
get
{
@@ -34,6 +60,7 @@ private double VarianceCore
return _varianceCore;
}
+ set => _varianceCore = value;
}
}
}
diff --git a/source/gfoidl.Stochastics/Statistics/Sample.cs b/source/gfoidl.Stochastics/Statistics/Sample.cs
index 766f3f7..c26b3cf 100644
--- a/source/gfoidl.Stochastics/Statistics/Sample.cs
+++ b/source/gfoidl.Stochastics/Statistics/Sample.cs
@@ -160,6 +160,7 @@ public double Delta
return _delta;
}
+ internal set => _delta = value;
}
//---------------------------------------------------------------------
///
@@ -261,6 +262,7 @@ public double Skewness
return _skewness;
}
+ internal set => _skewness = value;
}
//---------------------------------------------------------------------
private double _kurtosis = double.NaN;
@@ -282,6 +284,7 @@ public double Kurtosis
return _kurtosis;
}
+ internal set => _kurtosis = value;
}
//---------------------------------------------------------------------
///
diff --git a/source/gfoidl.Stochastics/Statistics/SampleThresholds.cs b/source/gfoidl.Stochastics/Statistics/SampleThresholds.cs
index 45ba420..1d10c21 100644
--- a/source/gfoidl.Stochastics/Statistics/SampleThresholds.cs
+++ b/source/gfoidl.Stochastics/Statistics/SampleThresholds.cs
@@ -23,5 +23,10 @@ public static class SampleThresholds
/// and .
///
public static int ThresholdForMinMax { get; set; } = 1_750_000;
+
+ ///
+ /// Threshold for cpu vs. gpu execution for .
+ ///
+ public static int ThresholdForGpu { get; set; } = 5_000_000;
}
}
diff --git a/source/gfoidl.Stochastics/Strings.Designer.cs b/source/gfoidl.Stochastics/Strings.Designer.cs
index b5b90f6..56d7968 100644
--- a/source/gfoidl.Stochastics/Strings.Designer.cs
+++ b/source/gfoidl.Stochastics/Strings.Designer.cs
@@ -60,6 +60,15 @@ internal Strings() {
}
}
+ ///
+ /// Looks up a localized string similar to No GPU for CUDA is available, or GFOIDL_STOCHASTICS_USE_GPU is set to "0" (in order to disable usage of the GPU). To use the GPU the process must be x64 and running on .NET Core..
+ ///
+ internal static string Gpu_not_available {
+ get {
+ return ResourceManager.GetString("Gpu_not_available", resourceCulture);
+ }
+ }
+
///
/// Looks up a localized string similar to The value must be greater than 0..
///
diff --git a/source/gfoidl.Stochastics/Strings.resx b/source/gfoidl.Stochastics/Strings.resx
index a1af411..c9a0fcb 100644
--- a/source/gfoidl.Stochastics/Strings.resx
+++ b/source/gfoidl.Stochastics/Strings.resx
@@ -117,6 +117,9 @@
System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089
+
+ No GPU for CUDA is available, or GFOIDL_STOCHASTICS_USE_GPU is set to "0" (in order to disable usage of the GPU). To use the GPU the process must be x64 and running on .NET Core.
+
The value must be greater than 0.
diff --git a/source/gfoidl.Stochastics/gfoidl.Stochastics.csproj b/source/gfoidl.Stochastics/gfoidl.Stochastics.csproj
index 99fb2f5..9442c60 100644
--- a/source/gfoidl.Stochastics/gfoidl.Stochastics.csproj
+++ b/source/gfoidl.Stochastics/gfoidl.Stochastics.csproj
@@ -32,11 +32,16 @@
-
+
+
+
+
+
+
diff --git a/tests/gfoidl.Stochastics.Gpu-win-64.Tests.Console/gfoidl.Stochastics.Gpu-win-64.Tests.Console.vcxproj b/tests/gfoidl.Stochastics.Gpu-win-64.Tests.Console/gfoidl.Stochastics.Gpu-win-64.Tests.Console.vcxproj
new file mode 100644
index 0000000..692f45d
--- /dev/null
+++ b/tests/gfoidl.Stochastics.Gpu-win-64.Tests.Console/gfoidl.Stochastics.Gpu-win-64.Tests.Console.vcxproj
@@ -0,0 +1,93 @@
+
+
+
+
+ Debug
+ x64
+
+
+ Release
+ x64
+
+
+
+ 15.0
+ {1C7AA347-76CE-4C7F-A468-B46ADDD93215}
+ Win32Proj
+ gfoidlStochasticsGpuwin64TestsConsole
+ 10.0.16299.0
+
+
+
+ Application
+ true
+ v141
+ Unicode
+
+
+ Application
+ false
+ v141
+ true
+ Unicode
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+
+
+ false
+
+
+
+ Level3
+ Disabled
+ _DEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ true
+ ..\..\source\gfoidl.Stochastics.Gpu-linux-64;%(AdditionalIncludeDirectories)
+
+
+ true
+ Console
+
+
+
+
+ Level3
+ MaxSpeed
+ true
+ true
+ NDEBUG;_CONSOLE;%(PreprocessorDefinitions)
+ true
+ ..\..\source\gfoidl.Stochastics.Gpu-linux-64;%(AdditionalIncludeDirectories)
+
+
+ true
+ true
+ true
+ Console
+
+
+
+
+ {60d3cec5-e8de-45e4-8834-86c9940d942a}
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/tests/gfoidl.Stochastics.Gpu-win-64.Tests.Console/gfoidl.Stochastics.Gpu-win-64.Tests.Console.vcxproj.filters b/tests/gfoidl.Stochastics.Gpu-win-64.Tests.Console/gfoidl.Stochastics.Gpu-win-64.Tests.Console.vcxproj.filters
new file mode 100644
index 0000000..a91ad0e
--- /dev/null
+++ b/tests/gfoidl.Stochastics.Gpu-win-64.Tests.Console/gfoidl.Stochastics.Gpu-win-64.Tests.Console.vcxproj.filters
@@ -0,0 +1,22 @@
+
+
+
+
+ {4FC737F1-C7A5-4376-A066-2A32D752A2FF}
+ cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx
+
+
+ {93995380-89BD-4b04-88EB-625FBE52EBFB}
+ h;hh;hpp;hxx;hm;inl;inc;ipp;xsd
+
+
+ {67DA6AB6-F800-4c08-8B7A-83BB121AAD01}
+ rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
+
+
+
+
+ Source Files
+
+
+
\ No newline at end of file
diff --git a/tests/gfoidl.Stochastics.Gpu-win-64.Tests.Console/main.cpp b/tests/gfoidl.Stochastics.Gpu-win-64.Tests.Console/main.cpp
new file mode 100644
index 0000000..5c351c3
--- /dev/null
+++ b/tests/gfoidl.Stochastics.Gpu-win-64.Tests.Console/main.cpp
@@ -0,0 +1,69 @@
+//#define SMALL_SAMPLE
+//#define MEDIUM_SAMPLE
+//-----------------------------------------------------------------------------
+#include "gpu_core.h"
+#include
+#include
+//-----------------------------------------------------------------------------
+using std::cout;
+using std::cerr;
+using std::endl;
+//-----------------------------------------------------------------------------
+int main()
+{
+#if defined(SMALL_SAMPLE)
+ const int N = 3;
+ double sample[3] = {1, 2, 3};
+ double avgExpected = 2;
+ double deltaExpected = 2.0 / 3;
+#elif defined(MEDIUM_SAMPLE)
+ const int N = 20;
+ double sample[] = {0, 3, 4, 1, 2, 3, 0, 2, 1, 3, 2, 0, 2, 2, 3, 2, 5, 2, 3, 999};
+ double avgExpected = 51.95;
+ double deltaExpected = 94.705;
+#else
+ const int N = 2000000;
+ double* sample = new double[N];
+ double avgExpected = 0;
+ double deltaExpected = 0;
+
+ for (int i = 0; i < N; ++i)
+ {
+ double t = (double)rand() / RAND_MAX;
+ sample[i] = t;
+ avgExpected += t;
+ }
+
+ avgExpected /= N;
+
+ for (int i = 0; i < N; ++i)
+ deltaExpected += abs(sample[i] - avgExpected);
+
+ deltaExpected /= N;
+#endif
+
+ SampleStats sampleStats;
+ int errorCode = gpu_sample_calc_stats(sample, N, &sampleStats);
+
+ cout << endl;
+ cout << "CUDA errorcode: " << errorCode << endl;
+
+ if (errorCode != 0)
+ {
+ const char* msg = gpu_get_error_string(errorCode);
+ cerr << msg << endl;
+ return 1;
+ }
+
+ cout << endl;
+ cout << "avg" << endl;
+ cout << "expected: " << avgExpected << endl;
+ cout << "actual: " << sampleStats.Mean << endl;
+ assert(abs(avgExpected - sampleStats.Mean) < 1e-3);
+
+ cout << endl;
+ cout << "delta" << endl;
+ cout << "expected: " << deltaExpected << endl;
+ cout << "actual: " << sampleStats.Delta << endl;
+ assert(abs(deltaExpected - sampleStats.Delta) < 1e-3);
+}
diff --git a/tests/gfoidl.Stochastics.Gpu-win-64.Tests/TestHelper.cpp b/tests/gfoidl.Stochastics.Gpu-win-64.Tests/TestHelper.cpp
new file mode 100644
index 0000000..5868fc2
--- /dev/null
+++ b/tests/gfoidl.Stochastics.Gpu-win-64.Tests/TestHelper.cpp
@@ -0,0 +1,23 @@
+#include "stdafx.h"
+#include "TestHelper.h"
+#include "gpu_core.h"
+//-----------------------------------------------------------------------------
+using namespace Microsoft::VisualStudio::CppUnitTestFramework;
+//-----------------------------------------------------------------------------
+void TestHelper::FailIfError(const int errorCode)
+{
+ if (errorCode == 0) return;
+
+ const char* errorMsg = gpu_get_error_string(errorCode);
+ wchar_t* msg = TestHelper::ToWchar(errorMsg);
+ Assert::Fail(msg);
+}
+//-----------------------------------------------------------------------------
+wchar_t* TestHelper::ToWchar(const char* c)
+{
+ const size_t strLen = strlen(c) + 1;
+ wchar_t* wc = new wchar_t[strLen];
+ mbstowcs(wc, c, strLen);
+
+ return wc;
+}
diff --git a/tests/gfoidl.Stochastics.Gpu-win-64.Tests/TestHelper.h b/tests/gfoidl.Stochastics.Gpu-win-64.Tests/TestHelper.h
new file mode 100644
index 0000000..2234388
--- /dev/null
+++ b/tests/gfoidl.Stochastics.Gpu-win-64.Tests/TestHelper.h
@@ -0,0 +1,10 @@
+#pragma once
+//-----------------------------------------------------------------------------
+class TestHelper
+{
+public:
+ static void FailIfError(const int errorCode);
+
+private:
+ static wchar_t* ToWchar(const char* c);
+};
diff --git a/tests/gfoidl.Stochastics.Gpu-win-64.Tests/gfoidl.Stochastics.Gpu-win-64.Tests.vcxproj b/tests/gfoidl.Stochastics.Gpu-win-64.Tests/gfoidl.Stochastics.Gpu-win-64.Tests.vcxproj
new file mode 100644
index 0000000..7591514
--- /dev/null
+++ b/tests/gfoidl.Stochastics.Gpu-win-64.Tests/gfoidl.Stochastics.Gpu-win-64.Tests.vcxproj
@@ -0,0 +1,108 @@
+
+
+
+
+ Debug
+ x64
+
+
+ Release
+ x64
+
+
+
+ 15.0
+ {90DA6C71-5A2B-4D26-9EA4-B6BF60A7B7E3}
+ Win32Proj
+ gfoidlStochasticsGpuwin64Tests
+ 10.0.16299.0
+ NativeUnitTestProject
+
+
+
+ DynamicLibrary
+ true
+ v141
+ Unicode
+ false
+
+
+ DynamicLibrary
+ false
+ v141
+ true
+ Unicode
+ false
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+
+
+ true
+
+
+
+ Use
+ Level3
+ Disabled
+ ..\..\source\gfoidl.Stochastics.Gpu-linux-64;$(VCInstallDir)UnitTest\include;%(AdditionalIncludeDirectories)
+ _DEBUG;%(PreprocessorDefinitions)
+ true
+
+
+ Windows
+ $(VCInstallDir)UnitTest\lib;%(AdditionalLibraryDirectories)
+
+
+
+
+ Level3
+ Use
+ MaxSpeed
+ true
+ true
+ ..\..\source\gfoidl.Stochastics.Gpu-linux-64;$(VCInstallDir)UnitTest\include;%(AdditionalIncludeDirectories)
+ NDEBUG;%(PreprocessorDefinitions)
+ true
+
+
+ Windows
+ true
+ true
+ $(VCInstallDir)UnitTest\lib;%(AdditionalLibraryDirectories)
+
+
+
+
+
+
+
+
+
+ Create
+ Create
+
+
+
+
+
+
+ {60d3cec5-e8de-45e4-8834-86c9940d942a}
+
+
+
+
+
+
\ No newline at end of file
diff --git a/tests/gfoidl.Stochastics.Gpu-win-64.Tests/gfoidl.Stochastics.Gpu-win-64.Tests.vcxproj.filters b/tests/gfoidl.Stochastics.Gpu-win-64.Tests/gfoidl.Stochastics.Gpu-win-64.Tests.vcxproj.filters
new file mode 100644
index 0000000..8836c10
--- /dev/null
+++ b/tests/gfoidl.Stochastics.Gpu-win-64.Tests/gfoidl.Stochastics.Gpu-win-64.Tests.vcxproj.filters
@@ -0,0 +1,39 @@
+
+
+
+
+ {4FC737F1-C7A5-4376-A066-2A32D752A2FF}
+ cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx
+
+
+ {93995380-89BD-4b04-88EB-625FBE52EBFB}
+ h;hh;hpp;hxx;hm;inl;inc;ipp;xsd
+
+
+ {67DA6AB6-F800-4c08-8B7A-83BB121AAD01}
+ rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms
+
+
+
+
+ Header Files
+
+
+ Header Files
+
+
+ Header Files
+
+
+
+
+ Source Files
+
+
+ Source Files
+
+
+ Source Files
+
+
+
\ No newline at end of file
diff --git a/tests/gfoidl.Stochastics.Gpu-win-64.Tests/gpu_core.cpp b/tests/gfoidl.Stochastics.Gpu-win-64.Tests/gpu_core.cpp
new file mode 100644
index 0000000..5ee0e22
--- /dev/null
+++ b/tests/gfoidl.Stochastics.Gpu-win-64.Tests/gpu_core.cpp
@@ -0,0 +1,67 @@
+#include "stdafx.h"
+#include "TestHelper.h"
+#include "gpu_core.h"
+//-----------------------------------------------------------------------------
+using namespace Microsoft::VisualStudio::CppUnitTestFramework;
+//-----------------------------------------------------------------------------
+namespace gfoidlStochasticsGpuwin64Tests
+{
+ TEST_CLASS(gpu_core_Tests)
+ {
+ private:
+ static double _sample[];
+
+ public:
+ TEST_METHOD(gpu_available___true)
+ {
+ bool actual = gpu_available();
+
+ Assert::IsTrue(actual);
+ }
+ //---------------------------------------------------------------------
+ TEST_METHOD(gpu_sample_calc_mean___OK)
+ {
+ const int N = 1000000;
+ double* sample = new double[N];
+ double expected = 0.0;
+
+ for (int i = 0; i < N; ++i)
+ {
+ double t = (double)rand() / RAND_MAX;
+ sample[i] = t;
+ expected += t;
+ }
+ expected /= N;
+
+ SampleStats sampleStats;
+ int errorCode = gpu_sample_calc_stats(sample, N, &sampleStats);
+
+ TestHelper::FailIfError(errorCode);
+
+ Assert::AreEqual(expected, sampleStats.Mean, 1e-3);
+
+ delete[] sample;
+ }
+ //---------------------------------------------------------------------
+ TEST_METHOD(gpu_sample_calc_stats___OK)
+ {
+ SampleStats sampleStats;
+ int errorCode = gpu_sample_calc_stats(_sample, 20, &sampleStats);
+
+ TestHelper::FailIfError(errorCode);
+
+ // Expected values calculated with gnuplot 5.0 patchlevel 1
+ double standardDeviation = sqrt(sampleStats.VarianceCore / 20);
+ double sampleStandardDeviation = sqrt(sampleStats.VarianceCore / (20 - 1));
+
+ Assert::AreEqual(51.9500 , sampleStats.Mean , 1e-3);
+ Assert::AreEqual(217.2718, standardDeviation , 1e-3);
+ Assert::AreEqual(222.9162, sampleStandardDeviation, 1e-3);
+ Assert::AreEqual(94.7050 , sampleStats.Delta , 1e-3);
+ Assert::AreEqual(4.1293 , sampleStats.Skewness , 1e-3);
+ Assert::AreEqual(18.0514 , sampleStats.Kurtosis , 1e-3);
+ }
+ };
+ //-------------------------------------------------------------------------
+ double gpu_core_Tests::_sample[] = {0, 3, 4, 1, 2, 3, 0, 2, 1, 3, 2, 0, 2, 2, 3, 2, 5, 2, 3, 999};
+}
diff --git a/tests/gfoidl.Stochastics.Gpu-win-64.Tests/stdafx.cpp b/tests/gfoidl.Stochastics.Gpu-win-64.Tests/stdafx.cpp
new file mode 100644
index 0000000..b43eab4
--- /dev/null
+++ b/tests/gfoidl.Stochastics.Gpu-win-64.Tests/stdafx.cpp
@@ -0,0 +1,8 @@
+// stdafx.cpp : source file that includes just the standard includes
+// gfoidl.Stochastics.Gpu-win-64.Tests.pch will be the pre-compiled header
+// stdafx.obj will contain the pre-compiled type information
+
+#include "stdafx.h"
+
+// TODO: reference any additional headers you need in STDAFX.H
+// and not in this file
diff --git a/tests/gfoidl.Stochastics.Gpu-win-64.Tests/stdafx.h b/tests/gfoidl.Stochastics.Gpu-win-64.Tests/stdafx.h
new file mode 100644
index 0000000..43280fc
--- /dev/null
+++ b/tests/gfoidl.Stochastics.Gpu-win-64.Tests/stdafx.h
@@ -0,0 +1,13 @@
+// stdafx.h : include file for standard system include files,
+// or project specific include files that are used frequently, but
+// are changed infrequently
+//
+
+#pragma once
+
+#include "targetver.h"
+
+// Headers for CppUnitTest
+#include "CppUnitTest.h"
+
+// TODO: reference additional headers your program requires here
diff --git a/tests/gfoidl.Stochastics.Gpu-win-64.Tests/targetver.h b/tests/gfoidl.Stochastics.Gpu-win-64.Tests/targetver.h
new file mode 100644
index 0000000..87c0086
--- /dev/null
+++ b/tests/gfoidl.Stochastics.Gpu-win-64.Tests/targetver.h
@@ -0,0 +1,8 @@
+#pragma once
+
+// Including SDKDDKVer.h defines the highest available Windows platform.
+
+// If you wish to build your application for a previous Windows platform, include WinSDKVer.h and
+// set the _WIN32_WINNT macro to the platform you wish to support before including SDKDDKVer.h.
+
+#include
diff --git a/tests/gfoidl.Stochastics.Tests/Native/GpuTests/CalculateSampleStats.cs b/tests/gfoidl.Stochastics.Tests/Native/GpuTests/CalculateSampleStats.cs
new file mode 100644
index 0000000..3be1602
--- /dev/null
+++ b/tests/gfoidl.Stochastics.Tests/Native/GpuTests/CalculateSampleStats.cs
@@ -0,0 +1,33 @@
+using gfoidl.Stochastics.Native;
+using gfoidl.Stochastics.Statistics;
+using NUnit.Framework;
+
+namespace gfoidl.Stochastics.Tests.Native.GpuTests
+{
+ [TestFixture, Explicit("GPU CUDA must be available")]
+ public class CalculateSampleStats
+ {
+ [Test]
+ public void Values_given___correct_stats()
+ {
+ Assume.That(Gpu.IsAvailable, "GPU is not available");
+
+ double[] values = { 0, 3, 4, 1, 2, 3, 0, 2, 1, 3, 2, 0, 2, 2, 3, 2, 5, 2, 3, 999 };
+
+ var sample = new Sample(values);
+
+ Gpu.CalculateSampleStats(sample);
+
+ Assert.Multiple(() =>
+ {
+ // Expected values calculated with gnuplot 5.0 patchlevel 1
+ Assert.AreEqual(51.9500 , sample.Mean , 1e-3, nameof(sample.Mean));
+ Assert.AreEqual(217.2718, sample.StandardDeviation , 1e-3, nameof(sample.StandardDeviation));
+ Assert.AreEqual(222.9162, sample.SampleStandardDeviation, 1e-3, nameof(sample.SampleStandardDeviation));
+ Assert.AreEqual(4.1293 , sample.Skewness , 1e-3, nameof(sample.Skewness));
+ Assert.AreEqual(18.0514 , sample.Kurtosis , 1e-3, nameof(sample.Kurtosis));
+ Assert.AreEqual(94.7050 , sample.Delta , 1e-3, nameof(sample.Delta));
+ });
+ }
+ }
+}
diff --git a/tests/gfoidl.Stochastics.Tests/Native/GpuTests/IsAvailable.cs b/tests/gfoidl.Stochastics.Tests/Native/GpuTests/IsAvailable.cs
new file mode 100644
index 0000000..3d74c34
--- /dev/null
+++ b/tests/gfoidl.Stochastics.Tests/Native/GpuTests/IsAvailable.cs
@@ -0,0 +1,26 @@
+using System;
+using gfoidl.Stochastics.Native;
+using NUnit.Framework;
+
+namespace gfoidl.Stochastics.Tests.Native.GpuTests
+{
+ [TestFixture, Explicit("Mutual exclusive tests")]
+ public class IsAvailable
+ {
+ [Test]
+ public void Env_set_to_0___false()
+ {
+ Environment.SetEnvironmentVariable(Gpu.EnvVariableName, "0");
+
+ Assert.IsFalse(Gpu.IsAvailable);
+ }
+ //---------------------------------------------------------------------
+ [Test]
+ public void Env_set_to_force___true()
+ {
+ Environment.SetEnvironmentVariable(Gpu.EnvVariableName, "force");
+
+ Assert.IsTrue(Gpu.IsAvailable);
+ }
+ }
+}