diff --git a/.gitattributes b/.gitattributes index ba05560..0d9f7ef 100644 --- a/.gitattributes +++ b/.gitattributes @@ -24,6 +24,7 @@ Dockerfile text eol=lf *.f03 filter=tabs2spaces *.sql filter=tabs2spaces *.ino filter=tabs2spaces +*.cu filter=tabs2spaces # Project files *.csproj filter=tabs2spaces diff --git a/gfoidl.Stochastics.sln b/gfoidl.Stochastics.sln index f9609a9..93a5f9b 100644 --- a/gfoidl.Stochastics.sln +++ b/gfoidl.Stochastics.sln @@ -72,6 +72,14 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "images", "images", "{A741E7 doc\release-notes\images\perf-avg-var-v1.1.0-preview-3.png = doc\release-notes\images\perf-avg-var-v1.1.0-preview-3.png EndProjectSection EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gfoidl.Stochastics.Gpu-win-64", "source\gfoidl.Stochastics.Gpu-win-64\gfoidl.Stochastics.Gpu-win-64.vcxproj", "{60D3CEC5-E8DE-45E4-8834-86C9940D942A}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gfoidl.Stochastics.Gpu-win-64.Tests", "tests\gfoidl.Stochastics.Gpu-win-64.Tests\gfoidl.Stochastics.Gpu-win-64.Tests.vcxproj", "{90DA6C71-5A2B-4D26-9EA4-B6BF60A7B7E3}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gfoidl.Stochastics.Gpu-win-64.Tests.Console", "tests\gfoidl.Stochastics.Gpu-win-64.Tests.Console\gfoidl.Stochastics.Gpu-win-64.Tests.Console.vcxproj", "{1C7AA347-76CE-4C7F-A468-B46ADDD93215}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gfoidl.Stochastics.Gpu-linux-64", "source\gfoidl.Stochastics.Gpu-linux-64\gfoidl.Stochastics.Gpu-linux-64.vcxproj", "{0729F977-87F1-4139-A9B1-569BFB4A352E}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -83,27 +91,21 @@ Global {B071CBBC-D096-4E62-A7D1-146FFC9B0431}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {B071CBBC-D096-4E62-A7D1-146FFC9B0431}.Debug|Any CPU.Build.0 = Debug|Any CPU {B071CBBC-D096-4E62-A7D1-146FFC9B0431}.Debug|x64.ActiveCfg = Debug|Any CPU - {B071CBBC-D096-4E62-A7D1-146FFC9B0431}.Debug|x64.Build.0 = Debug|Any CPU {B071CBBC-D096-4E62-A7D1-146FFC9B0431}.Release|Any CPU.ActiveCfg = Release|Any CPU {B071CBBC-D096-4E62-A7D1-146FFC9B0431}.Release|Any CPU.Build.0 = Release|Any CPU {B071CBBC-D096-4E62-A7D1-146FFC9B0431}.Release|x64.ActiveCfg = Release|Any CPU - {B071CBBC-D096-4E62-A7D1-146FFC9B0431}.Release|x64.Build.0 = Release|Any CPU {902CB41A-DDB5-4AEB-B5E8-AD6494EF4DB3}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {902CB41A-DDB5-4AEB-B5E8-AD6494EF4DB3}.Debug|Any CPU.Build.0 = Debug|Any CPU {902CB41A-DDB5-4AEB-B5E8-AD6494EF4DB3}.Debug|x64.ActiveCfg = Debug|Any CPU - {902CB41A-DDB5-4AEB-B5E8-AD6494EF4DB3}.Debug|x64.Build.0 = Debug|Any CPU {902CB41A-DDB5-4AEB-B5E8-AD6494EF4DB3}.Release|Any CPU.ActiveCfg = Release|Any CPU {902CB41A-DDB5-4AEB-B5E8-AD6494EF4DB3}.Release|Any CPU.Build.0 = Release|Any CPU {902CB41A-DDB5-4AEB-B5E8-AD6494EF4DB3}.Release|x64.ActiveCfg = Release|Any CPU - {902CB41A-DDB5-4AEB-B5E8-AD6494EF4DB3}.Release|x64.Build.0 = Release|Any CPU {1B138AD4-AB52-4011-A0CB-C9AD6BB10D7B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {1B138AD4-AB52-4011-A0CB-C9AD6BB10D7B}.Debug|Any CPU.Build.0 = Debug|Any CPU {1B138AD4-AB52-4011-A0CB-C9AD6BB10D7B}.Debug|x64.ActiveCfg = Debug|Any CPU - {1B138AD4-AB52-4011-A0CB-C9AD6BB10D7B}.Debug|x64.Build.0 = Debug|Any CPU {1B138AD4-AB52-4011-A0CB-C9AD6BB10D7B}.Release|Any CPU.ActiveCfg = Release|Any CPU {1B138AD4-AB52-4011-A0CB-C9AD6BB10D7B}.Release|Any CPU.Build.0 = Release|Any CPU {1B138AD4-AB52-4011-A0CB-C9AD6BB10D7B}.Release|x64.ActiveCfg = Release|Any CPU - {1B138AD4-AB52-4011-A0CB-C9AD6BB10D7B}.Release|x64.Build.0 = Release|Any CPU {813695B2-09E2-4111-BD6B-18EEF40299C0}.Debug|Any CPU.ActiveCfg = Debug|x64 {813695B2-09E2-4111-BD6B-18EEF40299C0}.Debug|x64.ActiveCfg = Debug|x64 {813695B2-09E2-4111-BD6B-18EEF40299C0}.Release|Any CPU.ActiveCfg = Release|x64 @@ -120,6 +122,28 @@ Global {7A4AAA49-6071-4FEA-8920-D8EFC6B8A019}.Release|Any CPU.ActiveCfg = Release|x64 {7A4AAA49-6071-4FEA-8920-D8EFC6B8A019}.Release|x64.ActiveCfg = Release|x64 {7A4AAA49-6071-4FEA-8920-D8EFC6B8A019}.Release|x64.Build.0 = Release|x64 + {60D3CEC5-E8DE-45E4-8834-86C9940D942A}.Debug|Any CPU.ActiveCfg = Debug|x64 + {60D3CEC5-E8DE-45E4-8834-86C9940D942A}.Debug|x64.ActiveCfg = Debug|x64 + {60D3CEC5-E8DE-45E4-8834-86C9940D942A}.Debug|x64.Build.0 = Debug|x64 + {60D3CEC5-E8DE-45E4-8834-86C9940D942A}.Release|Any CPU.ActiveCfg = Release|x64 + {60D3CEC5-E8DE-45E4-8834-86C9940D942A}.Release|x64.ActiveCfg = Release|x64 + {60D3CEC5-E8DE-45E4-8834-86C9940D942A}.Release|x64.Build.0 = Release|x64 + {90DA6C71-5A2B-4D26-9EA4-B6BF60A7B7E3}.Debug|Any CPU.ActiveCfg = Debug|x64 + {90DA6C71-5A2B-4D26-9EA4-B6BF60A7B7E3}.Debug|x64.ActiveCfg = Debug|x64 + {90DA6C71-5A2B-4D26-9EA4-B6BF60A7B7E3}.Debug|x64.Build.0 = Debug|x64 + {90DA6C71-5A2B-4D26-9EA4-B6BF60A7B7E3}.Release|Any CPU.ActiveCfg = Release|x64 + {90DA6C71-5A2B-4D26-9EA4-B6BF60A7B7E3}.Release|x64.ActiveCfg = Release|x64 + {90DA6C71-5A2B-4D26-9EA4-B6BF60A7B7E3}.Release|x64.Build.0 = Release|x64 + {1C7AA347-76CE-4C7F-A468-B46ADDD93215}.Debug|Any CPU.ActiveCfg = Debug|x64 + {1C7AA347-76CE-4C7F-A468-B46ADDD93215}.Debug|x64.ActiveCfg = Debug|x64 + {1C7AA347-76CE-4C7F-A468-B46ADDD93215}.Debug|x64.Build.0 = Debug|x64 + {1C7AA347-76CE-4C7F-A468-B46ADDD93215}.Release|Any CPU.ActiveCfg = Release|x64 + {1C7AA347-76CE-4C7F-A468-B46ADDD93215}.Release|x64.ActiveCfg = Release|x64 + {1C7AA347-76CE-4C7F-A468-B46ADDD93215}.Release|x64.Build.0 = Release|x64 + {0729F977-87F1-4139-A9B1-569BFB4A352E}.Debug|Any CPU.ActiveCfg = Debug|x64 + {0729F977-87F1-4139-A9B1-569BFB4A352E}.Debug|x64.ActiveCfg = Debug|x64 + {0729F977-87F1-4139-A9B1-569BFB4A352E}.Release|Any CPU.ActiveCfg = Release|x64 + {0729F977-87F1-4139-A9B1-569BFB4A352E}.Release|x64.ActiveCfg = Release|x64 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -137,6 +161,10 @@ Global {7A4AAA49-6071-4FEA-8920-D8EFC6B8A019} = {EFBEAA6C-2E09-4A10-898C-D20897F3609F} {2EE5949B-0F90-49E5-B487-CC04C0940BC4} = {7917B2B4-78F0-4BDE-86FE-316549A84E4E} {A741E7D0-1788-4145-BA6C-2C4698CCECF1} = {2EE5949B-0F90-49E5-B487-CC04C0940BC4} + {60D3CEC5-E8DE-45E4-8834-86C9940D942A} = {0B4051AE-E1F2-4601-B0A8-B4CA660CC4B2} + {90DA6C71-5A2B-4D26-9EA4-B6BF60A7B7E3} = {EFBEAA6C-2E09-4A10-898C-D20897F3609F} + {1C7AA347-76CE-4C7F-A468-B46ADDD93215} = {EFBEAA6C-2E09-4A10-898C-D20897F3609F} + {0729F977-87F1-4139-A9B1-569BFB4A352E} = {0B4051AE-E1F2-4601-B0A8-B4CA660CC4B2} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {A594189A-069D-4FEE-8CD8-1A5786CF97AA} diff --git a/native-out/gfoidl-Stochastics-gpu.dll b/native-out/gfoidl-Stochastics-gpu.dll new file mode 100644 index 0000000..df5b6cf Binary files /dev/null and b/native-out/gfoidl-Stochastics-gpu.dll differ diff --git a/native-out/libgfoidl-Stochastics-gpu.so b/native-out/libgfoidl-Stochastics-gpu.so new file mode 100644 index 0000000..5394c54 Binary files /dev/null and b/native-out/libgfoidl-Stochastics-gpu.so differ diff --git a/source/gfoidl.Stochastics.Gpu-linux-64/SampleStats.h b/source/gfoidl.Stochastics.Gpu-linux-64/SampleStats.h new file mode 100644 index 0000000..d43a23c --- /dev/null +++ b/source/gfoidl.Stochastics.Gpu-linux-64/SampleStats.h @@ -0,0 +1,12 @@ +#pragma once +//----------------------------------------------------------------------------- +struct SampleStats +{ + double Mean; + double Max; + double Min; + double Delta; + double VarianceCore; + double Skewness; + double Kurtosis; +}; diff --git a/source/gfoidl.Stochastics.Gpu-linux-64/build.sh b/source/gfoidl.Stochastics.Gpu-linux-64/build.sh new file mode 100644 index 0000000..ed73bbe --- /dev/null +++ b/source/gfoidl.Stochastics.Gpu-linux-64/build.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +set -e + +libName=libgfoidl-Stochastics-gpu.so + +# https://docs.nvidia.com/cuda/pdf/CUDA_Compiler_Driver_NVCC.pdf +nvcc -std=c++14 \ + --disable-warnings \ + -Wno-deprecated-declarations \ + -o "$libName" \ + -O3 \ + --ptxas-options=-v \ + --machine 64 \ + -x cu \ + -cudart static \ + -shared -rdc=true \ + -Xcompiler -fPIC,-fvisibility=hidden \ + -gencode=arch=compute_60,code=sm_60 \ + -gencode=arch=compute_35,code=sm_35 \ + kernel_utils.cu kernel.cu gpu_core.cu + +nm -DC "$libName" | grep " T " diff --git a/source/gfoidl.Stochastics.Gpu-linux-64/dll.h b/source/gfoidl.Stochastics.Gpu-linux-64/dll.h new file mode 100644 index 0000000..fc8d4b3 --- /dev/null +++ b/source/gfoidl.Stochastics.Gpu-linux-64/dll.h @@ -0,0 +1,23 @@ +#pragma once +//----------------------------------------------------------------------------- +#if defined _WIN32 || defined __CYGWIN__ + #ifdef GFOIDL_STOCHASTICS_GPU_EXPORTS + #define GPU_API __declspec(dllexport) + #else + #define GPU_API __declspec(dllimport) + #endif +#else + #if __GNUC__ >= 4 + #define GPU_API __attribute__ ((visibility ("default"))) + #else + #define GPU_API + #endif +#endif +//----------------------------------------------------------------------------- +#ifdef __cplusplus + #define BEGIN_EXTERN_C extern "C" { + #define END_EXTERN_C } +#else + #define BEGIN_EXTERN_C + #define END_EXTERN_C +#endif diff --git a/source/gfoidl.Stochastics.Gpu-linux-64/gfoidl.Stochastics.Gpu-linux-64.vcxproj b/source/gfoidl.Stochastics.Gpu-linux-64/gfoidl.Stochastics.Gpu-linux-64.vcxproj new file mode 100644 index 0000000..af8f40a --- /dev/null +++ b/source/gfoidl.Stochastics.Gpu-linux-64/gfoidl.Stochastics.Gpu-linux-64.vcxproj @@ -0,0 +1,51 @@ + + + + + Debug + x64 + + + Release + x64 + + + + {0729f977-87f1-4139-a9b1-569bfb4a352e} + Linux + gfoidl_Stochastics_Gpu_linux_64 + 15.0 + Linux + 1.0 + Generic + {2238F9CD-F817-4ECC-BD14-2524D2669B35} + + + + true + + + false + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/source/gfoidl.Stochastics.Gpu-linux-64/gpu_core.cu b/source/gfoidl.Stochastics.Gpu-linux-64/gpu_core.cu new file mode 100644 index 0000000..30f5c2b --- /dev/null +++ b/source/gfoidl.Stochastics.Gpu-linux-64/gpu_core.cu @@ -0,0 +1,90 @@ +//#define SINGLE_THREAD +//----------------------------------------------------------------------------- +#include "gpu_core.h" +#include +#include "kernel.h" + +#if defined(DEBUG) || defined(_DEBUG) + #include + #include +#endif +//----------------------------------------------------------------------------- +// Forward declarations +inline cudaError_t checkCuda(cudaError_t result); +//----------------------------------------------------------------------------- +bool gpu_available() +{ + int deviceCount; + cudaError_t errorId = cudaGetDeviceCount(&deviceCount); + + return errorId == cudaSuccess + && deviceCount > 0; +} +//----------------------------------------------------------------------------- +const char* gpu_get_error_string(const int errorCode) +{ + return cudaGetErrorString(static_cast(errorCode)); +} +//----------------------------------------------------------------------------- +int gpu_sample_calc_stats(double* sample, const int sampleSize, SampleStats* sampleStats) +{ + double* deviceSample; + SampleStats* deviceSampleStats; + + try + { + checkCuda(cudaMalloc(&deviceSample, sizeof(double) * sampleSize)); + checkCuda(cudaMalloc(&deviceSampleStats, sizeof(SampleStats))); + + checkCuda(cudaMemcpy(deviceSample, sample, sizeof(double) * sampleSize, cudaMemcpyHostToDevice)); + checkCuda(cudaMemset(deviceSampleStats, 0, sizeof(SampleStats))); + +#ifndef SINGLE_THREAD + const int blockSize = 256; + int numBlocks = (sampleSize + blockSize - 1) / blockSize; +#else + const int blockSize = 1; + const int numBlocks = 1; +#endif + +#if defined(DEBUG) || defined(_DEBUG) + printf("blockSize: %d\nnumBlocks: %d\n", blockSize, numBlocks); +#endif + + // For final fixup of values a separate kernel is queued to the device. + // Otherwise there's no way of syncing all threads in the grid. + Kernel::CalculateAverageAndVarianceCore<<>>(deviceSample, sampleSize, deviceSampleStats); + Kernel::CalculateAverageAndVarianceCoreFinal<<<1, 1>>>(deviceSampleStats, sampleSize); + + Kernel::CalculateDeltaSkewnessKurtosis<<>>(deviceSample, sampleSize, deviceSampleStats); + + //checkCuda(cudaDeviceSynchronize()); // not necessary + checkCuda(cudaMemcpy(sampleStats, deviceSampleStats, sizeof(SampleStats), cudaMemcpyDeviceToHost)); + + checkCuda(cudaFree(deviceSample)); + checkCuda(cudaFree(deviceSampleStats)); + + // Final fixup of values, not done yet + // No need to launch a kernel for a simple division. + sampleStats->Delta /= sampleSize; + + double sigma = sqrt(sampleStats->VarianceCore / sampleSize); + double t = sampleSize * sigma * sigma * sigma; + sampleStats->Skewness /= t; + sampleStats->Kurtosis /= t * sigma; + } + catch (const int e) + { + return e; + } + + return 0; +} +//----------------------------------------------------------------------------- +cudaError_t checkCuda(cudaError_t result) +{ + if (result != cudaSuccess) + throw static_cast(result); + + return result; +} diff --git a/source/gfoidl.Stochastics.Gpu-linux-64/gpu_core.h b/source/gfoidl.Stochastics.Gpu-linux-64/gpu_core.h new file mode 100644 index 0000000..9e887b4 --- /dev/null +++ b/source/gfoidl.Stochastics.Gpu-linux-64/gpu_core.h @@ -0,0 +1,13 @@ +#pragma once +//----------------------------------------------------------------------------- +#include "dll.h" +#include "SampleStats.h" +//----------------------------------------------------------------------------- +BEGIN_EXTERN_C + +GPU_API bool gpu_available(); +GPU_API const char* gpu_get_error_string(const int errorCode); + +GPU_API int gpu_sample_calc_stats(double* sample, const int sampleSize, SampleStats* sampleStats); + +END_EXTERN_C diff --git a/source/gfoidl.Stochastics.Gpu-linux-64/kernel.cu b/source/gfoidl.Stochastics.Gpu-linux-64/kernel.cu new file mode 100644 index 0000000..d3240e9 --- /dev/null +++ b/source/gfoidl.Stochastics.Gpu-linux-64/kernel.cu @@ -0,0 +1,88 @@ +#include "kernel.h" +#include "kernel_utils.h" +//----------------------------------------------------------------------------- +namespace Kernel +{ + __global__ + void CalculateAverageAndVarianceCore(const double* sample, const int n, SampleStats* sampleStats) + { + const int index = blockDim.x * blockIdx.x + threadIdx.x; + const int stride = gridDim.x * blockDim.x; + + double avg = 0; + double variance = 0; + + for (int i = index; i < n; i += stride) + { + avg += sample[i]; + variance += sample[i] * sample[i]; + } + + Utils::TwoDoubles twoDoubles {avg, variance}; + twoDoubles = Utils::BlockReduceSum(twoDoubles); + + // Final sum in first thread of each block + if (threadIdx.x == 0) + { +#if __CUDA_ARCH__ < 600 + Utils::atomicAdd(&sampleStats->Mean , twoDoubles.A); + Utils::atomicAdd(&sampleStats->VarianceCore, twoDoubles.B); +#else + atomicAdd(&sampleStats->Mean , twoDoubles.A); + atomicAdd(&sampleStats->VarianceCore, twoDoubles.B); +#endif + } + } + //----------------------------------------------------------------------------- + __global__ + void CalculateAverageAndVarianceCoreFinal(SampleStats* sampleStats, const int n) + { + const int index = blockDim.x * blockIdx.x + threadIdx.x; + + if (index == 0) + { + double avg = sampleStats->Mean / n; + sampleStats->Mean = avg; + sampleStats->VarianceCore -= n * avg*avg; + } + } + //----------------------------------------------------------------------------- + __global__ + void CalculateDeltaSkewnessKurtosis(const double* sample, const int n, SampleStats* sampleStats) + { + const int index = blockDim.x * blockIdx.x + threadIdx.x; + const int stride = gridDim.x * blockDim.x; + + double avg = sampleStats->Mean; + double delta = 0; + double skewness = 0; + double kurtosis = 0; + + for (int i = index; i < n; i += stride) + { + double t = sample[i] - avg; + double t1 = t * t*t; + + delta += abs(t); + skewness += t1; + kurtosis += t1 * t; + } + + Utils::ThreeDoubles threeDoubles {delta, skewness,kurtosis}; + threeDoubles = Utils::BlockReduceSum(threeDoubles); + + // Final sum in first thread of each block + if (threadIdx.x == 0) + { +#if __CUDA_ARCH__ < 600 + Utils::atomicAdd(&sampleStats->Delta , threeDoubles.A); + Utils::atomicAdd(&sampleStats->Skewness, threeDoubles.B); + Utils::atomicAdd(&sampleStats->Kurtosis, threeDoubles.C); +#else + atomicAdd(&sampleStats->Delta , threeDoubles.A); + atomicAdd(&sampleStats->Skewness, threeDoubles.B); + atomicAdd(&sampleStats->Kurtosis, threeDoubles.C); +#endif + } + } +} diff --git a/source/gfoidl.Stochastics.Gpu-linux-64/kernel.h b/source/gfoidl.Stochastics.Gpu-linux-64/kernel.h new file mode 100644 index 0000000..64fd921 --- /dev/null +++ b/source/gfoidl.Stochastics.Gpu-linux-64/kernel.h @@ -0,0 +1,13 @@ +#pragma once +//----------------------------------------------------------------------------- +#include +#include +#include "SampleStats.h" +//----------------------------------------------------------------------------- +namespace Kernel +{ + __global__ void CalculateAverageAndVarianceCore(const double* sample, const int n, SampleStats* sampleStats); + __global__ void CalculateAverageAndVarianceCoreFinal(SampleStats* sampleStats, const int n); + + __global__ void CalculateDeltaSkewnessKurtosis(const double* sample, const int n, SampleStats* sampleStats); +} diff --git a/source/gfoidl.Stochastics.Gpu-linux-64/kernel_utils.cu b/source/gfoidl.Stochastics.Gpu-linux-64/kernel_utils.cu new file mode 100644 index 0000000..f3fffae --- /dev/null +++ b/source/gfoidl.Stochastics.Gpu-linux-64/kernel_utils.cu @@ -0,0 +1,193 @@ +#include "kernel_utils.h" +#include + +#ifdef _DEBUG + #include +#endif +//----------------------------------------------------------------------------- +// Code for reduction taken from https://devblogs.nvidia.com/faster-parallel-reductions-kepler/ +//----------------------------------------------------------------------------- +namespace Kernel +{ + namespace Utils + { + using uint = unsigned int; + const uint FULL_MASK = 0xffffffff; + //--------------------------------------------------------------------- +#if __CUDA_ARCH__ < 600 + __device__ + double atomicAdd(double* address, double val) + { + using ulli = unsigned long long int; + + ulli* tmp = reinterpret_cast(address); + ulli old = *tmp; + ulli assumed; + + do + { + assumed = old; + + old = atomicCAS(tmp, assumed, __double_as_longlong(val + __longlong_as_double(assumed))); + } while (assumed != old); + + return __longlong_as_double(old); + } +#endif + //--------------------------------------------------------------------- + __device__ + double WarpReduceSum(double value) + { + for (int offset = warpSize / 2; offset > 0; offset /= 2) + value += __shfl_down_sync(FULL_MASK, value, offset); + + return value; + } + //--------------------------------------------------------------------- + __device__ + TwoDoubles WarpReduceSum(TwoDoubles twoDoubles) + { + for (int offset = warpSize / 2; offset > 0; offset /= 2) + { + twoDoubles.A += __shfl_down_sync(FULL_MASK, twoDoubles.A, offset); + twoDoubles.B += __shfl_down_sync(FULL_MASK, twoDoubles.B, offset); + } + + return twoDoubles; + } + //--------------------------------------------------------------------- + __device__ + ThreeDoubles WarpReduceSum(ThreeDoubles threeDoubles) + { + for (int offset = warpSize / 2; offset > 0; offset /= 2) + { + threeDoubles.A += __shfl_down_sync(FULL_MASK, threeDoubles.A, offset); + threeDoubles.B += __shfl_down_sync(FULL_MASK, threeDoubles.B, offset); + threeDoubles.C += __shfl_down_sync(FULL_MASK, threeDoubles.C, offset); + } + + return threeDoubles; + } + //--------------------------------------------------------------------- + __device__ + double BlockReduceSum(double value) + { +#ifdef _DEBUG + assert(warpSize == 32); +#endif + static __shared__ double shared[32]; + const int lane = threadIdx.x & (warpSize - 1); // threadIdx.x % warpSize + const int warpId = threadIdx.x / warpSize; + + value = WarpReduceSum(value); + + if (lane == 0) + shared[warpId] = value; + + __syncthreads(); + + // Read from shared memory only if that warp existed + bool warpExisted = (threadIdx.x < blockDim.x / warpSize) || (threadIdx.x == 0 && blockDim.x == 1); + value = warpExisted ? shared[lane] : 0; + + // Final reduce within first warp + if (warpId == 0) + value = WarpReduceSum(value); + + return value; + } + //--------------------------------------------------------------------- + __device__ + TwoDoubles BlockReduceSum(TwoDoubles twoDoubles) + { +#ifdef _DEBUG + assert(warpSize == 32); +#endif + static __shared__ TwoDoubles shared[32]; + const int lane = threadIdx.x & (warpSize - 1); // threadIdx.x % warpSize + const int warpId = threadIdx.x / warpSize; + + twoDoubles = WarpReduceSum(twoDoubles); + + if (lane == 0) + shared[warpId] = twoDoubles; + + __syncthreads(); + + // Read from shared memory only if that warp existed + bool warpExisted = (threadIdx.x < blockDim.x / warpSize) || (threadIdx.x == 0 && blockDim.x == 1); + twoDoubles = warpExisted ? shared[lane] : TwoDoubles {0,0}; + + // Final reduce within first warp + if (warpId == 0) + twoDoubles = WarpReduceSum(twoDoubles); + + return twoDoubles; + } + //--------------------------------------------------------------------- + __device__ + ThreeDoubles BlockReduceSum(ThreeDoubles threeDoubles) + { +#ifdef _DEBUG + assert(warpSize == 32); +#endif + static __shared__ ThreeDoubles shared[32]; + const int lane = threadIdx.x & (warpSize - 1); // threadIdx.x % warpSize + const int warpId = threadIdx.x / warpSize; + + threeDoubles = WarpReduceSum(threeDoubles); + + if (lane == 0) + shared[warpId] = threeDoubles; + + __syncthreads(); + + // Read from shared memory only if that warp existed + bool warpExisted = (threadIdx.x < blockDim.x / warpSize) || (threadIdx.x == 0 && blockDim.x == 1); + threeDoubles = warpExisted ? shared[lane] : ThreeDoubles {0,0,0}; + + // Final reduce within first warp + if (warpId == 0) + threeDoubles = WarpReduceSum(threeDoubles); + + return threeDoubles; + } + //--------------------------------------------------------------------- + __device__ + void ReduceSum(double value, double* result) + { + value = BlockReduceSum(value); + + // Final sum in first thread of each block + if (threadIdx.x == 0) + atomicAdd(result, value); + } + //--------------------------------------------------------------------- + __device__ + void ReduceSum(TwoDoubles twoDoubles, TwoDoubles* result) + { + twoDoubles = BlockReduceSum(twoDoubles); + + // Final sum in first thread of each block + if (threadIdx.x == 0) + { + atomicAdd(&result->A, twoDoubles.A); + atomicAdd(&result->B, twoDoubles.B); + } + } + //--------------------------------------------------------------------- + __device__ + void ReduceSum(const double* in, const int n, double* result) + { + double blockSum = 0; + + const int index = blockDim.x * blockIdx.x + threadIdx.x; + const int stride = gridDim.x * blockDim.x; + + for (int i = index; i < n; i += stride) + blockSum += in[i]; + + ReduceSum(blockSum, result); + } + } +} diff --git a/source/gfoidl.Stochastics.Gpu-linux-64/kernel_utils.h b/source/gfoidl.Stochastics.Gpu-linux-64/kernel_utils.h new file mode 100644 index 0000000..1cbf5fc --- /dev/null +++ b/source/gfoidl.Stochastics.Gpu-linux-64/kernel_utils.h @@ -0,0 +1,53 @@ +#pragma once +//----------------------------------------------------------------------------- +#include +//----------------------------------------------------------------------------- +namespace Kernel +{ + namespace Utils + { + struct TwoDoubles + { + double A; + double B; + + __device__ TwoDoubles() {} + __device__ TwoDoubles(const double a, const double b) + { + this->A = a; + this->B = b; + } + }; + //--------------------------------------------------------------------- + struct ThreeDoubles + { + double A; + double B; + double C; + + __device__ ThreeDoubles() {} + __device__ ThreeDoubles(const double a, const double b, const double c) + { + this->A = a; + this->B = b; + this->C = c; + } + }; + //--------------------------------------------------------------------- + __device__ double WarpReduceSum(double value); + __device__ TwoDoubles WarpReduceSum(TwoDoubles twoDoubles); + __device__ ThreeDoubles WarpReduceSum(ThreeDoubles threeDoubles); + + __device__ double BlockReduceSum(double value); + __device__ TwoDoubles BlockReduceSum(TwoDoubles twoDoubles); + __device__ ThreeDoubles BlockReduceSum(ThreeDoubles threeDoubles); + + __device__ void ReduceSum(double value, double* result); + __device__ void ReduceSum(TwoDoubles twoDoubles, TwoDoubles* result); + __device__ void ReduceSum(const double* in, const int n, double* result); + +#if __CUDA_ARCH__ < 600 + __device__ double atomicAdd(double* address, double val); +#endif + } +} diff --git a/source/gfoidl.Stochastics.Gpu-win-64/cpp.hint b/source/gfoidl.Stochastics.Gpu-win-64/cpp.hint new file mode 100644 index 0000000..f02e3a1 --- /dev/null +++ b/source/gfoidl.Stochastics.Gpu-win-64/cpp.hint @@ -0,0 +1,5 @@ +// Hint files help the Visual Studio IDE interpret Visual C++ identifiers +// such as names of functions and macros. +// For more information see https://go.microsoft.com/fwlink/?linkid=865984 +#define GPU_API __declspec(dllexport) +#define GPU_API __declspec(dllimport) diff --git a/source/gfoidl.Stochastics.Gpu-win-64/dummy.cpp b/source/gfoidl.Stochastics.Gpu-win-64/dummy.cpp new file mode 100644 index 0000000..21c8c45 --- /dev/null +++ b/source/gfoidl.Stochastics.Gpu-win-64/dummy.cpp @@ -0,0 +1 @@ +// dummy.cpp needed for VS to show C++ compiler properties diff --git a/source/gfoidl.Stochastics.Gpu-win-64/gfoidl.Stochastics.Gpu-win-64.vcxproj b/source/gfoidl.Stochastics.Gpu-win-64/gfoidl.Stochastics.Gpu-win-64.vcxproj new file mode 100644 index 0000000..e425d55 --- /dev/null +++ b/source/gfoidl.Stochastics.Gpu-win-64/gfoidl.Stochastics.Gpu-win-64.vcxproj @@ -0,0 +1,114 @@ + + + + + Debug + x64 + + + Release + x64 + + + + + + + + + + + + + + + + + + + + + + {60D3CEC5-E8DE-45E4-8834-86C9940D942A} + gfoidl_Stochastics_Gpu_win_64 + 10.0.16299.0 + + + + DynamicLibrary + true + MultiByte + v141 + + + DynamicLibrary + false + true + MultiByte + v141 + + + + + + + + + + + + + + true + gfoidl-Stochastics-gpu + + + gfoidl-Stochastics-gpu + + + + Level3 + Disabled + GFOIDL_STOCHASTICS_GPU_EXPORTS;WIN32;WIN64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\gfoidl.Stochastics.Gpu-linux-64;%(AdditionalIncludeDirectories) + + + true + Console + cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + 64 + true + true + compute_60,sm_60;%(CodeGeneration) + + + + + Level3 + MaxSpeed + true + true + GFOIDL_STOCHASTICS_GPU_EXPORTS;WIN32;WIN64;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\gfoidl.Stochastics.Gpu-linux-64;%(AdditionalIncludeDirectories) + + + true + true + true + Console + cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + 64 + true + true + compute_60,sm_60;%(CodeGeneration) + + + + + + + \ No newline at end of file diff --git a/source/gfoidl.Stochastics.Native-win-x64/dll.h b/source/gfoidl.Stochastics.Native-win-x64/dll.h index 6ede07c..63f4194 100644 --- a/source/gfoidl.Stochastics.Native-win-x64/dll.h +++ b/source/gfoidl.Stochastics.Native-win-x64/dll.h @@ -1,12 +1,6 @@ #pragma once //----------------------------------------------------------------------------- -// The following ifdef block is the standard way of creating macros which make exporting -// from a DLL simpler. All files within this DLL are compiled with the GFOIDLSTOCHASTICSNATIVE_EXPORTS -// symbol defined on the command line. This symbol should not be defined on any project -// that uses this DLL. This way any other project whose source files include this file see -// DLL_API functions as being imported from a DLL, whereas this DLL sees symbols -// defined with this macro as being exported. -#ifdef GFOIDLSTOCHASTICSNATIVE_EXPORTS +#ifdef GFOIDL_STOCHASTICS_NATIVE_EXPORTS #define DLL_API __declspec(dllexport) #else #define DLL_API __declspec(dllimport) diff --git a/source/gfoidl.Stochastics.Native-win-x64/gfoidl.Stochastics.Native-win-x64.vcxproj b/source/gfoidl.Stochastics.Native-win-x64/gfoidl.Stochastics.Native-win-x64.vcxproj index 5791532..bf5f842 100644 --- a/source/gfoidl.Stochastics.Native-win-x64/gfoidl.Stochastics.Native-win-x64.vcxproj +++ b/source/gfoidl.Stochastics.Native-win-x64/gfoidl.Stochastics.Native-win-x64.vcxproj @@ -57,7 +57,7 @@ Use Level3 Disabled - _DEBUG;GFOIDLSTOCHASTICSNATIVE_EXPORTS;_WINDOWS;_USRDLL;%(PreprocessorDefinitions) + _DEBUG;GFOIDL_STOCHASTICS_NATIVE_EXPORTS;_WINDOWS;_USRDLL;%(PreprocessorDefinitions) true @@ -72,7 +72,7 @@ MaxSpeed true true - NDEBUG;GFOIDLSTOCHASTICSNATIVE_EXPORTS;_WINDOWS;_USRDLL;%(PreprocessorDefinitions) + NDEBUG;GFOIDL_STOCHASTICS_NATIVE_EXPORTS;_WINDOWS;_USRDLL;%(PreprocessorDefinitions) true diff --git a/source/gfoidl.Stochastics/Enumerators/ArrayEnumerator.cs b/source/gfoidl.Stochastics/Enumerators/ArrayEnumerator.cs index f495a69..7bf7147 100644 --- a/source/gfoidl.Stochastics/Enumerators/ArrayEnumerator.cs +++ b/source/gfoidl.Stochastics/Enumerators/ArrayEnumerator.cs @@ -21,6 +21,8 @@ public ArrayEnumerable(T[] array) //--------------------------------------------------------------------- public static implicit operator ArrayEnumerable(T[] array) => new ArrayEnumerable(array); public static implicit operator T[] (ArrayEnumerable enumerable) => enumerable._array; + //--------------------------------------------------------------------- + public ref T GetPinnableReference() => ref _array[0]; } //--------------------------------------------------------------------- public struct ArrayEnumerator : IEnumerator diff --git a/source/gfoidl.Stochastics/Native/Gpu.cs b/source/gfoidl.Stochastics/Native/Gpu.cs new file mode 100644 index 0000000..5394a59 --- /dev/null +++ b/source/gfoidl.Stochastics/Native/Gpu.cs @@ -0,0 +1,114 @@ +using System; +using System.Runtime.InteropServices; +using gfoidl.Stochastics.Statistics; + +namespace gfoidl.Stochastics.Native +{ + internal static class Gpu + { + public const string EnvVariableName = "GFOIDL_STOCHASTICS_USE_GPU"; + //--------------------------------------------------------------------- + private static readonly bool s_isAvailable; + private static readonly bool s_isUseOfGpuForced; + //--------------------------------------------------------------------- + static Gpu() + { + string env = Environment.GetEnvironmentVariable(EnvVariableName); + + s_isAvailable = RuntimeInformation.OSArchitecture == Architecture.X64 + && RuntimeHelper.IsRunningOnDotNetCore() + && GpuMethods.gpu_available() + && env != "0"; + + s_isUseOfGpuForced = string.Equals(env, "force", StringComparison.OrdinalIgnoreCase); + } + //--------------------------------------------------------------------- + public static bool IsAvailable => s_isAvailable; + public static bool IsUseOfGpuForced => s_isUseOfGpuForced; + //--------------------------------------------------------------------- + public static unsafe void CalculateSampleStats(Sample sample) + { + if (!s_isAvailable) + throw new InvalidOperationException(Strings.Gpu_not_available); + + fixed (double* ptr = sample.Values) + { + SampleStats sampleStats = default; + int errorCode = GpuMethods.gpu_sample_calc_stats(ptr, sample.Count, &sampleStats); + + if (errorCode != 0) ThrowGpuException(errorCode); + + sample.Mean = sampleStats.Mean; + sample.Max = sampleStats.Max; + sample.Min = sampleStats.Min; + sample.Delta = sampleStats.Delta; + sample.VarianceCore = sampleStats.VarianceCore; + sample.Skewness = sampleStats.Skewness; + sample.Kurtosis = sampleStats.Kurtosis; + } + } + //--------------------------------------------------------------------- + private static void ThrowGpuException(int errorCode) + { + IntPtr ptr = GpuMethods.gpu_get_error_string(errorCode); + string msg = $"CUDA runtime error: {Marshal.PtrToStringAnsi(ptr)}"; + throw new GpuException(msg); + } + //--------------------------------------------------------------------- + private static class GpuMethods + { + private const string LibName = "gfoidl-Stochastics-gpu"; + //--------------------------------------------------------------------- + [DllImport(LibName)] + public static extern bool gpu_available(); + //--------------------------------------------------------------------- + [DllImport(LibName)] + public static extern IntPtr gpu_get_error_string(int errorCode); + //--------------------------------------------------------------------- + [DllImport(LibName)] + public static extern unsafe int gpu_sample_calc_stats(double* sample, int sampleSize, SampleStats* sampleStats); + } + //--------------------------------------------------------------------- + [StructLayout(LayoutKind.Sequential)] + private struct SampleStats + { + public double Mean; + public double Max; + public double Min; + public double Delta; + public double VarianceCore; + public double Skewness; + public double Kurtosis; + } + } + //------------------------------------------------------------------------- + /// + /// Represents an error that is caused by the GPU. + /// + public class GpuException : Exception + { + /// + /// Initializes a new instance of the class. + /// + public GpuException() : base() { } + + /// + /// Initializes a new instance of the class + /// with a specified error message. + /// + /// The message that describes the error. + public GpuException(string message) : base(message) { } + + /// + /// Initializes a new instance of the + /// class with a specified error message and a reference to the inner + /// exception that is the cause of this exception. + /// + /// The error message that explains the reason for the exception. + /// + /// The exception that is the cause of the current exception, or a null reference (Nothing in Visual Basic) + /// if no inner exception is specified. + /// + public GpuException(string message, Exception innerException) : base(message, innerException) { } + } +} diff --git a/source/gfoidl.Stochastics/RuntimeHelper.cs b/source/gfoidl.Stochastics/RuntimeHelper.cs index 1e6d1b9..c651fd7 100644 --- a/source/gfoidl.Stochastics/RuntimeHelper.cs +++ b/source/gfoidl.Stochastics/RuntimeHelper.cs @@ -12,7 +12,7 @@ public static bool IsRunningOnDotNetCore() if (!_isRunningOnDotNetCore.HasValue) { string frameworkName = Assembly.GetEntryAssembly()?.GetCustomAttribute()?.FrameworkName; - _isRunningOnDotNetCore = frameworkName.Contains("NETCoreApp"); + _isRunningOnDotNetCore = frameworkName?.Contains("NETCoreApp") ?? false; } return _isRunningOnDotNetCore.Value; diff --git a/source/gfoidl.Stochastics/SpecialFunctions.cs b/source/gfoidl.Stochastics/SpecialFunctions.cs index fd83902..dbff91f 100644 --- a/source/gfoidl.Stochastics/SpecialFunctions.cs +++ b/source/gfoidl.Stochastics/SpecialFunctions.cs @@ -10,19 +10,19 @@ namespace gfoidl.Stochastics /// public static class SpecialFunctions { - private static readonly bool _isDotNetCore; - private static readonly bool _erfNative; - private static readonly bool _erfNativeLinux; + private static readonly bool s_isDotNetCore; + private static readonly bool s_erfNative; + private static readonly bool s_erfNativeLinux; //--------------------------------------------------------------------- static SpecialFunctions() { - _isDotNetCore = RuntimeHelper.IsRunningOnDotNetCore(); + s_isDotNetCore = RuntimeHelper.IsRunningOnDotNetCore(); - _erfNative = + s_erfNative = RuntimeInformation.OSArchitecture == Architecture.X64 && (RuntimeInformation.IsOSPlatform(OSPlatform.Windows) || RuntimeInformation.IsOSPlatform(OSPlatform.Linux)); - _erfNativeLinux = _erfNative && RuntimeInformation.IsOSPlatform(OSPlatform.Linux); + s_erfNativeLinux = s_erfNative && RuntimeInformation.IsOSPlatform(OSPlatform.Linux); } //--------------------------------------------------------------------- /// @@ -34,7 +34,7 @@ static SpecialFunctions() // https://math.stackexchange.com/questions/263216/error-function-erf-with-better-precision/1889960#1889960 public static double Erf(double x) { - if (_isDotNetCore && _erfNativeLinux) + if (s_isDotNetCore && s_erfNativeLinux) return NativeMethods.gaussian_error_function(x); /* @@ -197,7 +197,7 @@ public static double Erf(double x) /// public static double Erfc(double x) { - if (_isDotNetCore && _erfNativeLinux) + if (s_isDotNetCore && s_erfNativeLinux) return NativeMethods.gaussian_error_function_complementary(x); /* @@ -412,7 +412,7 @@ internal static unsafe void Erf(double* values, double* result, int size) // Is a JIT compile-time constant, due the cctor. Note only a static readonly field may not be // sufficient (on the first access). See https://github.com/dotnet/coreclr/issues/1193 // So the not taken branch(es) will be removed. - if (_isDotNetCore && _erfNative) + if (s_isDotNetCore && s_erfNative) NativeMethods.gaussian_error_function_vector(values, result, size); else { @@ -423,7 +423,7 @@ internal static unsafe void Erf(double* values, double* result, int size) //--------------------------------------------------------------------- internal static unsafe void Erfc(double* values, double* result, int size) { - if (_isDotNetCore && _erfNative) + if (s_isDotNetCore && s_erfNative) NativeMethods.gaussian_error_function_complementary_vector(values, result, size); else { diff --git a/source/gfoidl.Stochastics/Statistics/Sample.Calculations.cs b/source/gfoidl.Stochastics/Statistics/Sample.Calculations.cs index 546a37b..2ca98d5 100644 --- a/source/gfoidl.Stochastics/Statistics/Sample.Calculations.cs +++ b/source/gfoidl.Stochastics/Statistics/Sample.Calculations.cs @@ -1,5 +1,6 @@ using System; using System.Threading.Tasks; +using gfoidl.Stochastics.Native; namespace gfoidl.Stochastics.Statistics { @@ -8,11 +9,36 @@ partial class Sample private static ParallelOptions GetParallelOptions() => new ParallelOptions { MaxDegreeOfParallelism = Environment.ProcessorCount }; //--------------------------------------------------------------------- + /// + /// Calculates the statistics for . + /// + /// + /// The statistical properties of are lazy-evaluated. + /// With this method these properties are instantly evalualted / calculated. + /// + public void CalculateStats() + { + // is threadsafe, because from shared state is just read + Task medianTask = Task.Run(() => this.CalculateMedian()); + + if (Gpu.IsAvailable && (Gpu.IsUseOfGpuForced || this.Count > SampleThresholds.ThresholdForGpu)) + Gpu.CalculateSampleStats(this); + else + { + this.CalculateAverageAndVarianceCore(); + this.GetMinMax(); + this.CalculateDelta(); + this.CalculateSkewnessAndKurtosis(); + } + + medianTask.GetAwaiter().GetResult(); + } + //--------------------------------------------------------------------- private double CalculateMedian() { int n = this.SortedValues.Count; - if (n % 2 == 0) + if ((n & (2 - 1)) == 0) // n % 2 == 0 return (_sortedValues[(n >> 1) - 1] + _sortedValues[n >> 1]) * 0.5; else // this is correct, but n is an int, so the next line is @@ -25,7 +51,7 @@ private double CalculateMedian() private double CalculateSampleVariance() => this.VarianceCore / (this.Count - 1d); //--------------------------------------------------------------------- private double _varianceCore = double.NaN; - private double VarianceCore + internal double VarianceCore { get { @@ -34,6 +60,7 @@ private double VarianceCore return _varianceCore; } + set => _varianceCore = value; } } } diff --git a/source/gfoidl.Stochastics/Statistics/Sample.cs b/source/gfoidl.Stochastics/Statistics/Sample.cs index 766f3f7..c26b3cf 100644 --- a/source/gfoidl.Stochastics/Statistics/Sample.cs +++ b/source/gfoidl.Stochastics/Statistics/Sample.cs @@ -160,6 +160,7 @@ public double Delta return _delta; } + internal set => _delta = value; } //--------------------------------------------------------------------- /// @@ -261,6 +262,7 @@ public double Skewness return _skewness; } + internal set => _skewness = value; } //--------------------------------------------------------------------- private double _kurtosis = double.NaN; @@ -282,6 +284,7 @@ public double Kurtosis return _kurtosis; } + internal set => _kurtosis = value; } //--------------------------------------------------------------------- /// diff --git a/source/gfoidl.Stochastics/Statistics/SampleThresholds.cs b/source/gfoidl.Stochastics/Statistics/SampleThresholds.cs index 45ba420..1d10c21 100644 --- a/source/gfoidl.Stochastics/Statistics/SampleThresholds.cs +++ b/source/gfoidl.Stochastics/Statistics/SampleThresholds.cs @@ -23,5 +23,10 @@ public static class SampleThresholds /// and . /// public static int ThresholdForMinMax { get; set; } = 1_750_000; + + /// + /// Threshold for cpu vs. gpu execution for . + /// + public static int ThresholdForGpu { get; set; } = 5_000_000; } } diff --git a/source/gfoidl.Stochastics/Strings.Designer.cs b/source/gfoidl.Stochastics/Strings.Designer.cs index b5b90f6..56d7968 100644 --- a/source/gfoidl.Stochastics/Strings.Designer.cs +++ b/source/gfoidl.Stochastics/Strings.Designer.cs @@ -60,6 +60,15 @@ internal Strings() { } } + /// + /// Looks up a localized string similar to No GPU for CUDA is available, or GFOIDL_STOCHASTICS_USE_GPU is set to "0" (in order to disable usage of the GPU). To use the GPU the process must be x64 and running on .NET Core.. + /// + internal static string Gpu_not_available { + get { + return ResourceManager.GetString("Gpu_not_available", resourceCulture); + } + } + /// /// Looks up a localized string similar to The value must be greater than 0.. /// diff --git a/source/gfoidl.Stochastics/Strings.resx b/source/gfoidl.Stochastics/Strings.resx index a1af411..c9a0fcb 100644 --- a/source/gfoidl.Stochastics/Strings.resx +++ b/source/gfoidl.Stochastics/Strings.resx @@ -117,6 +117,9 @@ System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 + + No GPU for CUDA is available, or GFOIDL_STOCHASTICS_USE_GPU is set to "0" (in order to disable usage of the GPU). To use the GPU the process must be x64 and running on .NET Core. + The value must be greater than 0. diff --git a/source/gfoidl.Stochastics/gfoidl.Stochastics.csproj b/source/gfoidl.Stochastics/gfoidl.Stochastics.csproj index 99fb2f5..9442c60 100644 --- a/source/gfoidl.Stochastics/gfoidl.Stochastics.csproj +++ b/source/gfoidl.Stochastics/gfoidl.Stochastics.csproj @@ -32,11 +32,16 @@ - + + + + + + diff --git a/tests/gfoidl.Stochastics.Gpu-win-64.Tests.Console/gfoidl.Stochastics.Gpu-win-64.Tests.Console.vcxproj b/tests/gfoidl.Stochastics.Gpu-win-64.Tests.Console/gfoidl.Stochastics.Gpu-win-64.Tests.Console.vcxproj new file mode 100644 index 0000000..692f45d --- /dev/null +++ b/tests/gfoidl.Stochastics.Gpu-win-64.Tests.Console/gfoidl.Stochastics.Gpu-win-64.Tests.Console.vcxproj @@ -0,0 +1,93 @@ + + + + + Debug + x64 + + + Release + x64 + + + + 15.0 + {1C7AA347-76CE-4C7F-A468-B46ADDD93215} + Win32Proj + gfoidlStochasticsGpuwin64TestsConsole + 10.0.16299.0 + + + + Application + true + v141 + Unicode + + + Application + false + v141 + true + Unicode + + + + + + + + + + + + + + + true + + + false + + + + Level3 + Disabled + _DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + ..\..\source\gfoidl.Stochastics.Gpu-linux-64;%(AdditionalIncludeDirectories) + + + true + Console + + + + + Level3 + MaxSpeed + true + true + NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + ..\..\source\gfoidl.Stochastics.Gpu-linux-64;%(AdditionalIncludeDirectories) + + + true + true + true + Console + + + + + {60d3cec5-e8de-45e4-8834-86c9940d942a} + + + + + + + + + \ No newline at end of file diff --git a/tests/gfoidl.Stochastics.Gpu-win-64.Tests.Console/gfoidl.Stochastics.Gpu-win-64.Tests.Console.vcxproj.filters b/tests/gfoidl.Stochastics.Gpu-win-64.Tests.Console/gfoidl.Stochastics.Gpu-win-64.Tests.Console.vcxproj.filters new file mode 100644 index 0000000..a91ad0e --- /dev/null +++ b/tests/gfoidl.Stochastics.Gpu-win-64.Tests.Console/gfoidl.Stochastics.Gpu-win-64.Tests.Console.vcxproj.filters @@ -0,0 +1,22 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;ipp;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Source Files + + + \ No newline at end of file diff --git a/tests/gfoidl.Stochastics.Gpu-win-64.Tests.Console/main.cpp b/tests/gfoidl.Stochastics.Gpu-win-64.Tests.Console/main.cpp new file mode 100644 index 0000000..5c351c3 --- /dev/null +++ b/tests/gfoidl.Stochastics.Gpu-win-64.Tests.Console/main.cpp @@ -0,0 +1,69 @@ +//#define SMALL_SAMPLE +//#define MEDIUM_SAMPLE +//----------------------------------------------------------------------------- +#include "gpu_core.h" +#include +#include +//----------------------------------------------------------------------------- +using std::cout; +using std::cerr; +using std::endl; +//----------------------------------------------------------------------------- +int main() +{ +#if defined(SMALL_SAMPLE) + const int N = 3; + double sample[3] = {1, 2, 3}; + double avgExpected = 2; + double deltaExpected = 2.0 / 3; +#elif defined(MEDIUM_SAMPLE) + const int N = 20; + double sample[] = {0, 3, 4, 1, 2, 3, 0, 2, 1, 3, 2, 0, 2, 2, 3, 2, 5, 2, 3, 999}; + double avgExpected = 51.95; + double deltaExpected = 94.705; +#else + const int N = 2000000; + double* sample = new double[N]; + double avgExpected = 0; + double deltaExpected = 0; + + for (int i = 0; i < N; ++i) + { + double t = (double)rand() / RAND_MAX; + sample[i] = t; + avgExpected += t; + } + + avgExpected /= N; + + for (int i = 0; i < N; ++i) + deltaExpected += abs(sample[i] - avgExpected); + + deltaExpected /= N; +#endif + + SampleStats sampleStats; + int errorCode = gpu_sample_calc_stats(sample, N, &sampleStats); + + cout << endl; + cout << "CUDA errorcode: " << errorCode << endl; + + if (errorCode != 0) + { + const char* msg = gpu_get_error_string(errorCode); + cerr << msg << endl; + return 1; + } + + cout << endl; + cout << "avg" << endl; + cout << "expected: " << avgExpected << endl; + cout << "actual: " << sampleStats.Mean << endl; + assert(abs(avgExpected - sampleStats.Mean) < 1e-3); + + cout << endl; + cout << "delta" << endl; + cout << "expected: " << deltaExpected << endl; + cout << "actual: " << sampleStats.Delta << endl; + assert(abs(deltaExpected - sampleStats.Delta) < 1e-3); +} diff --git a/tests/gfoidl.Stochastics.Gpu-win-64.Tests/TestHelper.cpp b/tests/gfoidl.Stochastics.Gpu-win-64.Tests/TestHelper.cpp new file mode 100644 index 0000000..5868fc2 --- /dev/null +++ b/tests/gfoidl.Stochastics.Gpu-win-64.Tests/TestHelper.cpp @@ -0,0 +1,23 @@ +#include "stdafx.h" +#include "TestHelper.h" +#include "gpu_core.h" +//----------------------------------------------------------------------------- +using namespace Microsoft::VisualStudio::CppUnitTestFramework; +//----------------------------------------------------------------------------- +void TestHelper::FailIfError(const int errorCode) +{ + if (errorCode == 0) return; + + const char* errorMsg = gpu_get_error_string(errorCode); + wchar_t* msg = TestHelper::ToWchar(errorMsg); + Assert::Fail(msg); +} +//----------------------------------------------------------------------------- +wchar_t* TestHelper::ToWchar(const char* c) +{ + const size_t strLen = strlen(c) + 1; + wchar_t* wc = new wchar_t[strLen]; + mbstowcs(wc, c, strLen); + + return wc; +} diff --git a/tests/gfoidl.Stochastics.Gpu-win-64.Tests/TestHelper.h b/tests/gfoidl.Stochastics.Gpu-win-64.Tests/TestHelper.h new file mode 100644 index 0000000..2234388 --- /dev/null +++ b/tests/gfoidl.Stochastics.Gpu-win-64.Tests/TestHelper.h @@ -0,0 +1,10 @@ +#pragma once +//----------------------------------------------------------------------------- +class TestHelper +{ +public: + static void FailIfError(const int errorCode); + +private: + static wchar_t* ToWchar(const char* c); +}; diff --git a/tests/gfoidl.Stochastics.Gpu-win-64.Tests/gfoidl.Stochastics.Gpu-win-64.Tests.vcxproj b/tests/gfoidl.Stochastics.Gpu-win-64.Tests/gfoidl.Stochastics.Gpu-win-64.Tests.vcxproj new file mode 100644 index 0000000..7591514 --- /dev/null +++ b/tests/gfoidl.Stochastics.Gpu-win-64.Tests/gfoidl.Stochastics.Gpu-win-64.Tests.vcxproj @@ -0,0 +1,108 @@ + + + + + Debug + x64 + + + Release + x64 + + + + 15.0 + {90DA6C71-5A2B-4D26-9EA4-B6BF60A7B7E3} + Win32Proj + gfoidlStochasticsGpuwin64Tests + 10.0.16299.0 + NativeUnitTestProject + + + + DynamicLibrary + true + v141 + Unicode + false + + + DynamicLibrary + false + v141 + true + Unicode + false + + + + + + + + + + + + + + + true + + + true + + + + Use + Level3 + Disabled + ..\..\source\gfoidl.Stochastics.Gpu-linux-64;$(VCInstallDir)UnitTest\include;%(AdditionalIncludeDirectories) + _DEBUG;%(PreprocessorDefinitions) + true + + + Windows + $(VCInstallDir)UnitTest\lib;%(AdditionalLibraryDirectories) + + + + + Level3 + Use + MaxSpeed + true + true + ..\..\source\gfoidl.Stochastics.Gpu-linux-64;$(VCInstallDir)UnitTest\include;%(AdditionalIncludeDirectories) + NDEBUG;%(PreprocessorDefinitions) + true + + + Windows + true + true + $(VCInstallDir)UnitTest\lib;%(AdditionalLibraryDirectories) + + + + + + + + + + Create + Create + + + + + + + {60d3cec5-e8de-45e4-8834-86c9940d942a} + + + + + + \ No newline at end of file diff --git a/tests/gfoidl.Stochastics.Gpu-win-64.Tests/gfoidl.Stochastics.Gpu-win-64.Tests.vcxproj.filters b/tests/gfoidl.Stochastics.Gpu-win-64.Tests/gfoidl.Stochastics.Gpu-win-64.Tests.vcxproj.filters new file mode 100644 index 0000000..8836c10 --- /dev/null +++ b/tests/gfoidl.Stochastics.Gpu-win-64.Tests/gfoidl.Stochastics.Gpu-win-64.Tests.vcxproj.filters @@ -0,0 +1,39 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;ipp;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + Header Files + + + Header Files + + + Header Files + + + + + Source Files + + + Source Files + + + Source Files + + + \ No newline at end of file diff --git a/tests/gfoidl.Stochastics.Gpu-win-64.Tests/gpu_core.cpp b/tests/gfoidl.Stochastics.Gpu-win-64.Tests/gpu_core.cpp new file mode 100644 index 0000000..5ee0e22 --- /dev/null +++ b/tests/gfoidl.Stochastics.Gpu-win-64.Tests/gpu_core.cpp @@ -0,0 +1,67 @@ +#include "stdafx.h" +#include "TestHelper.h" +#include "gpu_core.h" +//----------------------------------------------------------------------------- +using namespace Microsoft::VisualStudio::CppUnitTestFramework; +//----------------------------------------------------------------------------- +namespace gfoidlStochasticsGpuwin64Tests +{ + TEST_CLASS(gpu_core_Tests) + { + private: + static double _sample[]; + + public: + TEST_METHOD(gpu_available___true) + { + bool actual = gpu_available(); + + Assert::IsTrue(actual); + } + //--------------------------------------------------------------------- + TEST_METHOD(gpu_sample_calc_mean___OK) + { + const int N = 1000000; + double* sample = new double[N]; + double expected = 0.0; + + for (int i = 0; i < N; ++i) + { + double t = (double)rand() / RAND_MAX; + sample[i] = t; + expected += t; + } + expected /= N; + + SampleStats sampleStats; + int errorCode = gpu_sample_calc_stats(sample, N, &sampleStats); + + TestHelper::FailIfError(errorCode); + + Assert::AreEqual(expected, sampleStats.Mean, 1e-3); + + delete[] sample; + } + //--------------------------------------------------------------------- + TEST_METHOD(gpu_sample_calc_stats___OK) + { + SampleStats sampleStats; + int errorCode = gpu_sample_calc_stats(_sample, 20, &sampleStats); + + TestHelper::FailIfError(errorCode); + + // Expected values calculated with gnuplot 5.0 patchlevel 1 + double standardDeviation = sqrt(sampleStats.VarianceCore / 20); + double sampleStandardDeviation = sqrt(sampleStats.VarianceCore / (20 - 1)); + + Assert::AreEqual(51.9500 , sampleStats.Mean , 1e-3); + Assert::AreEqual(217.2718, standardDeviation , 1e-3); + Assert::AreEqual(222.9162, sampleStandardDeviation, 1e-3); + Assert::AreEqual(94.7050 , sampleStats.Delta , 1e-3); + Assert::AreEqual(4.1293 , sampleStats.Skewness , 1e-3); + Assert::AreEqual(18.0514 , sampleStats.Kurtosis , 1e-3); + } + }; + //------------------------------------------------------------------------- + double gpu_core_Tests::_sample[] = {0, 3, 4, 1, 2, 3, 0, 2, 1, 3, 2, 0, 2, 2, 3, 2, 5, 2, 3, 999}; +} diff --git a/tests/gfoidl.Stochastics.Gpu-win-64.Tests/stdafx.cpp b/tests/gfoidl.Stochastics.Gpu-win-64.Tests/stdafx.cpp new file mode 100644 index 0000000..b43eab4 --- /dev/null +++ b/tests/gfoidl.Stochastics.Gpu-win-64.Tests/stdafx.cpp @@ -0,0 +1,8 @@ +// stdafx.cpp : source file that includes just the standard includes +// gfoidl.Stochastics.Gpu-win-64.Tests.pch will be the pre-compiled header +// stdafx.obj will contain the pre-compiled type information + +#include "stdafx.h" + +// TODO: reference any additional headers you need in STDAFX.H +// and not in this file diff --git a/tests/gfoidl.Stochastics.Gpu-win-64.Tests/stdafx.h b/tests/gfoidl.Stochastics.Gpu-win-64.Tests/stdafx.h new file mode 100644 index 0000000..43280fc --- /dev/null +++ b/tests/gfoidl.Stochastics.Gpu-win-64.Tests/stdafx.h @@ -0,0 +1,13 @@ +// stdafx.h : include file for standard system include files, +// or project specific include files that are used frequently, but +// are changed infrequently +// + +#pragma once + +#include "targetver.h" + +// Headers for CppUnitTest +#include "CppUnitTest.h" + +// TODO: reference additional headers your program requires here diff --git a/tests/gfoidl.Stochastics.Gpu-win-64.Tests/targetver.h b/tests/gfoidl.Stochastics.Gpu-win-64.Tests/targetver.h new file mode 100644 index 0000000..87c0086 --- /dev/null +++ b/tests/gfoidl.Stochastics.Gpu-win-64.Tests/targetver.h @@ -0,0 +1,8 @@ +#pragma once + +// Including SDKDDKVer.h defines the highest available Windows platform. + +// If you wish to build your application for a previous Windows platform, include WinSDKVer.h and +// set the _WIN32_WINNT macro to the platform you wish to support before including SDKDDKVer.h. + +#include diff --git a/tests/gfoidl.Stochastics.Tests/Native/GpuTests/CalculateSampleStats.cs b/tests/gfoidl.Stochastics.Tests/Native/GpuTests/CalculateSampleStats.cs new file mode 100644 index 0000000..3be1602 --- /dev/null +++ b/tests/gfoidl.Stochastics.Tests/Native/GpuTests/CalculateSampleStats.cs @@ -0,0 +1,33 @@ +using gfoidl.Stochastics.Native; +using gfoidl.Stochastics.Statistics; +using NUnit.Framework; + +namespace gfoidl.Stochastics.Tests.Native.GpuTests +{ + [TestFixture, Explicit("GPU CUDA must be available")] + public class CalculateSampleStats + { + [Test] + public void Values_given___correct_stats() + { + Assume.That(Gpu.IsAvailable, "GPU is not available"); + + double[] values = { 0, 3, 4, 1, 2, 3, 0, 2, 1, 3, 2, 0, 2, 2, 3, 2, 5, 2, 3, 999 }; + + var sample = new Sample(values); + + Gpu.CalculateSampleStats(sample); + + Assert.Multiple(() => + { + // Expected values calculated with gnuplot 5.0 patchlevel 1 + Assert.AreEqual(51.9500 , sample.Mean , 1e-3, nameof(sample.Mean)); + Assert.AreEqual(217.2718, sample.StandardDeviation , 1e-3, nameof(sample.StandardDeviation)); + Assert.AreEqual(222.9162, sample.SampleStandardDeviation, 1e-3, nameof(sample.SampleStandardDeviation)); + Assert.AreEqual(4.1293 , sample.Skewness , 1e-3, nameof(sample.Skewness)); + Assert.AreEqual(18.0514 , sample.Kurtosis , 1e-3, nameof(sample.Kurtosis)); + Assert.AreEqual(94.7050 , sample.Delta , 1e-3, nameof(sample.Delta)); + }); + } + } +} diff --git a/tests/gfoidl.Stochastics.Tests/Native/GpuTests/IsAvailable.cs b/tests/gfoidl.Stochastics.Tests/Native/GpuTests/IsAvailable.cs new file mode 100644 index 0000000..3d74c34 --- /dev/null +++ b/tests/gfoidl.Stochastics.Tests/Native/GpuTests/IsAvailable.cs @@ -0,0 +1,26 @@ +using System; +using gfoidl.Stochastics.Native; +using NUnit.Framework; + +namespace gfoidl.Stochastics.Tests.Native.GpuTests +{ + [TestFixture, Explicit("Mutual exclusive tests")] + public class IsAvailable + { + [Test] + public void Env_set_to_0___false() + { + Environment.SetEnvironmentVariable(Gpu.EnvVariableName, "0"); + + Assert.IsFalse(Gpu.IsAvailable); + } + //--------------------------------------------------------------------- + [Test] + public void Env_set_to_force___true() + { + Environment.SetEnvironmentVariable(Gpu.EnvVariableName, "force"); + + Assert.IsTrue(Gpu.IsAvailable); + } + } +}