diff --git a/.gitattributes b/.gitattributes
index ba05560..0d9f7ef 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -24,6 +24,7 @@ Dockerfile          text eol=lf
 *.f03               filter=tabs2spaces
 *.sql               filter=tabs2spaces
 *.ino               filter=tabs2spaces
+*.cu                filter=tabs2spaces
 
 # Project files
 *.csproj            filter=tabs2spaces
diff --git a/gfoidl.Stochastics.sln b/gfoidl.Stochastics.sln
index f9609a9..93a5f9b 100644
--- a/gfoidl.Stochastics.sln
+++ b/gfoidl.Stochastics.sln
@@ -72,6 +72,14 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "images", "images", "{A741E7
 		doc\release-notes\images\perf-avg-var-v1.1.0-preview-3.png = doc\release-notes\images\perf-avg-var-v1.1.0-preview-3.png
 	EndProjectSection
 EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gfoidl.Stochastics.Gpu-win-64", "source\gfoidl.Stochastics.Gpu-win-64\gfoidl.Stochastics.Gpu-win-64.vcxproj", "{60D3CEC5-E8DE-45E4-8834-86C9940D942A}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gfoidl.Stochastics.Gpu-win-64.Tests", "tests\gfoidl.Stochastics.Gpu-win-64.Tests\gfoidl.Stochastics.Gpu-win-64.Tests.vcxproj", "{90DA6C71-5A2B-4D26-9EA4-B6BF60A7B7E3}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gfoidl.Stochastics.Gpu-win-64.Tests.Console", "tests\gfoidl.Stochastics.Gpu-win-64.Tests.Console\gfoidl.Stochastics.Gpu-win-64.Tests.Console.vcxproj", "{1C7AA347-76CE-4C7F-A468-B46ADDD93215}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gfoidl.Stochastics.Gpu-linux-64", "source\gfoidl.Stochastics.Gpu-linux-64\gfoidl.Stochastics.Gpu-linux-64.vcxproj", "{0729F977-87F1-4139-A9B1-569BFB4A352E}"
+EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 		Debug|Any CPU = Debug|Any CPU
@@ -83,27 +91,21 @@ Global
 		{B071CBBC-D096-4E62-A7D1-146FFC9B0431}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{B071CBBC-D096-4E62-A7D1-146FFC9B0431}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{B071CBBC-D096-4E62-A7D1-146FFC9B0431}.Debug|x64.ActiveCfg = Debug|Any CPU
-		{B071CBBC-D096-4E62-A7D1-146FFC9B0431}.Debug|x64.Build.0 = Debug|Any CPU
 		{B071CBBC-D096-4E62-A7D1-146FFC9B0431}.Release|Any CPU.ActiveCfg = Release|Any CPU
 		{B071CBBC-D096-4E62-A7D1-146FFC9B0431}.Release|Any CPU.Build.0 = Release|Any CPU
 		{B071CBBC-D096-4E62-A7D1-146FFC9B0431}.Release|x64.ActiveCfg = Release|Any CPU
-		{B071CBBC-D096-4E62-A7D1-146FFC9B0431}.Release|x64.Build.0 = Release|Any CPU
 		{902CB41A-DDB5-4AEB-B5E8-AD6494EF4DB3}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{902CB41A-DDB5-4AEB-B5E8-AD6494EF4DB3}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{902CB41A-DDB5-4AEB-B5E8-AD6494EF4DB3}.Debug|x64.ActiveCfg = Debug|Any CPU
-		{902CB41A-DDB5-4AEB-B5E8-AD6494EF4DB3}.Debug|x64.Build.0 = Debug|Any CPU
 		{902CB41A-DDB5-4AEB-B5E8-AD6494EF4DB3}.Release|Any CPU.ActiveCfg = Release|Any CPU
 		{902CB41A-DDB5-4AEB-B5E8-AD6494EF4DB3}.Release|Any CPU.Build.0 = Release|Any CPU
 		{902CB41A-DDB5-4AEB-B5E8-AD6494EF4DB3}.Release|x64.ActiveCfg = Release|Any CPU
-		{902CB41A-DDB5-4AEB-B5E8-AD6494EF4DB3}.Release|x64.Build.0 = Release|Any CPU
 		{1B138AD4-AB52-4011-A0CB-C9AD6BB10D7B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 		{1B138AD4-AB52-4011-A0CB-C9AD6BB10D7B}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{1B138AD4-AB52-4011-A0CB-C9AD6BB10D7B}.Debug|x64.ActiveCfg = Debug|Any CPU
-		{1B138AD4-AB52-4011-A0CB-C9AD6BB10D7B}.Debug|x64.Build.0 = Debug|Any CPU
 		{1B138AD4-AB52-4011-A0CB-C9AD6BB10D7B}.Release|Any CPU.ActiveCfg = Release|Any CPU
 		{1B138AD4-AB52-4011-A0CB-C9AD6BB10D7B}.Release|Any CPU.Build.0 = Release|Any CPU
 		{1B138AD4-AB52-4011-A0CB-C9AD6BB10D7B}.Release|x64.ActiveCfg = Release|Any CPU
-		{1B138AD4-AB52-4011-A0CB-C9AD6BB10D7B}.Release|x64.Build.0 = Release|Any CPU
 		{813695B2-09E2-4111-BD6B-18EEF40299C0}.Debug|Any CPU.ActiveCfg = Debug|x64
 		{813695B2-09E2-4111-BD6B-18EEF40299C0}.Debug|x64.ActiveCfg = Debug|x64
 		{813695B2-09E2-4111-BD6B-18EEF40299C0}.Release|Any CPU.ActiveCfg = Release|x64
@@ -120,6 +122,28 @@ Global
 		{7A4AAA49-6071-4FEA-8920-D8EFC6B8A019}.Release|Any CPU.ActiveCfg = Release|x64
 		{7A4AAA49-6071-4FEA-8920-D8EFC6B8A019}.Release|x64.ActiveCfg = Release|x64
 		{7A4AAA49-6071-4FEA-8920-D8EFC6B8A019}.Release|x64.Build.0 = Release|x64
+		{60D3CEC5-E8DE-45E4-8834-86C9940D942A}.Debug|Any CPU.ActiveCfg = Debug|x64
+		{60D3CEC5-E8DE-45E4-8834-86C9940D942A}.Debug|x64.ActiveCfg = Debug|x64
+		{60D3CEC5-E8DE-45E4-8834-86C9940D942A}.Debug|x64.Build.0 = Debug|x64
+		{60D3CEC5-E8DE-45E4-8834-86C9940D942A}.Release|Any CPU.ActiveCfg = Release|x64
+		{60D3CEC5-E8DE-45E4-8834-86C9940D942A}.Release|x64.ActiveCfg = Release|x64
+		{60D3CEC5-E8DE-45E4-8834-86C9940D942A}.Release|x64.Build.0 = Release|x64
+		{90DA6C71-5A2B-4D26-9EA4-B6BF60A7B7E3}.Debug|Any CPU.ActiveCfg = Debug|x64
+		{90DA6C71-5A2B-4D26-9EA4-B6BF60A7B7E3}.Debug|x64.ActiveCfg = Debug|x64
+		{90DA6C71-5A2B-4D26-9EA4-B6BF60A7B7E3}.Debug|x64.Build.0 = Debug|x64
+		{90DA6C71-5A2B-4D26-9EA4-B6BF60A7B7E3}.Release|Any CPU.ActiveCfg = Release|x64
+		{90DA6C71-5A2B-4D26-9EA4-B6BF60A7B7E3}.Release|x64.ActiveCfg = Release|x64
+		{90DA6C71-5A2B-4D26-9EA4-B6BF60A7B7E3}.Release|x64.Build.0 = Release|x64
+		{1C7AA347-76CE-4C7F-A468-B46ADDD93215}.Debug|Any CPU.ActiveCfg = Debug|x64
+		{1C7AA347-76CE-4C7F-A468-B46ADDD93215}.Debug|x64.ActiveCfg = Debug|x64
+		{1C7AA347-76CE-4C7F-A468-B46ADDD93215}.Debug|x64.Build.0 = Debug|x64
+		{1C7AA347-76CE-4C7F-A468-B46ADDD93215}.Release|Any CPU.ActiveCfg = Release|x64
+		{1C7AA347-76CE-4C7F-A468-B46ADDD93215}.Release|x64.ActiveCfg = Release|x64
+		{1C7AA347-76CE-4C7F-A468-B46ADDD93215}.Release|x64.Build.0 = Release|x64
+		{0729F977-87F1-4139-A9B1-569BFB4A352E}.Debug|Any CPU.ActiveCfg = Debug|x64
+		{0729F977-87F1-4139-A9B1-569BFB4A352E}.Debug|x64.ActiveCfg = Debug|x64
+		{0729F977-87F1-4139-A9B1-569BFB4A352E}.Release|Any CPU.ActiveCfg = Release|x64
+		{0729F977-87F1-4139-A9B1-569BFB4A352E}.Release|x64.ActiveCfg = Release|x64
 	EndGlobalSection
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE
@@ -137,6 +161,10 @@ Global
 		{7A4AAA49-6071-4FEA-8920-D8EFC6B8A019} = {EFBEAA6C-2E09-4A10-898C-D20897F3609F}
 		{2EE5949B-0F90-49E5-B487-CC04C0940BC4} = {7917B2B4-78F0-4BDE-86FE-316549A84E4E}
 		{A741E7D0-1788-4145-BA6C-2C4698CCECF1} = {2EE5949B-0F90-49E5-B487-CC04C0940BC4}
+		{60D3CEC5-E8DE-45E4-8834-86C9940D942A} = {0B4051AE-E1F2-4601-B0A8-B4CA660CC4B2}
+		{90DA6C71-5A2B-4D26-9EA4-B6BF60A7B7E3} = {EFBEAA6C-2E09-4A10-898C-D20897F3609F}
+		{1C7AA347-76CE-4C7F-A468-B46ADDD93215} = {EFBEAA6C-2E09-4A10-898C-D20897F3609F}
+		{0729F977-87F1-4139-A9B1-569BFB4A352E} = {0B4051AE-E1F2-4601-B0A8-B4CA660CC4B2}
 	EndGlobalSection
 	GlobalSection(ExtensibilityGlobals) = postSolution
 		SolutionGuid = {A594189A-069D-4FEE-8CD8-1A5786CF97AA}
diff --git a/native-out/gfoidl-Stochastics-gpu.dll b/native-out/gfoidl-Stochastics-gpu.dll
new file mode 100644
index 0000000..df5b6cf
Binary files /dev/null and b/native-out/gfoidl-Stochastics-gpu.dll differ
diff --git a/native-out/libgfoidl-Stochastics-gpu.so b/native-out/libgfoidl-Stochastics-gpu.so
new file mode 100644
index 0000000..5394c54
Binary files /dev/null and b/native-out/libgfoidl-Stochastics-gpu.so differ
diff --git a/source/gfoidl.Stochastics.Gpu-linux-64/SampleStats.h b/source/gfoidl.Stochastics.Gpu-linux-64/SampleStats.h
new file mode 100644
index 0000000..d43a23c
--- /dev/null
+++ b/source/gfoidl.Stochastics.Gpu-linux-64/SampleStats.h
@@ -0,0 +1,12 @@
+#pragma once
+//-----------------------------------------------------------------------------
+struct SampleStats
+{
+    double Mean;
+    double Max;
+    double Min;
+    double Delta;
+    double VarianceCore;
+    double Skewness;
+    double Kurtosis;
+};
diff --git a/source/gfoidl.Stochastics.Gpu-linux-64/build.sh b/source/gfoidl.Stochastics.Gpu-linux-64/build.sh
new file mode 100644
index 0000000..ed73bbe
--- /dev/null
+++ b/source/gfoidl.Stochastics.Gpu-linux-64/build.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+set -e
+
+libName=libgfoidl-Stochastics-gpu.so
+
+# https://docs.nvidia.com/cuda/pdf/CUDA_Compiler_Driver_NVCC.pdf
+nvcc    -std=c++14 \
+        --disable-warnings \
+        -Wno-deprecated-declarations \
+        -o "$libName" \
+        -O3 \
+        --ptxas-options=-v \
+        --machine 64 \
+        -x cu \
+        -cudart static \
+        -shared -rdc=true \
+        -Xcompiler -fPIC,-fvisibility=hidden  \
+        -gencode=arch=compute_60,code=sm_60 \
+        -gencode=arch=compute_35,code=sm_35 \
+        kernel_utils.cu kernel.cu gpu_core.cu
+
+nm -DC "$libName" | grep " T "
diff --git a/source/gfoidl.Stochastics.Gpu-linux-64/dll.h b/source/gfoidl.Stochastics.Gpu-linux-64/dll.h
new file mode 100644
index 0000000..fc8d4b3
--- /dev/null
+++ b/source/gfoidl.Stochastics.Gpu-linux-64/dll.h
@@ -0,0 +1,23 @@
+#pragma once
+//-----------------------------------------------------------------------------
+#if defined _WIN32 || defined __CYGWIN__
+    #ifdef GFOIDL_STOCHASTICS_GPU_EXPORTS
+        #define GPU_API __declspec(dllexport)
+    #else
+        #define GPU_API __declspec(dllimport)
+    #endif
+#else
+    #if __GNUC__ >= 4
+        #define GPU_API __attribute__ ((visibility ("default")))
+    #else
+        #define GPU_API
+    #endif
+#endif
+//-----------------------------------------------------------------------------
+#ifdef __cplusplus
+    #define BEGIN_EXTERN_C  extern "C" {
+    #define END_EXTERN_C    }
+#else
+    #define BEGIN_EXTERN_C
+    #define END_EXTERN_C
+#endif
diff --git a/source/gfoidl.Stochastics.Gpu-linux-64/gfoidl.Stochastics.Gpu-linux-64.vcxproj b/source/gfoidl.Stochastics.Gpu-linux-64/gfoidl.Stochastics.Gpu-linux-64.vcxproj
new file mode 100644
index 0000000..af8f40a
--- /dev/null
+++ b/source/gfoidl.Stochastics.Gpu-linux-64/gfoidl.Stochastics.Gpu-linux-64.vcxproj
@@ -0,0 +1,51 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{0729f977-87f1-4139-a9b1-569bfb4a352e}</ProjectGuid>
+    <Keyword>Linux</Keyword>
+    <RootNamespace>gfoidl_Stochastics_Gpu_linux_64</RootNamespace>
+    <MinimumVisualStudioVersion>15.0</MinimumVisualStudioVersion>
+    <ApplicationType>Linux</ApplicationType>
+    <ApplicationTypeRevision>1.0</ApplicationTypeRevision>
+    <TargetLinuxPlatform>Generic</TargetLinuxPlatform>
+    <LinuxProjectType>{2238F9CD-F817-4ECC-BD14-2524D2669B35}</LinuxProjectType>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <UseDebugLibraries>true</UseDebugLibraries>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <UseDebugLibraries>false</UseDebugLibraries>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings" />
+  <ImportGroup Label="Shared" />
+  <ImportGroup Label="PropertySheets" />
+  <PropertyGroup Label="UserMacros" />
+  <ItemGroup>
+    <None Include="build.sh" />
+    <None Include="gpu_core.cu" />
+    <None Include="kernel.cu" />
+    <None Include="kernel_utils.cu" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="dll.h" />
+    <ClInclude Include="gpu_core.h" />
+    <ClInclude Include="kernel.h" />
+    <ClInclude Include="kernel_utils.h" />
+    <ClInclude Include="SampleStats.h" />
+  </ItemGroup>
+  <ItemDefinitionGroup />
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets" />
+</Project>
\ No newline at end of file
diff --git a/source/gfoidl.Stochastics.Gpu-linux-64/gpu_core.cu b/source/gfoidl.Stochastics.Gpu-linux-64/gpu_core.cu
new file mode 100644
index 0000000..30f5c2b
--- /dev/null
+++ b/source/gfoidl.Stochastics.Gpu-linux-64/gpu_core.cu
@@ -0,0 +1,90 @@
+//#define SINGLE_THREAD
+//-----------------------------------------------------------------------------
+#include "gpu_core.h"
+#include <cuda_runtime.h>
+#include "kernel.h"
+
+#if defined(DEBUG) || defined(_DEBUG)
+    #include <stdio.h>
+    #include <assert.h>
+#endif
+//-----------------------------------------------------------------------------
+// Forward declarations
+inline cudaError_t checkCuda(cudaError_t result);
+//-----------------------------------------------------------------------------
+bool gpu_available()
+{
+    int deviceCount;
+    cudaError_t errorId = cudaGetDeviceCount(&deviceCount);
+
+    return errorId == cudaSuccess
+        && deviceCount > 0;
+}
+//-----------------------------------------------------------------------------
+const char* gpu_get_error_string(const int errorCode)
+{
+    return cudaGetErrorString(static_cast<cudaError>(errorCode));
+}
+//-----------------------------------------------------------------------------
+int gpu_sample_calc_stats(double* sample, const int sampleSize, SampleStats* sampleStats)
+{
+    double*      deviceSample;
+    SampleStats* deviceSampleStats;
+
+    try
+    {
+        checkCuda(cudaMalloc(&deviceSample, sizeof(double) * sampleSize));
+        checkCuda(cudaMalloc(&deviceSampleStats, sizeof(SampleStats)));
+
+        checkCuda(cudaMemcpy(deviceSample, sample, sizeof(double) * sampleSize, cudaMemcpyHostToDevice));
+        checkCuda(cudaMemset(deviceSampleStats, 0, sizeof(SampleStats)));
+
+#ifndef SINGLE_THREAD
+        const int blockSize = 256;
+        int numBlocks       = (sampleSize + blockSize - 1) / blockSize;
+#else
+        const int blockSize = 1;
+        const int numBlocks = 1;
+#endif
+
+#if defined(DEBUG) || defined(_DEBUG)
+        printf("blockSize: %d\nnumBlocks: %d\n", blockSize, numBlocks);
+#endif
+
+        // For final fixup of values a separate kernel is queued to the device.
+        // Otherwise there's no way of syncing all threads in the grid.
+        Kernel::CalculateAverageAndVarianceCore<<<numBlocks, blockSize>>>(deviceSample, sampleSize, deviceSampleStats);
+        Kernel::CalculateAverageAndVarianceCoreFinal<<<1, 1>>>(deviceSampleStats, sampleSize);
+
+        Kernel::CalculateDeltaSkewnessKurtosis<<<numBlocks, blockSize>>>(deviceSample, sampleSize, deviceSampleStats);
+
+        //checkCuda(cudaDeviceSynchronize());       // not necessary
+        checkCuda(cudaMemcpy(sampleStats, deviceSampleStats, sizeof(SampleStats), cudaMemcpyDeviceToHost));
+
+        checkCuda(cudaFree(deviceSample));
+        checkCuda(cudaFree(deviceSampleStats));
+
+        // Final fixup of values, not done yet
+        // No need to launch a kernel for a simple division.
+        sampleStats->Delta /= sampleSize;
+
+        double sigma = sqrt(sampleStats->VarianceCore / sampleSize);
+        double t     = sampleSize * sigma * sigma * sigma;
+        sampleStats->Skewness /= t;
+        sampleStats->Kurtosis /= t * sigma;
+    }
+    catch (const int e)
+    {
+        return e;
+    }
+
+    return 0;
+}
+//-----------------------------------------------------------------------------
+cudaError_t checkCuda(cudaError_t result)
+{
+    if (result != cudaSuccess)
+        throw static_cast<int>(result);
+
+    return result;
+}
diff --git a/source/gfoidl.Stochastics.Gpu-linux-64/gpu_core.h b/source/gfoidl.Stochastics.Gpu-linux-64/gpu_core.h
new file mode 100644
index 0000000..9e887b4
--- /dev/null
+++ b/source/gfoidl.Stochastics.Gpu-linux-64/gpu_core.h
@@ -0,0 +1,13 @@
+#pragma once
+//-----------------------------------------------------------------------------
+#include "dll.h"
+#include "SampleStats.h"
+//-----------------------------------------------------------------------------
+BEGIN_EXTERN_C
+
+GPU_API bool gpu_available();
+GPU_API const char* gpu_get_error_string(const int errorCode);
+
+GPU_API int gpu_sample_calc_stats(double* sample, const int sampleSize, SampleStats* sampleStats);
+
+END_EXTERN_C
diff --git a/source/gfoidl.Stochastics.Gpu-linux-64/kernel.cu b/source/gfoidl.Stochastics.Gpu-linux-64/kernel.cu
new file mode 100644
index 0000000..d3240e9
--- /dev/null
+++ b/source/gfoidl.Stochastics.Gpu-linux-64/kernel.cu
@@ -0,0 +1,88 @@
+#include "kernel.h"
+#include "kernel_utils.h"
+//-----------------------------------------------------------------------------
+namespace Kernel
+{
+    __global__
+    void CalculateAverageAndVarianceCore(const double* sample, const int n, SampleStats* sampleStats)
+    {
+        const int index  = blockDim.x * blockIdx.x + threadIdx.x;
+        const int stride = gridDim.x * blockDim.x;
+
+        double avg      = 0;
+        double variance = 0;
+
+        for (int i = index; i < n; i += stride)
+        {
+            avg      += sample[i];
+            variance += sample[i] * sample[i];
+        }
+
+        Utils::TwoDoubles twoDoubles {avg, variance};
+        twoDoubles = Utils::BlockReduceSum(twoDoubles);
+
+        // Final sum in first thread of each block
+        if (threadIdx.x == 0)
+        {
+#if __CUDA_ARCH__ < 600
+            Utils::atomicAdd(&sampleStats->Mean        , twoDoubles.A);
+            Utils::atomicAdd(&sampleStats->VarianceCore, twoDoubles.B);
+#else
+            atomicAdd(&sampleStats->Mean        , twoDoubles.A);
+            atomicAdd(&sampleStats->VarianceCore, twoDoubles.B);
+#endif
+        }
+    }
+    //-----------------------------------------------------------------------------
+    __global__
+    void CalculateAverageAndVarianceCoreFinal(SampleStats* sampleStats, const int n)
+    {
+        const int index = blockDim.x * blockIdx.x + threadIdx.x;
+
+        if (index == 0)
+        {
+            double avg                 = sampleStats->Mean / n;
+            sampleStats->Mean          = avg;
+            sampleStats->VarianceCore -= n * avg*avg;
+        }
+    }
+    //-----------------------------------------------------------------------------
+    __global__
+    void CalculateDeltaSkewnessKurtosis(const double* sample, const int n, SampleStats* sampleStats)
+    {
+        const int index  = blockDim.x * blockIdx.x + threadIdx.x;
+        const int stride = gridDim.x * blockDim.x;
+
+        double avg      = sampleStats->Mean;
+        double delta    = 0;
+        double skewness = 0;
+        double kurtosis = 0;
+
+        for (int i = index; i < n; i += stride)
+        {
+            double t = sample[i] - avg;
+            double t1 = t * t*t;
+
+            delta    += abs(t);
+            skewness += t1;
+            kurtosis += t1 * t;
+        }
+
+        Utils::ThreeDoubles threeDoubles {delta, skewness,kurtosis};
+        threeDoubles = Utils::BlockReduceSum(threeDoubles);
+
+        // Final sum in first thread of each block
+        if (threadIdx.x == 0)
+        {
+#if __CUDA_ARCH__ < 600
+            Utils::atomicAdd(&sampleStats->Delta   , threeDoubles.A);
+            Utils::atomicAdd(&sampleStats->Skewness, threeDoubles.B);
+            Utils::atomicAdd(&sampleStats->Kurtosis, threeDoubles.C);
+#else
+            atomicAdd(&sampleStats->Delta   , threeDoubles.A);
+            atomicAdd(&sampleStats->Skewness, threeDoubles.B);
+            atomicAdd(&sampleStats->Kurtosis, threeDoubles.C);
+#endif
+        }
+    }
+}
diff --git a/source/gfoidl.Stochastics.Gpu-linux-64/kernel.h b/source/gfoidl.Stochastics.Gpu-linux-64/kernel.h
new file mode 100644
index 0000000..64fd921
--- /dev/null
+++ b/source/gfoidl.Stochastics.Gpu-linux-64/kernel.h
@@ -0,0 +1,13 @@
+#pragma once
+//-----------------------------------------------------------------------------
+#include <cuda_runtime.h>
+#include <device_launch_parameters.h>
+#include "SampleStats.h"
+//-----------------------------------------------------------------------------
+namespace Kernel
+{
+    __global__ void CalculateAverageAndVarianceCore(const double* sample, const int n, SampleStats* sampleStats);
+    __global__ void CalculateAverageAndVarianceCoreFinal(SampleStats* sampleStats, const int n);
+
+    __global__ void CalculateDeltaSkewnessKurtosis(const double* sample, const int n, SampleStats* sampleStats);
+}
diff --git a/source/gfoidl.Stochastics.Gpu-linux-64/kernel_utils.cu b/source/gfoidl.Stochastics.Gpu-linux-64/kernel_utils.cu
new file mode 100644
index 0000000..f3fffae
--- /dev/null
+++ b/source/gfoidl.Stochastics.Gpu-linux-64/kernel_utils.cu
@@ -0,0 +1,193 @@
+#include "kernel_utils.h"
+#include <device_launch_parameters.h>
+
+#ifdef _DEBUG
+    #include <cassert>
+#endif
+//-----------------------------------------------------------------------------
+// Code for reduction taken from https://devblogs.nvidia.com/faster-parallel-reductions-kepler/
+//-----------------------------------------------------------------------------
+namespace Kernel
+{
+    namespace Utils
+    {
+        using uint = unsigned int;
+        const uint FULL_MASK = 0xffffffff;
+        //---------------------------------------------------------------------
+#if __CUDA_ARCH__ < 600
+        __device__
+        double atomicAdd(double* address, double val)
+        {
+            using ulli = unsigned long long int;
+
+            ulli* tmp = reinterpret_cast<ulli*>(address);
+            ulli old = *tmp;
+            ulli assumed;
+
+            do
+            {
+                assumed = old;
+
+                old = atomicCAS(tmp, assumed, __double_as_longlong(val + __longlong_as_double(assumed)));
+            } while (assumed != old);
+
+            return __longlong_as_double(old);
+        }
+#endif
+        //---------------------------------------------------------------------
+        __device__
+        double WarpReduceSum(double value)
+        {
+            for (int offset = warpSize / 2; offset > 0; offset /= 2)
+                value += __shfl_down_sync(FULL_MASK, value, offset);
+
+            return value;
+        }
+        //---------------------------------------------------------------------
+        __device__
+        TwoDoubles WarpReduceSum(TwoDoubles twoDoubles)
+        {
+            for (int offset = warpSize / 2; offset > 0; offset /= 2)
+            {
+                twoDoubles.A += __shfl_down_sync(FULL_MASK, twoDoubles.A, offset);
+                twoDoubles.B += __shfl_down_sync(FULL_MASK, twoDoubles.B, offset);
+            }
+
+            return twoDoubles;
+        }
+        //---------------------------------------------------------------------
+        __device__
+        ThreeDoubles WarpReduceSum(ThreeDoubles threeDoubles)
+        {
+            for (int offset = warpSize / 2; offset > 0; offset /= 2)
+            {
+                threeDoubles.A += __shfl_down_sync(FULL_MASK, threeDoubles.A, offset);
+                threeDoubles.B += __shfl_down_sync(FULL_MASK, threeDoubles.B, offset);
+                threeDoubles.C += __shfl_down_sync(FULL_MASK, threeDoubles.C, offset);
+            }
+
+            return threeDoubles;
+        }
+        //---------------------------------------------------------------------
+        __device__
+        double BlockReduceSum(double value)
+        {
+#ifdef _DEBUG
+            assert(warpSize == 32);
+#endif
+            static __shared__ double shared[32];
+            const int lane   = threadIdx.x & (warpSize - 1);        // threadIdx.x % warpSize
+            const int warpId = threadIdx.x / warpSize;
+
+            value = WarpReduceSum(value);
+
+            if (lane == 0)
+                shared[warpId] = value;
+
+            __syncthreads();
+
+            // Read from shared memory only if that warp existed
+            bool warpExisted = (threadIdx.x < blockDim.x / warpSize) || (threadIdx.x == 0 && blockDim.x == 1);
+            value = warpExisted ? shared[lane] : 0;
+
+            // Final reduce within first warp
+            if (warpId == 0)
+                value = WarpReduceSum(value);
+
+            return value;
+        }
+        //---------------------------------------------------------------------
+        __device__
+        TwoDoubles BlockReduceSum(TwoDoubles twoDoubles)
+        {
+#ifdef _DEBUG
+            assert(warpSize == 32);
+#endif
+            static __shared__ TwoDoubles shared[32];
+            const int lane   = threadIdx.x & (warpSize - 1);        // threadIdx.x % warpSize
+            const int warpId = threadIdx.x / warpSize;
+
+            twoDoubles = WarpReduceSum(twoDoubles);
+
+            if (lane == 0)
+                shared[warpId] = twoDoubles;
+
+            __syncthreads();
+
+            // Read from shared memory only if that warp existed
+            bool warpExisted = (threadIdx.x < blockDim.x / warpSize) || (threadIdx.x == 0 && blockDim.x == 1);
+            twoDoubles = warpExisted ? shared[lane] : TwoDoubles {0,0};
+
+            // Final reduce within first warp
+            if (warpId == 0)
+                twoDoubles = WarpReduceSum(twoDoubles);
+
+            return twoDoubles;
+        }
+        //---------------------------------------------------------------------
+        __device__
+        ThreeDoubles BlockReduceSum(ThreeDoubles threeDoubles)
+        {
+#ifdef _DEBUG
+            assert(warpSize == 32);
+#endif
+            static __shared__ ThreeDoubles shared[32];
+            const int lane = threadIdx.x & (warpSize - 1);      // threadIdx.x % warpSize
+            const int warpId = threadIdx.x / warpSize;
+
+            threeDoubles = WarpReduceSum(threeDoubles);
+
+            if (lane == 0)
+                shared[warpId] = threeDoubles;
+
+            __syncthreads();
+
+            // Read from shared memory only if that warp existed
+            bool warpExisted = (threadIdx.x < blockDim.x / warpSize) || (threadIdx.x == 0 && blockDim.x == 1);
+            threeDoubles = warpExisted ? shared[lane] : ThreeDoubles {0,0,0};
+
+            // Final reduce within first warp
+            if (warpId == 0)
+                threeDoubles = WarpReduceSum(threeDoubles);
+
+            return threeDoubles;
+        }
+        //---------------------------------------------------------------------
+        __device__
+        void ReduceSum(double value, double* result)
+        {
+            value = BlockReduceSum(value);
+
+            // Final sum in first thread of each block
+            if (threadIdx.x == 0)
+                atomicAdd(result, value);
+        }
+        //---------------------------------------------------------------------
+        __device__
+        void ReduceSum(TwoDoubles twoDoubles, TwoDoubles* result)
+        {
+            twoDoubles = BlockReduceSum(twoDoubles);
+
+            // Final sum in first thread of each block
+            if (threadIdx.x == 0)
+            {
+                atomicAdd(&result->A, twoDoubles.A);
+                atomicAdd(&result->B, twoDoubles.B);
+            }
+        }
+        //---------------------------------------------------------------------
+        __device__
+        void ReduceSum(const double* in, const int n, double* result)
+        {
+            double blockSum = 0;
+
+            const int index  = blockDim.x * blockIdx.x + threadIdx.x;
+            const int stride = gridDim.x * blockDim.x;
+
+            for (int i = index; i < n; i += stride)
+                blockSum += in[i];
+
+            ReduceSum(blockSum, result);
+        }
+    }
+}
diff --git a/source/gfoidl.Stochastics.Gpu-linux-64/kernel_utils.h b/source/gfoidl.Stochastics.Gpu-linux-64/kernel_utils.h
new file mode 100644
index 0000000..1cbf5fc
--- /dev/null
+++ b/source/gfoidl.Stochastics.Gpu-linux-64/kernel_utils.h
@@ -0,0 +1,53 @@
+#pragma once
+//-----------------------------------------------------------------------------
+#include <cuda_runtime.h>
+//-----------------------------------------------------------------------------
+namespace Kernel
+{
+    namespace Utils
+    {
+        struct TwoDoubles
+        {
+            double A;
+            double B;
+
+            __device__ TwoDoubles() {}
+            __device__ TwoDoubles(const double a, const double b)
+            {
+                this->A = a;
+                this->B = b;
+            }
+        };
+        //---------------------------------------------------------------------
+        struct ThreeDoubles
+        {
+            double A;
+            double B;
+            double C;
+
+            __device__ ThreeDoubles() {}
+            __device__ ThreeDoubles(const double a, const double b, const double c)
+            {
+                this->A = a;
+                this->B = b;
+                this->C = c;
+            }
+        };
+        //---------------------------------------------------------------------
+        __device__ double WarpReduceSum(double value);
+        __device__ TwoDoubles WarpReduceSum(TwoDoubles twoDoubles);
+        __device__ ThreeDoubles WarpReduceSum(ThreeDoubles threeDoubles);
+
+        __device__ double BlockReduceSum(double value);
+        __device__ TwoDoubles BlockReduceSum(TwoDoubles twoDoubles);
+        __device__ ThreeDoubles BlockReduceSum(ThreeDoubles threeDoubles);
+
+        __device__ void ReduceSum(double value, double* result);
+        __device__ void ReduceSum(TwoDoubles twoDoubles, TwoDoubles* result);
+        __device__ void ReduceSum(const double* in, const int n, double* result);
+
+#if __CUDA_ARCH__ < 600
+        __device__ double atomicAdd(double* address, double val);
+#endif
+    }
+}
diff --git a/source/gfoidl.Stochastics.Gpu-win-64/cpp.hint b/source/gfoidl.Stochastics.Gpu-win-64/cpp.hint
new file mode 100644
index 0000000..f02e3a1
--- /dev/null
+++ b/source/gfoidl.Stochastics.Gpu-win-64/cpp.hint
@@ -0,0 +1,5 @@
+// Hint files help the Visual Studio IDE interpret Visual C++ identifiers
+// such as names of functions and macros.
+// For more information see https://go.microsoft.com/fwlink/?linkid=865984
+#define GPU_API __declspec(dllexport)
+#define GPU_API __declspec(dllimport)
diff --git a/source/gfoidl.Stochastics.Gpu-win-64/dummy.cpp b/source/gfoidl.Stochastics.Gpu-win-64/dummy.cpp
new file mode 100644
index 0000000..21c8c45
--- /dev/null
+++ b/source/gfoidl.Stochastics.Gpu-win-64/dummy.cpp
@@ -0,0 +1 @@
+// dummy.cpp needed for VS to show C++ compiler properties
diff --git a/source/gfoidl.Stochastics.Gpu-win-64/gfoidl.Stochastics.Gpu-win-64.vcxproj b/source/gfoidl.Stochastics.Gpu-win-64/gfoidl.Stochastics.Gpu-win-64.vcxproj
new file mode 100644
index 0000000..e425d55
--- /dev/null
+++ b/source/gfoidl.Stochastics.Gpu-win-64/gfoidl.Stochastics.Gpu-win-64.vcxproj
@@ -0,0 +1,114 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\gfoidl.Stochastics.Gpu-linux-64\dll.h" />
+    <ClInclude Include="..\gfoidl.Stochastics.Gpu-linux-64\gpu_core.h" />
+    <ClInclude Include="..\gfoidl.Stochastics.Gpu-linux-64\kernel.h" />
+    <ClInclude Include="..\gfoidl.Stochastics.Gpu-linux-64\kernel_utils.h" />
+    <ClInclude Include="..\gfoidl.Stochastics.Gpu-linux-64\SampleStats.h" />
+  </ItemGroup>
+  <ItemGroup>
+    <CudaCompile Include="..\gfoidl.Stochastics.Gpu-linux-64\gpu_core.cu" />
+    <CudaCompile Include="..\gfoidl.Stochastics.Gpu-linux-64\kernel.cu" />
+    <CudaCompile Include="..\gfoidl.Stochastics.Gpu-linux-64\kernel_utils.cu" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="dummy.cpp" />
+  </ItemGroup>
+  <ItemGroup>
+    <None Include="cpp.hint" />
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{60D3CEC5-E8DE-45E4-8834-86C9940D942A}</ProjectGuid>
+    <RootNamespace>gfoidl_Stochastics_Gpu_win_64</RootNamespace>
+    <WindowsTargetPlatformVersion>10.0.16299.0</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <CharacterSet>MultiByte</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>MultiByte</CharacterSet>
+    <PlatformToolset>v141</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+    <Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 10.0.props" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <LinkIncremental>true</LinkIncremental>
+    <TargetName>gfoidl-Stochastics-gpu</TargetName>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <TargetName>gfoidl-Stochastics-gpu</TargetName>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>GFOIDL_STOCHASTICS_GPU_EXPORTS;WIN32;WIN64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\gfoidl.Stochastics.Gpu-linux-64;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <AdditionalDependencies>cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+    <CudaCompile>
+      <TargetMachinePlatform>64</TargetMachinePlatform>
+      <PtxAsOptionV>true</PtxAsOptionV>
+      <GenerateRelocatableDeviceCode>true</GenerateRelocatableDeviceCode>
+      <CodeGeneration>compute_60,sm_60;%(CodeGeneration)</CodeGeneration>
+    </CudaCompile>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>GFOIDL_STOCHASTICS_GPU_EXPORTS;WIN32;WIN64;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>..\gfoidl.Stochastics.Gpu-linux-64;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <SubSystem>Console</SubSystem>
+      <AdditionalDependencies>cudart_static.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+    <CudaCompile>
+      <TargetMachinePlatform>64</TargetMachinePlatform>
+      <PtxAsOptionV>true</PtxAsOptionV>
+      <GenerateRelocatableDeviceCode>true</GenerateRelocatableDeviceCode>
+      <CodeGeneration>compute_60,sm_60;%(CodeGeneration)</CodeGeneration>
+    </CudaCompile>
+  </ItemDefinitionGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+    <Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 10.0.targets" />
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/source/gfoidl.Stochastics.Native-win-x64/dll.h b/source/gfoidl.Stochastics.Native-win-x64/dll.h
index 6ede07c..63f4194 100644
--- a/source/gfoidl.Stochastics.Native-win-x64/dll.h
+++ b/source/gfoidl.Stochastics.Native-win-x64/dll.h
@@ -1,12 +1,6 @@
 #pragma once
 //-----------------------------------------------------------------------------
-// The following ifdef block is the standard way of creating macros which make exporting 
-// from a DLL simpler. All files within this DLL are compiled with the GFOIDLSTOCHASTICSNATIVE_EXPORTS
-// symbol defined on the command line. This symbol should not be defined on any project
-// that uses this DLL. This way any other project whose source files include this file see 
-// DLL_API functions as being imported from a DLL, whereas this DLL sees symbols
-// defined with this macro as being exported.
-#ifdef GFOIDLSTOCHASTICSNATIVE_EXPORTS
+#ifdef GFOIDL_STOCHASTICS_NATIVE_EXPORTS
     #define DLL_API __declspec(dllexport)
 #else
     #define DLL_API __declspec(dllimport)
diff --git a/source/gfoidl.Stochastics.Native-win-x64/gfoidl.Stochastics.Native-win-x64.vcxproj b/source/gfoidl.Stochastics.Native-win-x64/gfoidl.Stochastics.Native-win-x64.vcxproj
index 5791532..bf5f842 100644
--- a/source/gfoidl.Stochastics.Native-win-x64/gfoidl.Stochastics.Native-win-x64.vcxproj
+++ b/source/gfoidl.Stochastics.Native-win-x64/gfoidl.Stochastics.Native-win-x64.vcxproj
@@ -57,7 +57,7 @@
       <PrecompiledHeader>Use</PrecompiledHeader>
       <WarningLevel>Level3</WarningLevel>
       <Optimization>Disabled</Optimization>
-      <PreprocessorDefinitions>_DEBUG;GFOIDLSTOCHASTICSNATIVE_EXPORTS;_WINDOWS;_USRDLL;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <PreprocessorDefinitions>_DEBUG;GFOIDL_STOCHASTICS_NATIVE_EXPORTS;_WINDOWS;_USRDLL;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <ConformanceMode>true</ConformanceMode>
     </ClCompile>
     <Link>
@@ -72,7 +72,7 @@
       <Optimization>MaxSpeed</Optimization>
       <FunctionLevelLinking>true</FunctionLevelLinking>
       <IntrinsicFunctions>true</IntrinsicFunctions>
-      <PreprocessorDefinitions>NDEBUG;GFOIDLSTOCHASTICSNATIVE_EXPORTS;_WINDOWS;_USRDLL;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <PreprocessorDefinitions>NDEBUG;GFOIDL_STOCHASTICS_NATIVE_EXPORTS;_WINDOWS;_USRDLL;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <ConformanceMode>true</ConformanceMode>
     </ClCompile>
     <Link>
diff --git a/source/gfoidl.Stochastics/Enumerators/ArrayEnumerator.cs b/source/gfoidl.Stochastics/Enumerators/ArrayEnumerator.cs
index f495a69..7bf7147 100644
--- a/source/gfoidl.Stochastics/Enumerators/ArrayEnumerator.cs
+++ b/source/gfoidl.Stochastics/Enumerators/ArrayEnumerator.cs
@@ -21,6 +21,8 @@ public ArrayEnumerable(T[] array)
         //---------------------------------------------------------------------
         public static implicit operator ArrayEnumerable<T>(T[] array)       => new ArrayEnumerable<T>(array);
         public static implicit operator T[] (ArrayEnumerable<T> enumerable) => enumerable._array;
+        //---------------------------------------------------------------------
+        public ref T GetPinnableReference() => ref _array[0];
     }
     //---------------------------------------------------------------------
     public struct ArrayEnumerator<T> : IEnumerator<T>
diff --git a/source/gfoidl.Stochastics/Native/Gpu.cs b/source/gfoidl.Stochastics/Native/Gpu.cs
new file mode 100644
index 0000000..5394a59
--- /dev/null
+++ b/source/gfoidl.Stochastics/Native/Gpu.cs
@@ -0,0 +1,114 @@
+﻿using System;
+using System.Runtime.InteropServices;
+using gfoidl.Stochastics.Statistics;
+
+namespace gfoidl.Stochastics.Native
+{
+    internal static class Gpu
+    {
+        public const string EnvVariableName = "GFOIDL_STOCHASTICS_USE_GPU";
+        //---------------------------------------------------------------------
+        private static readonly bool s_isAvailable;
+        private static readonly bool s_isUseOfGpuForced;
+        //---------------------------------------------------------------------
+        static Gpu()
+        {
+            string env = Environment.GetEnvironmentVariable(EnvVariableName);
+
+            s_isAvailable = RuntimeInformation.OSArchitecture == Architecture.X64
+                && RuntimeHelper.IsRunningOnDotNetCore()
+                && GpuMethods.gpu_available()
+                && env != "0";
+
+            s_isUseOfGpuForced = string.Equals(env, "force", StringComparison.OrdinalIgnoreCase);
+        }
+        //---------------------------------------------------------------------
+        public static bool IsAvailable      => s_isAvailable;
+        public static bool IsUseOfGpuForced => s_isUseOfGpuForced;
+        //---------------------------------------------------------------------
+        public static unsafe void CalculateSampleStats(Sample sample)
+        {
+            if (!s_isAvailable)
+                throw new InvalidOperationException(Strings.Gpu_not_available);
+
+            fixed (double* ptr = sample.Values)
+            {
+                SampleStats sampleStats = default;
+                int errorCode = GpuMethods.gpu_sample_calc_stats(ptr, sample.Count, &sampleStats);
+
+                if (errorCode != 0) ThrowGpuException(errorCode);
+
+                sample.Mean         = sampleStats.Mean;
+                sample.Max          = sampleStats.Max;
+                sample.Min          = sampleStats.Min;
+                sample.Delta        = sampleStats.Delta;
+                sample.VarianceCore = sampleStats.VarianceCore;
+                sample.Skewness     = sampleStats.Skewness;
+                sample.Kurtosis     = sampleStats.Kurtosis;
+            }
+        }
+        //---------------------------------------------------------------------
+        private static void ThrowGpuException(int errorCode)
+        {
+            IntPtr ptr = GpuMethods.gpu_get_error_string(errorCode);
+            string msg = $"CUDA runtime error: {Marshal.PtrToStringAnsi(ptr)}";
+            throw new GpuException(msg);
+        }
+        //---------------------------------------------------------------------
+        private static class GpuMethods
+        {
+            private const string LibName = "gfoidl-Stochastics-gpu";
+            //---------------------------------------------------------------------
+            [DllImport(LibName)]
+            public static extern bool gpu_available();
+            //---------------------------------------------------------------------
+            [DllImport(LibName)]
+            public static extern IntPtr gpu_get_error_string(int errorCode);
+            //---------------------------------------------------------------------
+            [DllImport(LibName)]
+            public static extern unsafe int gpu_sample_calc_stats(double* sample, int sampleSize, SampleStats* sampleStats);
+        }
+        //---------------------------------------------------------------------
+        [StructLayout(LayoutKind.Sequential)]
+        private struct SampleStats
+        {
+            public double Mean;
+            public double Max;
+            public double Min;
+            public double Delta;
+            public double VarianceCore;
+            public double Skewness;
+            public double Kurtosis;
+        }
+    }
+    //-------------------------------------------------------------------------
+    /// <summary>
+    /// Represents an error that is caused by the GPU.
+    /// </summary>
+    public class GpuException : Exception
+    {
+        /// <summary>
+        /// Initializes a new instance of the <see cref="GpuException" /> class.
+        /// </summary>
+        public GpuException() : base() { }
+
+        /// <summary>
+        /// Initializes a new instance of the <see cref="GpuException" /> class 
+        /// with a specified error message.
+        /// </summary>
+        /// <param name="message">The message that describes the error.</param>
+        public GpuException(string message) : base(message) { }
+
+        /// <summary>
+        /// Initializes a new instance of the <see cref="GpuException" />
+        /// class with a specified error message and a reference to the inner 
+        /// exception that is the cause of this exception.
+        /// </summary>
+        /// <param name="message">The error message that explains the reason for the exception.</param>
+        /// <param name="innerException">
+        /// The exception that is the cause of the current exception, or a null reference (Nothing in Visual Basic) 
+        /// if no inner exception is specified.
+        /// </param>
+        public GpuException(string message, Exception innerException) : base(message, innerException) { }
+    }
+}
diff --git a/source/gfoidl.Stochastics/RuntimeHelper.cs b/source/gfoidl.Stochastics/RuntimeHelper.cs
index 1e6d1b9..c651fd7 100644
--- a/source/gfoidl.Stochastics/RuntimeHelper.cs
+++ b/source/gfoidl.Stochastics/RuntimeHelper.cs
@@ -12,7 +12,7 @@ public static bool IsRunningOnDotNetCore()
             if (!_isRunningOnDotNetCore.HasValue)
             {
                 string frameworkName = Assembly.GetEntryAssembly()?.GetCustomAttribute<TargetFrameworkAttribute>()?.FrameworkName;
-                _isRunningOnDotNetCore = frameworkName.Contains("NETCoreApp");
+                _isRunningOnDotNetCore = frameworkName?.Contains("NETCoreApp") ?? false;
             }
 
             return _isRunningOnDotNetCore.Value;
diff --git a/source/gfoidl.Stochastics/SpecialFunctions.cs b/source/gfoidl.Stochastics/SpecialFunctions.cs
index fd83902..dbff91f 100644
--- a/source/gfoidl.Stochastics/SpecialFunctions.cs
+++ b/source/gfoidl.Stochastics/SpecialFunctions.cs
@@ -10,19 +10,19 @@ namespace gfoidl.Stochastics
     /// </summary>
     public static class SpecialFunctions
     {
-        private static readonly bool _isDotNetCore;
-        private static readonly bool _erfNative;
-        private static readonly bool _erfNativeLinux;
+        private static readonly bool s_isDotNetCore;
+        private static readonly bool s_erfNative;
+        private static readonly bool s_erfNativeLinux;
         //---------------------------------------------------------------------
         static SpecialFunctions()
         {
-            _isDotNetCore = RuntimeHelper.IsRunningOnDotNetCore();
+            s_isDotNetCore = RuntimeHelper.IsRunningOnDotNetCore();
 
-            _erfNative =
+            s_erfNative =
                 RuntimeInformation.OSArchitecture == Architecture.X64
                 && (RuntimeInformation.IsOSPlatform(OSPlatform.Windows) || RuntimeInformation.IsOSPlatform(OSPlatform.Linux));
 
-            _erfNativeLinux = _erfNative && RuntimeInformation.IsOSPlatform(OSPlatform.Linux);
+            s_erfNativeLinux = s_erfNative && RuntimeInformation.IsOSPlatform(OSPlatform.Linux);
         }
         //---------------------------------------------------------------------
         /// <summary>
@@ -34,7 +34,7 @@ static SpecialFunctions()
         // https://math.stackexchange.com/questions/263216/error-function-erf-with-better-precision/1889960#1889960
         public static double Erf(double x)
         {
-            if (_isDotNetCore && _erfNativeLinux)
+            if (s_isDotNetCore && s_erfNativeLinux)
                 return NativeMethods.gaussian_error_function(x);
 
             /*
@@ -197,7 +197,7 @@ public static double Erf(double x)
         /// <seealso cref="Erfc(double[])" />
         public static double Erfc(double x)
         {
-            if (_isDotNetCore && _erfNativeLinux)
+            if (s_isDotNetCore && s_erfNativeLinux)
                 return NativeMethods.gaussian_error_function_complementary(x);
 
             /*
@@ -412,7 +412,7 @@ internal static unsafe void Erf(double* values, double* result, int size)
             // Is a JIT compile-time constant, due the cctor. Note only a static readonly field may not be
             // sufficient (on the first access). See https://github.com/dotnet/coreclr/issues/1193
             // So the not taken branch(es) will be removed.
-            if (_isDotNetCore && _erfNative)
+            if (s_isDotNetCore && s_erfNative)
                 NativeMethods.gaussian_error_function_vector(values, result, size);
             else
             {
@@ -423,7 +423,7 @@ internal static unsafe void Erf(double* values, double* result, int size)
         //---------------------------------------------------------------------
         internal static unsafe void Erfc(double* values, double* result, int size)
         {
-            if (_isDotNetCore && _erfNative)
+            if (s_isDotNetCore && s_erfNative)
                 NativeMethods.gaussian_error_function_complementary_vector(values, result, size);
             else
             {
diff --git a/source/gfoidl.Stochastics/Statistics/Sample.Calculations.cs b/source/gfoidl.Stochastics/Statistics/Sample.Calculations.cs
index 546a37b..2ca98d5 100644
--- a/source/gfoidl.Stochastics/Statistics/Sample.Calculations.cs
+++ b/source/gfoidl.Stochastics/Statistics/Sample.Calculations.cs
@@ -1,5 +1,6 @@
 ﻿using System;
 using System.Threading.Tasks;
+using gfoidl.Stochastics.Native;
 
 namespace gfoidl.Stochastics.Statistics
 {
@@ -8,11 +9,36 @@ partial class Sample
         private static ParallelOptions GetParallelOptions()
             => new ParallelOptions { MaxDegreeOfParallelism = Environment.ProcessorCount };
         //---------------------------------------------------------------------
+        /// <summary>
+        /// Calculates the statistics for <see cref="Sample" />.
+        /// </summary>
+        /// <remarks>
+        /// The statistical properties of <see cref="Sample" /> are lazy-evaluated.
+        /// With this method these properties are instantly evalualted / calculated.
+        /// </remarks>
+        public void CalculateStats()
+        {
+            // is threadsafe, because from shared state is just read
+            Task<double> medianTask = Task.Run(() => this.CalculateMedian());
+
+            if (Gpu.IsAvailable && (Gpu.IsUseOfGpuForced || this.Count > SampleThresholds.ThresholdForGpu))
+                Gpu.CalculateSampleStats(this);
+            else
+            {
+                this.CalculateAverageAndVarianceCore();
+                this.GetMinMax();
+                this.CalculateDelta();
+                this.CalculateSkewnessAndKurtosis();
+            }
+
+            medianTask.GetAwaiter().GetResult();
+        }
+        //---------------------------------------------------------------------
         private double CalculateMedian()
         {
             int n = this.SortedValues.Count;
 
-            if (n % 2 == 0)
+            if ((n & (2 - 1)) == 0)     // n % 2 == 0
                 return (_sortedValues[(n >> 1) - 1] + _sortedValues[n >> 1]) * 0.5;
             else
                 // this is correct, but n is an int, so the next line is 
@@ -25,7 +51,7 @@ private double CalculateMedian()
         private double CalculateSampleVariance() => this.VarianceCore / (this.Count - 1d);
         //---------------------------------------------------------------------
         private double _varianceCore = double.NaN;
-        private double VarianceCore
+        internal double VarianceCore
         {
             get
             {
@@ -34,6 +60,7 @@ private double VarianceCore
 
                 return _varianceCore;
             }
+            set => _varianceCore = value;
         }
     }
 }
diff --git a/source/gfoidl.Stochastics/Statistics/Sample.cs b/source/gfoidl.Stochastics/Statistics/Sample.cs
index 766f3f7..c26b3cf 100644
--- a/source/gfoidl.Stochastics/Statistics/Sample.cs
+++ b/source/gfoidl.Stochastics/Statistics/Sample.cs
@@ -160,6 +160,7 @@ public double Delta
 
                 return _delta;
             }
+            internal set => _delta = value;
         }
         //---------------------------------------------------------------------
         /// <summary>
@@ -261,6 +262,7 @@ public double Skewness
 
                 return _skewness;
             }
+            internal set => _skewness = value;
         }
         //---------------------------------------------------------------------
         private double _kurtosis = double.NaN;
@@ -282,6 +284,7 @@ public double Kurtosis
 
                 return _kurtosis;
             }
+            internal set => _kurtosis = value;
         }
         //---------------------------------------------------------------------
         /// <summary>
diff --git a/source/gfoidl.Stochastics/Statistics/SampleThresholds.cs b/source/gfoidl.Stochastics/Statistics/SampleThresholds.cs
index 45ba420..1d10c21 100644
--- a/source/gfoidl.Stochastics/Statistics/SampleThresholds.cs
+++ b/source/gfoidl.Stochastics/Statistics/SampleThresholds.cs
@@ -23,5 +23,10 @@ public static class SampleThresholds
         ///  and <see cref="Sample.Max" />.
         /// </summary>
         public static int ThresholdForMinMax { get; set; } = 1_750_000;
+
+        /// <summary>
+        /// Threshold for cpu vs. gpu execution for <see cref="Sample.CalculateStats" />.
+        /// </summary>
+        public static int ThresholdForGpu { get; set; } = 5_000_000;
     }
 }
diff --git a/source/gfoidl.Stochastics/Strings.Designer.cs b/source/gfoidl.Stochastics/Strings.Designer.cs
index b5b90f6..56d7968 100644
--- a/source/gfoidl.Stochastics/Strings.Designer.cs
+++ b/source/gfoidl.Stochastics/Strings.Designer.cs
@@ -60,6 +60,15 @@ internal Strings() {
             }
         }
         
+        /// <summary>
+        ///   Looks up a localized string similar to No GPU for CUDA is available, or GFOIDL_STOCHASTICS_USE_GPU is set to &quot;0&quot; (in order to disable usage of the GPU). To use the GPU the process must be x64 and running on .NET Core..
+        /// </summary>
+        internal static string Gpu_not_available {
+            get {
+                return ResourceManager.GetString("Gpu_not_available", resourceCulture);
+            }
+        }
+        
         /// <summary>
         ///   Looks up a localized string similar to The value must be greater than 0..
         /// </summary>
diff --git a/source/gfoidl.Stochastics/Strings.resx b/source/gfoidl.Stochastics/Strings.resx
index a1af411..c9a0fcb 100644
--- a/source/gfoidl.Stochastics/Strings.resx
+++ b/source/gfoidl.Stochastics/Strings.resx
@@ -117,6 +117,9 @@
   <resheader name="writer">
     <value>System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
   </resheader>
+  <data name="Gpu_not_available" xml:space="preserve">
+    <value>No GPU for CUDA is available, or GFOIDL_STOCHASTICS_USE_GPU is set to "0" (in order to disable usage of the GPU). To use the GPU the process must be x64 and running on .NET Core.</value>
+  </data>
   <data name="Value_must_be_greater_than_zero" xml:space="preserve">
     <value>The value must be greater than 0.</value>
   </data>
diff --git a/source/gfoidl.Stochastics/gfoidl.Stochastics.csproj b/source/gfoidl.Stochastics/gfoidl.Stochastics.csproj
index 99fb2f5..9442c60 100644
--- a/source/gfoidl.Stochastics/gfoidl.Stochastics.csproj
+++ b/source/gfoidl.Stochastics/gfoidl.Stochastics.csproj
@@ -32,11 +32,16 @@
         <PackageReference Include="System.Runtime.Intrinsics.Experimental" Version="$(SystemRuntimeIntrinsicsExperimentalPackageVersion)" />
     </ItemGroup>
 
-    <ItemGroup>
+    <ItemGroup Label="native">
         <Content Include="..\..\native-out\gfoidl-Stochastics-Native.dll" PackagePath="runtimes/win-x64/native" Visible="false" CopyToOutputDirectory="PreserveNewest" />
         <Content Include="..\..\native-out\libgfoidl-Stochastics-Native.so" PackagePath="runtimes/linux-x64/native" Visible="false" CopyToOutputDirectory="PreserveNewest" />
     </ItemGroup>
 
+    <ItemGroup Label="gpu">
+        <Content Include="..\..\native-out\gfoidl-Stochastics-gpu.dll" PackagePath="runtimes/win-x64/native" Visible="false" CopyToOutputDirectory="PreserveNewest" />
+        <Content Include="..\..\native-out\libgfoidl-Stochastics-gpu.so" PackagePath="runtimes/linux-x64/native" Visible="false" CopyToOutputDirectory="PreserveNewest" />
+    </ItemGroup>
+
     <ItemGroup>
         <Compile Update="Statistics\Sample.*.cs" DependentUpon="Sample.cs" />
     </ItemGroup>
diff --git a/tests/gfoidl.Stochastics.Gpu-win-64.Tests.Console/gfoidl.Stochastics.Gpu-win-64.Tests.Console.vcxproj b/tests/gfoidl.Stochastics.Gpu-win-64.Tests.Console/gfoidl.Stochastics.Gpu-win-64.Tests.Console.vcxproj
new file mode 100644
index 0000000..692f45d
--- /dev/null
+++ b/tests/gfoidl.Stochastics.Gpu-win-64.Tests.Console/gfoidl.Stochastics.Gpu-win-64.Tests.Console.vcxproj
@@ -0,0 +1,93 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <VCProjectVersion>15.0</VCProjectVersion>
+    <ProjectGuid>{1C7AA347-76CE-4C7F-A468-B46ADDD93215}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>gfoidlStochasticsGpuwin64TestsConsole</RootNamespace>
+    <WindowsTargetPlatformVersion>10.0.16299.0</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="Shared">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <LinkIncremental>true</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <LinkIncremental>false</LinkIncremental>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <ConformanceMode>true</ConformanceMode>
+      <AdditionalIncludeDirectories>..\..\source\gfoidl.Stochastics.Gpu-linux-64;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <ConformanceMode>true</ConformanceMode>
+      <AdditionalIncludeDirectories>..\..\source\gfoidl.Stochastics.Gpu-linux-64;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\..\source\gfoidl.Stochastics.Gpu-win-64\gfoidl.Stochastics.Gpu-win-64.vcxproj">
+      <Project>{60d3cec5-e8de-45e4-8834-86c9940d942a}</Project>
+    </ProjectReference>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="main.cpp" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/tests/gfoidl.Stochastics.Gpu-win-64.Tests.Console/gfoidl.Stochastics.Gpu-win-64.Tests.Console.vcxproj.filters b/tests/gfoidl.Stochastics.Gpu-win-64.Tests.Console/gfoidl.Stochastics.Gpu-win-64.Tests.Console.vcxproj.filters
new file mode 100644
index 0000000..a91ad0e
--- /dev/null
+++ b/tests/gfoidl.Stochastics.Gpu-win-64.Tests.Console/gfoidl.Stochastics.Gpu-win-64.Tests.Console.vcxproj.filters
@@ -0,0 +1,22 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hh;hpp;hxx;hm;inl;inc;ipp;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="main.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/tests/gfoidl.Stochastics.Gpu-win-64.Tests.Console/main.cpp b/tests/gfoidl.Stochastics.Gpu-win-64.Tests.Console/main.cpp
new file mode 100644
index 0000000..5c351c3
--- /dev/null
+++ b/tests/gfoidl.Stochastics.Gpu-win-64.Tests.Console/main.cpp
@@ -0,0 +1,69 @@
+//#define SMALL_SAMPLE
+//#define MEDIUM_SAMPLE
+//-----------------------------------------------------------------------------
+#include "gpu_core.h"
+#include <cassert>
+#include <iostream>
+//-----------------------------------------------------------------------------
+using std::cout;
+using std::cerr;
+using std::endl;
+//-----------------------------------------------------------------------------
+int main()
+{
+#if defined(SMALL_SAMPLE)
+    const int N          = 3;
+    double sample[3]     = {1, 2, 3};
+    double avgExpected   = 2;
+    double deltaExpected = 2.0 / 3;
+#elif defined(MEDIUM_SAMPLE)
+    const int N          = 20;
+    double sample[]      = {0, 3, 4, 1, 2, 3, 0, 2, 1, 3, 2, 0, 2, 2, 3, 2, 5, 2, 3, 999};
+    double avgExpected   = 51.95;
+    double deltaExpected = 94.705;
+#else
+    const int N          = 2000000;
+    double* sample       = new double[N];
+    double avgExpected   = 0;
+    double deltaExpected = 0;
+    
+    for (int i = 0; i < N; ++i)
+    {
+        double t  = (double)rand() / RAND_MAX;
+        sample[i] = t;
+        avgExpected += t;
+    }
+
+    avgExpected /= N;
+
+    for (int i = 0; i < N; ++i)
+        deltaExpected += abs(sample[i] - avgExpected);
+
+    deltaExpected /= N;
+#endif
+
+    SampleStats sampleStats;
+    int errorCode = gpu_sample_calc_stats(sample, N, &sampleStats);
+
+    cout << endl;
+    cout << "CUDA errorcode: " << errorCode << endl;
+
+    if (errorCode != 0)
+    {
+        const char* msg = gpu_get_error_string(errorCode);
+        cerr << msg << endl;
+        return 1;
+    }
+
+    cout << endl;
+    cout << "avg" << endl;
+    cout << "expected: " << avgExpected << endl;
+    cout << "actual:   " << sampleStats.Mean << endl;
+    assert(abs(avgExpected - sampleStats.Mean) < 1e-3);
+
+    cout << endl;
+    cout << "delta" << endl;
+    cout << "expected: " << deltaExpected << endl;
+    cout << "actual:   " << sampleStats.Delta << endl;
+    assert(abs(deltaExpected - sampleStats.Delta) < 1e-3);
+}
diff --git a/tests/gfoidl.Stochastics.Gpu-win-64.Tests/TestHelper.cpp b/tests/gfoidl.Stochastics.Gpu-win-64.Tests/TestHelper.cpp
new file mode 100644
index 0000000..5868fc2
--- /dev/null
+++ b/tests/gfoidl.Stochastics.Gpu-win-64.Tests/TestHelper.cpp
@@ -0,0 +1,23 @@
+#include "stdafx.h"
+#include "TestHelper.h"
+#include "gpu_core.h"
+//-----------------------------------------------------------------------------
+using namespace Microsoft::VisualStudio::CppUnitTestFramework;
+//-----------------------------------------------------------------------------
+void TestHelper::FailIfError(const int errorCode)
+{
+    if (errorCode == 0) return;
+
+    const char* errorMsg = gpu_get_error_string(errorCode);
+    wchar_t* msg         = TestHelper::ToWchar(errorMsg);
+    Assert::Fail(msg);
+}
+//-----------------------------------------------------------------------------
+wchar_t* TestHelper::ToWchar(const char* c)
+{
+    const size_t strLen = strlen(c) + 1;
+    wchar_t* wc         = new wchar_t[strLen];
+    mbstowcs(wc, c, strLen);
+
+    return wc;
+}
diff --git a/tests/gfoidl.Stochastics.Gpu-win-64.Tests/TestHelper.h b/tests/gfoidl.Stochastics.Gpu-win-64.Tests/TestHelper.h
new file mode 100644
index 0000000..2234388
--- /dev/null
+++ b/tests/gfoidl.Stochastics.Gpu-win-64.Tests/TestHelper.h
@@ -0,0 +1,10 @@
+#pragma once
+//-----------------------------------------------------------------------------
+class TestHelper
+{
+public:
+    static void FailIfError(const int errorCode);
+
+private:
+    static wchar_t* ToWchar(const char* c);
+};
diff --git a/tests/gfoidl.Stochastics.Gpu-win-64.Tests/gfoidl.Stochastics.Gpu-win-64.Tests.vcxproj b/tests/gfoidl.Stochastics.Gpu-win-64.Tests/gfoidl.Stochastics.Gpu-win-64.Tests.vcxproj
new file mode 100644
index 0000000..7591514
--- /dev/null
+++ b/tests/gfoidl.Stochastics.Gpu-win-64.Tests/gfoidl.Stochastics.Gpu-win-64.Tests.vcxproj
@@ -0,0 +1,108 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <VCProjectVersion>15.0</VCProjectVersion>
+    <ProjectGuid>{90DA6C71-5A2B-4D26-9EA4-B6BF60A7B7E3}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>gfoidlStochasticsGpuwin64Tests</RootNamespace>
+    <WindowsTargetPlatformVersion>10.0.16299.0</WindowsTargetPlatformVersion>
+    <ProjectSubType>NativeUnitTestProject</ProjectSubType>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+    <UseOfMfc>false</UseOfMfc>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v141</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+    <UseOfMfc>false</UseOfMfc>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="Shared">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <LinkIncremental>true</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <LinkIncremental>true</LinkIncremental>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>..\..\source\gfoidl.Stochastics.Gpu-linux-64;$(VCInstallDir)UnitTest\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <UseFullPaths>true</UseFullPaths>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <AdditionalLibraryDirectories>$(VCInstallDir)UnitTest\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <AdditionalIncludeDirectories>..\..\source\gfoidl.Stochastics.Gpu-linux-64;$(VCInstallDir)UnitTest\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <UseFullPaths>true</UseFullPaths>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <AdditionalLibraryDirectories>$(VCInstallDir)UnitTest\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClInclude Include="stdafx.h" />
+    <ClInclude Include="targetver.h" />
+    <ClInclude Include="TestHelper.h" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="stdafx.cpp">
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Create</PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Create</PrecompiledHeader>
+    </ClCompile>
+    <ClCompile Include="gpu_core.cpp" />
+    <ClCompile Include="TestHelper.cpp" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="..\..\source\gfoidl.Stochastics.Gpu-win-64\gfoidl.Stochastics.Gpu-win-64.vcxproj">
+      <Project>{60d3cec5-e8de-45e4-8834-86c9940d942a}</Project>
+    </ProjectReference>
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/tests/gfoidl.Stochastics.Gpu-win-64.Tests/gfoidl.Stochastics.Gpu-win-64.Tests.vcxproj.filters b/tests/gfoidl.Stochastics.Gpu-win-64.Tests/gfoidl.Stochastics.Gpu-win-64.Tests.vcxproj.filters
new file mode 100644
index 0000000..8836c10
--- /dev/null
+++ b/tests/gfoidl.Stochastics.Gpu-win-64.Tests/gfoidl.Stochastics.Gpu-win-64.Tests.vcxproj.filters
@@ -0,0 +1,39 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hh;hpp;hxx;hm;inl;inc;ipp;xsd</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="stdafx.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="targetver.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+    <ClInclude Include="TestHelper.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="stdafx.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="gpu_core.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="TestHelper.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/tests/gfoidl.Stochastics.Gpu-win-64.Tests/gpu_core.cpp b/tests/gfoidl.Stochastics.Gpu-win-64.Tests/gpu_core.cpp
new file mode 100644
index 0000000..5ee0e22
--- /dev/null
+++ b/tests/gfoidl.Stochastics.Gpu-win-64.Tests/gpu_core.cpp
@@ -0,0 +1,67 @@
+#include "stdafx.h"
+#include "TestHelper.h"
+#include "gpu_core.h"
+//-----------------------------------------------------------------------------
+using namespace Microsoft::VisualStudio::CppUnitTestFramework;
+//-----------------------------------------------------------------------------
+namespace gfoidlStochasticsGpuwin64Tests
+{
+    TEST_CLASS(gpu_core_Tests)
+    {
+    private:
+        static double _sample[];
+
+    public:
+        TEST_METHOD(gpu_available___true)
+        {
+            bool actual = gpu_available();
+
+            Assert::IsTrue(actual);
+        }
+        //---------------------------------------------------------------------
+        TEST_METHOD(gpu_sample_calc_mean___OK)
+        {
+            const int N     = 1000000;
+            double* sample  = new double[N];
+            double expected = 0.0;
+
+            for (int i = 0; i < N; ++i)
+            {
+                double t  = (double)rand() / RAND_MAX;
+                sample[i] = t;
+                expected += t;
+            }
+            expected /= N;
+
+            SampleStats sampleStats;
+            int errorCode = gpu_sample_calc_stats(sample, N, &sampleStats);
+
+            TestHelper::FailIfError(errorCode);
+
+            Assert::AreEqual(expected, sampleStats.Mean, 1e-3);
+
+            delete[] sample;
+        }
+        //---------------------------------------------------------------------
+        TEST_METHOD(gpu_sample_calc_stats___OK)
+        {
+            SampleStats sampleStats;
+            int errorCode = gpu_sample_calc_stats(_sample, 20, &sampleStats);
+
+            TestHelper::FailIfError(errorCode);
+
+            // Expected values calculated with gnuplot 5.0 patchlevel 1
+            double standardDeviation       = sqrt(sampleStats.VarianceCore / 20);
+            double sampleStandardDeviation = sqrt(sampleStats.VarianceCore / (20 - 1));
+
+            Assert::AreEqual(51.9500 , sampleStats.Mean       , 1e-3);
+            Assert::AreEqual(217.2718, standardDeviation      , 1e-3);
+            Assert::AreEqual(222.9162, sampleStandardDeviation, 1e-3);
+            Assert::AreEqual(94.7050 , sampleStats.Delta      , 1e-3);
+            Assert::AreEqual(4.1293  , sampleStats.Skewness   , 1e-3);
+            Assert::AreEqual(18.0514 , sampleStats.Kurtosis   , 1e-3);
+        }
+    };
+    //-------------------------------------------------------------------------
+    double gpu_core_Tests::_sample[] = {0, 3, 4, 1, 2, 3, 0, 2, 1, 3, 2, 0, 2, 2, 3, 2, 5, 2, 3, 999};
+}
diff --git a/tests/gfoidl.Stochastics.Gpu-win-64.Tests/stdafx.cpp b/tests/gfoidl.Stochastics.Gpu-win-64.Tests/stdafx.cpp
new file mode 100644
index 0000000..b43eab4
--- /dev/null
+++ b/tests/gfoidl.Stochastics.Gpu-win-64.Tests/stdafx.cpp
@@ -0,0 +1,8 @@
+// stdafx.cpp : source file that includes just the standard includes
+// gfoidl.Stochastics.Gpu-win-64.Tests.pch will be the pre-compiled header
+// stdafx.obj will contain the pre-compiled type information
+
+#include "stdafx.h"
+
+// TODO: reference any additional headers you need in STDAFX.H
+// and not in this file
diff --git a/tests/gfoidl.Stochastics.Gpu-win-64.Tests/stdafx.h b/tests/gfoidl.Stochastics.Gpu-win-64.Tests/stdafx.h
new file mode 100644
index 0000000..43280fc
--- /dev/null
+++ b/tests/gfoidl.Stochastics.Gpu-win-64.Tests/stdafx.h
@@ -0,0 +1,13 @@
+// stdafx.h : include file for standard system include files,
+// or project specific include files that are used frequently, but
+// are changed infrequently
+//
+
+#pragma once
+
+#include "targetver.h"
+
+// Headers for CppUnitTest
+#include "CppUnitTest.h"
+
+// TODO: reference additional headers your program requires here
diff --git a/tests/gfoidl.Stochastics.Gpu-win-64.Tests/targetver.h b/tests/gfoidl.Stochastics.Gpu-win-64.Tests/targetver.h
new file mode 100644
index 0000000..87c0086
--- /dev/null
+++ b/tests/gfoidl.Stochastics.Gpu-win-64.Tests/targetver.h
@@ -0,0 +1,8 @@
+#pragma once
+
+// Including SDKDDKVer.h defines the highest available Windows platform.
+
+// If you wish to build your application for a previous Windows platform, include WinSDKVer.h and
+// set the _WIN32_WINNT macro to the platform you wish to support before including SDKDDKVer.h.
+
+#include <SDKDDKVer.h>
diff --git a/tests/gfoidl.Stochastics.Tests/Native/GpuTests/CalculateSampleStats.cs b/tests/gfoidl.Stochastics.Tests/Native/GpuTests/CalculateSampleStats.cs
new file mode 100644
index 0000000..3be1602
--- /dev/null
+++ b/tests/gfoidl.Stochastics.Tests/Native/GpuTests/CalculateSampleStats.cs
@@ -0,0 +1,33 @@
+﻿using gfoidl.Stochastics.Native;
+using gfoidl.Stochastics.Statistics;
+using NUnit.Framework;
+
+namespace gfoidl.Stochastics.Tests.Native.GpuTests
+{
+    [TestFixture, Explicit("GPU CUDA must be available")]
+    public class CalculateSampleStats
+    {
+        [Test]
+        public void Values_given___correct_stats()
+        {
+            Assume.That(Gpu.IsAvailable, "GPU is not available");
+
+            double[] values = { 0, 3, 4, 1, 2, 3, 0, 2, 1, 3, 2, 0, 2, 2, 3, 2, 5, 2, 3, 999 };
+
+            var sample = new Sample(values);
+
+            Gpu.CalculateSampleStats(sample);
+
+            Assert.Multiple(() =>
+            {
+                // Expected values calculated with gnuplot 5.0 patchlevel 1
+                Assert.AreEqual(51.9500 , sample.Mean                   , 1e-3, nameof(sample.Mean));
+                Assert.AreEqual(217.2718, sample.StandardDeviation      , 1e-3, nameof(sample.StandardDeviation));
+                Assert.AreEqual(222.9162, sample.SampleStandardDeviation, 1e-3, nameof(sample.SampleStandardDeviation));
+                Assert.AreEqual(4.1293  , sample.Skewness               , 1e-3, nameof(sample.Skewness));
+                Assert.AreEqual(18.0514 , sample.Kurtosis               , 1e-3, nameof(sample.Kurtosis));
+                Assert.AreEqual(94.7050 , sample.Delta                  , 1e-3, nameof(sample.Delta));
+            });
+        }
+    }
+}
diff --git a/tests/gfoidl.Stochastics.Tests/Native/GpuTests/IsAvailable.cs b/tests/gfoidl.Stochastics.Tests/Native/GpuTests/IsAvailable.cs
new file mode 100644
index 0000000..3d74c34
--- /dev/null
+++ b/tests/gfoidl.Stochastics.Tests/Native/GpuTests/IsAvailable.cs
@@ -0,0 +1,26 @@
+﻿using System;
+using gfoidl.Stochastics.Native;
+using NUnit.Framework;
+
+namespace gfoidl.Stochastics.Tests.Native.GpuTests
+{
+    [TestFixture, Explicit("Mutual exclusive tests")]
+    public class IsAvailable
+    {
+        [Test]
+        public void Env_set_to_0___false()
+        {
+            Environment.SetEnvironmentVariable(Gpu.EnvVariableName, "0");
+
+            Assert.IsFalse(Gpu.IsAvailable);
+        }
+        //---------------------------------------------------------------------
+        [Test]
+        public void Env_set_to_force___true()
+        {
+            Environment.SetEnvironmentVariable(Gpu.EnvVariableName, "force");
+
+            Assert.IsTrue(Gpu.IsAvailable);
+        }
+    }
+}