diff --git a/Cpp_main_OpenMP.cpp b/Cpp_main_OpenMP.cpp
index 1180b30..820fb91 100644
--- a/Cpp_main_OpenMP.cpp
+++ b/Cpp_main_OpenMP.cpp
@@ -147,7 +147,7 @@ int main()
   //--------------------------------//
 
   // Grid for x
-  const int nx = 15000;
+  const int nx = 1500;
   const float xmin = 0.1;
   const float xmax = 4.0;
 
diff --git a/Julia_CUDAnative.jl b/Julia_CUDAnative.jl
new file mode 100644
index 0000000..7088439
--- /dev/null
+++ b/Julia_CUDAnative.jl
@@ -0,0 +1,184 @@
+using Distributions
+using Dates
+using CUDAnative, CuArrays, CUDAdrv
+using Crayons
+
+println(Crayon(foreground = :yellow), "\nYour CUDA device: " * CUDAdrv.name(CuDevice(0)), Crayon(foreground = :white))
+
+struct params
+  ne::Int64
+  nx::Int64
+  T::Int64
+  ssigma::Float64
+  bbeta::Float64
+  w::Float64
+  r::Float64
+end
+
+# Kernel that computes the value function at a given age; each thread handles one state (ix, ie)
+function value(params::params, age::Int64, xgrid, egrid, P, V)
+
+  # Map the block/thread indices to one point of the state space
+  ix = (blockIdx().x-1) * blockDim().x + threadIdx().x
+  ie = threadIdx().y
+
+  ne     = params.ne
+  nx     = params.nx
+  T      = params.T
+  ssigma = params.ssigma
+  bbeta  = params.bbeta
+  w      = params.w
+  r      = params.r
+
+  VV = -10.0^3;
+  ixpopt = 0;
+
+  # @inbounds disables bounds checking
+  @inbounds for ixp = 1:nx
+    expected = 0.0;
+    if(age < T)
+      for iep = 1:ne
+        expected = expected + P[ie, iep]*V[age + 1, ixp, iep];
+      end
+    end
+
+    cons = (1 + r)*xgrid[ix] + egrid[ie]*w - xgrid[ixp];
+
+    # The ^ operator seems to be broken on the GPU. One can always use the CUDAnative intrinsics instead.
+    utility = CUDAnative.pow(cons, 1-ssigma)/(1-ssigma) + bbeta*expected;
+
+    if(cons <= 0)
+      utility = -10.0^(5);
+    end
+
+    if(utility >= VV)
+      VV = utility;
+      ixpopt = ixp;
+    end
+
+    utility = 0.0;
+  end
+
+  V[age, ix, ie] = VV
+
+  return nothing
+end
+
+function main()
+
+  # Grid for x
+  nx   = 1500;
+  xmin = 0.1;
+  xmax = 4.0;
+
+  # Grid for e: parameters for Tauchen (1986)
+  ne          = 15;
+  ssigma_eps  = 0.02058;
+  llambda_eps = 0.99;
+  m           = 1.5;
+
+  # Utility function
+  ssigma = 2;
+  bbeta  = 0.97;
+  T      = 10;
+
+  # Prices
+  r = 0.07;
+  w = 5;
+
+  # The following arrays are CUDA arrays.
+
+  # Initialize the grid for X
+  xgrid = CuArray{Float64,1}(zeros(nx))
+
+  # Initialize the grid for E and the transition probability matrix
+  egrid = CuArray{Float64,1}(zeros(ne))
+  P     = CuArray{Float64,2}(zeros(ne, ne))
+
+  # Initialize value function V
+  V = CuArray{Float64,3}(zeros(T, nx, ne))
+
+  #--------------------------------#
+  #         Grid creation          #
+  #--------------------------------#
+
+  # The following operations on the CUDA arrays trigger the warning "Performing scalar operations on GPU arrays: This is very slow...".
+  # One could as well create regular arrays first and transfer them to the GPU later (see the sketch below); however, this does not change the runtime significantly.
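+  # A minimal sketch of that alternative (hypothetical, not used here): build the grid on the
+  # CPU with an ordinary array and move it to the device in a single transfer, which avoids
+  # the scalar-indexing warning during grid creation:
+  #
+  #   xgrid_cpu = collect(range(xmin, stop = xmax, length = nx))
+  #   xgrid     = CuArray(xgrid_cpu)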
+
+  # Grid for capital (x)
+  size  = nx;
+  xstep = (xmax - xmin) / (size - 1);
+  for i = 1:nx
+    xgrid[i] = xmin + (i-1)*xstep;
+  end
+
+  # Grid for productivity (e) with Tauchen (1986)
+  size     = ne;
+  ssigma_y = sqrt((ssigma_eps^2) / (1 - (llambda_eps^2)));
+  estep    = 2*ssigma_y*m / (size-1);
+  for i = 1:ne
+    egrid[i] = (-m*sqrt((ssigma_eps^2) / (1 - (llambda_eps^2))) + (i-1)*estep);
+  end
+
+  # Transition probability matrix (P) with Tauchen (1986)
+  mm = egrid[2] - egrid[1];
+  for j = 1:ne
+    for k = 1:ne
+      if(k == 1)
+        P[j, k] = cdf(Normal(), (egrid[k] - llambda_eps*egrid[j] + (mm/2))/ssigma_eps);
+      elseif(k == ne)
+        P[j, k] = 1 - cdf(Normal(), (egrid[k] - llambda_eps*egrid[j] - (mm/2))/ssigma_eps);
+      else
+        P[j, k] = cdf(Normal(), (egrid[k] - llambda_eps*egrid[j] + (mm/2))/ssigma_eps) - cdf(Normal(), (egrid[k] - llambda_eps*egrid[j] - (mm/2))/ssigma_eps);
+      end
+    end
+  end
+
+  # Exponential of the grid e
+  for i = 1:ne
+    egrid[i] = exp(egrid[i]);
+  end
+
+  #--------------------------------#
+  #     Life-cycle computation     #
+  #--------------------------------#
+
+  print(" \n")
+  print("Life cycle computation: \n")
+  print(" \n")
+
+  start = Dates.unix2datetime(time())
+  currentState = params(ne, nx, T, ssigma, bbeta, w, r)
+  @inbounds for age = T:-1:1
+    # Synchronization is needed for a correct time measurement. Caution: a bare @sync refers to Base.@sync.
+    CuArrays.@sync @cuda blocks=50 threads=(30, 15) value(currentState, age, xgrid, egrid, P, V)
+    finish = convert(Int, Dates.value(Dates.unix2datetime(time()) - start))/1000;
+    # Keep in mind that printing is rather slow in some Julia environments.
+    # Without printing, Julia runs roughly as fast as C++-CUDA on my machine (ca. 1-2% slower).
+    print("Age: ", age, ". Time: ", finish, " seconds. \n")
+  end
+  # Transfer V back to the CPU to get a fair comparison with C++-CUDA (one can print the results
+  # without transferring them back manually, but I don't know what happens under the hood then).
+  results = Array(V)
+  print("\n")
+  finish = convert(Int, Dates.value(Dates.unix2datetime(time()) - start))/1000;
+  print("TOTAL ELAPSED TIME: ", finish, " seconds. \n")
+
+  #---------------------#
+  #     Some checks     #
+  #---------------------#
+
+  print(" \n")
+  print(" - - - - - - - - - - - - - - - - - - - - - \n")
+  print(" \n")
+  print("The first entries of the value function: \n")
+  print(" \n")
+
+  # Print the first entries of the value function as a sanity check
+  for i = 1:3
+    print(round(results[1, 1, i], digits=5), "\n")
+  end
+end
+
+println(Crayon(foreground = :red), "\nWarmup call -- slow!", Crayon(foreground = :white))
+main()
+println(Crayon(foreground = :green), "\nProper call -- correct time measurement.", Crayon(foreground = :white))
+main()
\ No newline at end of file
diff --git a/Makefile b/Makefile
index e374282..133ece6 100644
--- a/Makefile
+++ b/Makefile
@@ -68,6 +68,9 @@ julia_parallel:
 julia_pmap:
 	julia -p$(CORES) Julia_main_pmap.jl
 
+julia_CUDA:
+	julia -p$(CORES) -O3 --color=yes Julia_CUDAnative.jl
+
 Rcpp:
 	export OMP_NUM_THREADS=$(CORES); Rscript Rcpp_main.R;