GPU code addition. #23

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open

karanchahal wants to merge 7 commits into develop from feature/gpu

buildTensorflow.h

-Original file line number
+Diff line change
@@ -1,4 +1,9 @@
+    // Check whether GPU is accessible or not
+    bool gpu = false;
     #include "types/tensor.h"
     #include "overloads/tensor.h"
     #include "operations/operations_Impl.h"
     #include "layers/dense.h"
+    #include "optims/sgd.h"
+    #include "data/celsius2faranheit.h"

buildTensorflowGpu.h

-Original file line number
+Diff line change
@@ -0,0 +1,11 @@
+    // Check whether GPU is accessible or not
+    bool gpu = true;
+    #include "gpu/defn.h" // Includes GPU Kernel Code Definition for Forward pass
+    #include "types/tensor.h"
+    #include "gpu/impl.h" // Includes GPU Kernel Code Implementation
+    #include "overloads/tensor.h"
+    #include "operations/operations_Impl.h"
+    #include "layers/dense.h"
+    #include "optims/sgd.h"
+    #include "data/celsius2faranheit.h"

data/celsius2faranheit.h

-Original file line number
+Diff line change
@@ -0,0 +1,56 @@
+    /*
+        This file defines the Celsius-to-Fahrenheit DataLoader. Its inputs are Celsius values
+        and its targets are the corresponding Fahrenheit values.
+        The way to use this dataset is as follows:
+        Celsius2Faranheit<float,float> dataloader;
+        dataloader.create(10); // Creates 10 training examples
+        for(auto j: dataloader.data) {
+            auto inp = j.first;
+            auto tar = j.second;
+            // And then use this above data in your model for training or inference
+        }
+        Note that the data is not returned as tensors. Each example simply has the data types the user
+        specifies in the dataloader definition. In the above case the input and targets are both floats.
+    */
+    #include "data/dataloader.h"
+    #include <stdlib.h>
+    #ifndef __C2F_DATASET_INCLUDED__
+    #define __C2F_DATASET_INCLUDED__
+    template<typename I, typename T>
+    class Celsius2Faranheit: public DataLoader<I,T> {
+        private:
+        int MAX_CELSIUS = 10;
+        // Helper function to convert celsius to faranheit
+        T toFaranheit(I input) {
+            return (9*input)/5 + 32;
+        }
+        public:
+        // Adds a training example into the dataset
+        void add(I input, T target) {
+            this->data.push_back(make_pair(input,target));
+        }
+        // Populates the dataset with the number of examples specified by the user.
+        void create(int num_examples) {
+            for(int i=0; i< num_examples;i++) {
+                I input = rand() % MAX_CELSIUS + 1; // random int value between 1 and MAX_CELSIUS
+                T target = toFaranheit(input);
+                add(input,target);
+            }
+        }
+    };
+    #endif

data/dataloader.h

-Original file line number
+Diff line change
@@ -0,0 +1,23 @@
+    /*
+        This file defines the base class of each Dataset in the project. The data is stored in a simple
+        vector and each object in the vector is a pair signifying input and target (ground truth).
+    */
+    #include<iostream>
+    #include<vector>
+    #ifndef __DATALOADER_INCLUDED__
+    #define __DATALOADER_INCLUDED__
/*
    Abstract base class for datasets. I is the input type and T is the target type.
    Concrete datasets derive from this class and implement add().
*/
template<typename I, typename T>
class DataLoader {
    public:
    // This variable contains all the data of the dataset as (input, target) pairs.
    // Names are std::-qualified so this header does not depend on a `using namespace std`
    // appearing somewhere before it is included.
    std::vector<std::pair<I,T>> data;

    // Virtual destructor: the class is used polymorphically (add() is pure virtual), so a
    // derived loader deleted through a DataLoader* must run its own destructor.
    virtual ~DataLoader() = default;

    // This function performs the operation that populates the "data" variable.
    // Every concrete dataset must provide it.
    virtual void add(I input, T target) = 0;
};
+    #endif

gpu/defn.h

Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		#include "gpu/dot/defn.h"

gpu/dot.h → gpu/dot/defn.h

-Original file line number
+Diff line change
@@ -1,5 +1,7 @@
-    #ifndef __GPU_DOT_INCLUDED__
-    #define __GPU_DOT_INCLUDED__
+    #include "utils/common.h"
+    #ifndef __GPU_DOT_DEFN_INCLUDED__
+    #define __GPU_DOT_DEFN_INCLUDED__
     template<typename T>
     struct Matrix;
@@ Expand All / @@ -8,4 +10,3 @@ template<typename T> @@
     void dotGPU(vector<T> &res, const Matrix<T>* lhs, const Matrix<T> &rhs, int start, int startRes);
     #endif

gpu/dot.cu → gpu/dot/impl.cuh

            
                      Original file line number
                      Diff line number
                      Diff line change
                  
    @@ -1,17 +1,20 @@
  
    #include "utils/common.h"

    #include "types/matrix.h"

    #ifndef __GPU_DOT_IMPL_INCLUDED__   

    #define __GPU_DOT_IMPL_INCLUDED__  

    // TODO need to refactor this to different files and 

    // figure out a way to link it for the GPU build 

    template<typename T>

    __global__ void mm(T* a, T* b, T* c, T width) {

    __global__ void mm(T* a, T* b, T* c, T width, T second) {

        int x = blockIdx.x; // block id

        int y = threadIdx.x; // thread id

        T temp = 0;

        for(int i = 0;i< width;i++) {

            temp += a[x*width + i]*b[i*width+ y];

            temp += a[x*width + i]*b[i*second+ y];

        }

        c[x*width + y] = temp;

        c[x*second + y] = temp;

    }

    template<typename T>

    @@ -27,8 +30,8 @@ void dotGPU(vector<T> &res, const Matrix<T> *lhs, const Matrix<T> &rhs, int star
  
        // Copy to CUDA memory

        T* h_A = lhs->val.data();

        T* h_B = rhs.val.data();

        const T* h_A = lhs->val.data();

        const T* h_B = rhs.val.data();

        T* h_C = res.data();

        T *d_a, *d_b, *d_c;

    @@ -40,8 +43,16 @@ void dotGPU(vector<T> &res, const Matrix<T> *lhs, const Matrix<T> &rhs, int star
  
        cudaMemcpy((void *)d_a, h_A + start, sizeof(T)*row1*col1, cudaMemcpyHostToDevice);

        cudaMemcpy((void *)d_b, h_B, sizeof(T)*row2*col2, cudaMemcpyHostToDevice);

        mm<T><<<row1,col2>>>(d_a,d_b,d_c,col1);

        mm<T><<<row1,col2>>>(d_a,d_b,d_c,col1,col2); // non blocking function

        // Copy back from cuda memory

        cudaMemcpy(h_C+startRes, (void **)d_c, sizeof(T)*row1*col2, cudaMemcpyDeviceToHost);

        cudaMemcpy(h_C+startRes, (void **)d_c, sizeof(T)*row1*col2, cudaMemcpyDeviceToHost); // waits for kernel to get over

        // Clean Up 

        cudaFree(d_a);

        cudaFree(d_b);

        cudaFree(d_c);

    }

    #endif

gpu/impl.h

Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		#include "gpu/dot/impl.cuh"

layers/dense.h

-Original file line number
+Diff line change
@@ Expand Up / @@ -37,6 +37,9 @@ class Dense{ @@
             if(init == GLOROT) {
                 return utils::glorotInit<T>(fan_in, fan_out);
             }
+            // Default return zero vector
+            return vector<T>(fan_in*fan_out,0);
         }
         public:
@@ Expand Down @@

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

GPU code addition. #23

Uh oh!

Diff view

Diff view

There are no files selected for viewing

Uh oh!

GPU code addition. #23

Are you sure you want to change the base?

Uh oh!

GPU code addition. #23

Uh oh!

Uh oh!

Diff view

Diff view

There are no files selected for viewing

Uh oh!