Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions buildTensorflow.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
// Build flag: false selects the CPU-only build (no GPU kernel headers are included below)
bool gpu = false;

#include "types/tensor.h"
#include "overloads/tensor.h"
#include "operations/operations_Impl.h"
#include "layers/dense.h"
#include "optims/sgd.h"
#include "data/celsius2faranheit.h"
11 changes: 11 additions & 0 deletions buildTensorflowGpu.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// Build flag: true selects the GPU build (GPU kernel headers are included below)
bool gpu = true;

#include "gpu/defn.h" // Includes GPU Kernel Code Definition for Forward pass
#include "types/tensor.h"
#include "gpu/impl.h" // Includes GPU Kernel Code Implementation
#include "overloads/tensor.h"
#include "operations/operations_Impl.h"
#include "layers/dense.h"
#include "optims/sgd.h"
#include "data/celsius2faranheit.h"
56 changes: 56 additions & 0 deletions data/celsius2faranheit.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
/*
This file defines the Celsius to Fahrenheit DataLoader. Its inputs are variables containing the
Celsius values and the targets are the corresponding Fahrenheit values.

The way to use this dataset is as follows:

Celsius2Faranheit<float,float> dataloader;
dataloader.create(10); // Creates 10 training examples

for(auto i: dataloader.data) {
auto inp = i.first;
auto tar = i.second;

// And then use this above data in your model for training or inference
}

Note that the data won't be output as tensors. It will simply be of the data types the user
specifies in the dataloader definition. In the above case the input and targets are both floats.

*/

#include "data/dataloader.h"
#include <stdlib.h>

#ifndef __C2F_DATASET_INCLUDED__
#define __C2F_DATASET_INCLUDED__

/*
    Celsius2Faranheit is a DataLoader whose examples pair a Celsius value (the input, of
    type I) with the corresponding Fahrenheit value (the target, of type T).
*/
template<typename I, typename T>
class Celsius2Faranheit: public DataLoader<I,T> {

private:
    // Largest Celsius value (inclusive) that create() will generate
    int MAX_CELSIUS = 10;

    // Helper function to convert Celsius to Fahrenheit.
    // The constants are cast to T so that the arithmetic is carried out in the common
    // type of I and T. With an integral I and a floating-point T this keeps the division
    // from truncating (1 degree Celsius gives 33.8 rather than 33); for every other
    // combination the arithmetic type is the same as with plain integer constants.
    T toFaranheit(I input) {
        return (static_cast<T>(9) * input) / static_cast<T>(5) + static_cast<T>(32);
    }

public:

    // Adds a training example into the dataset
    void add(I input, T target) override {
        this->data.push_back(std::make_pair(input, target));
    }

    // Populates the dataset with the number of examples specified by the user.
    // A num_examples of zero or less adds nothing.
    void create(int num_examples) {
        for (int i = 0; i < num_examples; i++) {
            I input = rand() % MAX_CELSIUS + 1; // random int value between 1 and MAX_CELSIUS
            T target = toFaranheit(input);
            add(input, target);
        }
    }
};

#endif
23 changes: 23 additions & 0 deletions data/dataloader.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
/*
This file defines the base class of each Dataset in the project. The data is stored in a simple
vector and each object in the vector is a pair signifying input and target (ground truth).
*/

#include<iostream>
#include<vector>

#ifndef __DATALOADER_INCLUDED__
#define __DATALOADER_INCLUDED__

/*
    Abstract base class of every dataset. I is the type of an input and T the type of its
    target (ground truth); the examples are kept as (input, target) pairs in "data".
*/
template<typename I, typename T>
class DataLoader {

public:
    // Virtual destructor: the class is meant to be derived from, so a dataset deleted
    // through a DataLoader pointer must still run the derived destructor.
    virtual ~DataLoader() = default;

    // This variable contains all the data of the dataset
    std::vector<std::pair<I, T>> data;

    // This function performs the operation that populates the "data" variable.
    // It is pure virtual, so every concrete dataset has to implement it.
    virtual void add(I input, T target) = 0;
};

#endif
1 change: 1 addition & 0 deletions gpu/defn.h
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
#include "gpu/dot/defn.h"
7 changes: 4 additions & 3 deletions gpu/dot.h → gpu/dot/defn.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#ifndef __GPU_DOT_INCLUDED__
#define __GPU_DOT_INCLUDED__
#include "utils/common.h"

#ifndef __GPU_DOT_DEFN_INCLUDED__
#define __GPU_DOT_DEFN_INCLUDED__

template<typename T>
struct Matrix;
Expand All @@ -8,4 +10,3 @@ template<typename T>
void dotGPU(vector<T> &res, const Matrix<T>* lhs, const Matrix<T> &rhs, int start, int startRes);

#endif

29 changes: 20 additions & 9 deletions gpu/dot.cu → gpu/dot/impl.cuh
Original file line number Diff line number Diff line change
@@ -1,17 +1,20 @@
#include "utils/common.h"
#include "types/matrix.h"

#ifndef __GPU_DOT_IMPL_INCLUDED__
#define __GPU_DOT_IMPL_INCLUDED__

// TODO need to refactor this to different files and
// figure out a way to link it for the GPU build
// Naive matrix-multiplication kernel: c = a * b, all three stored row-major.
//   a      : left operand, rows of length `width`
//   b      : right operand, `width` rows of length `second`
//   c      : result, rows of length `second`
//   width  : number of columns of a (equal to the number of rows of b)
//   second : number of columns of b (and of c)
// Expected launch layout: one block per result row and one thread per result column,
// i.e. mm<T><<<rows_of_a, second>>>(a, b, c, width, second). There is no bounds guard,
// so the launch configuration has to match the matrix dimensions exactly.
// The dimensions are plain ints rather than T so that the index arithmetic stays
// integral whatever the element type is.
template<typename T>
__global__ void mm(const T* a, const T* b, T* c, int width, int second) {

    const int row = blockIdx.x;  // block id  -> row of the result
    const int col = threadIdx.x; // thread id -> column of the result

    // Dot product of row `row` of a with column `col` of b
    T acc = 0;
    for (int i = 0; i < width; i++) {
        acc += a[row*width + i] * b[i*second + col];
    }

    c[row*second + col] = acc;
}

template<typename T>
Expand All @@ -27,8 +30,8 @@ void dotGPU(vector<T> &res, const Matrix<T> *lhs, const Matrix<T> &rhs, int star

// Copy to CUDA memory

T* h_A = lhs->val.data();
T* h_B = rhs.val.data();
const T* h_A = lhs->val.data();
const T* h_B = rhs.val.data();
T* h_C = res.data();

T *d_a, *d_b, *d_c;
Expand All @@ -40,8 +43,16 @@ void dotGPU(vector<T> &res, const Matrix<T> *lhs, const Matrix<T> &rhs, int star
cudaMemcpy((void *)d_a, h_A + start, sizeof(T)*row1*col1, cudaMemcpyHostToDevice);
cudaMemcpy((void *)d_b, h_B, sizeof(T)*row2*col2, cudaMemcpyHostToDevice);

mm<T><<<row1,col2>>>(d_a,d_b,d_c,col1);
mm<T><<<row1,col2>>>(d_a,d_b,d_c,col1,col2); // non blocking function

// Copy back from cuda memory
cudaMemcpy(h_C+startRes, (void **)d_c, sizeof(T)*row1*col2, cudaMemcpyDeviceToHost);
cudaMemcpy(h_C+startRes, (void **)d_c, sizeof(T)*row1*col2, cudaMemcpyDeviceToHost); // waits for kernel to get over

// Clean Up
cudaFree(d_a);
cudaFree(d_b);
cudaFree(d_c);
}

#endif

1 change: 1 addition & 0 deletions gpu/impl.h
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
#include "gpu/dot/impl.cuh"
3 changes: 3 additions & 0 deletions layers/dense.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ class Dense{
if(init == GLOROT) {
return utils::glorotInit<T>(fan_in, fan_out);
}

// Default return zero vector
return vector<T>(fan_in*fan_out,0);
}

public:
Expand Down
Loading