diff --git a/buildTensorflow.h b/buildTensorflow.h index cf2cf4c..88d31a2 100644 --- a/buildTensorflow.h +++ b/buildTensorflow.h @@ -1,4 +1,9 @@ +// Check whether GPU is accessible or not +bool gpu = false; + #include "types/tensor.h" #include "overloads/tensor.h" #include "operations/operations_Impl.h" #include "layers/dense.h" +#include "optims/sgd.h" +#include "data/celsius2faranheit.h" \ No newline at end of file diff --git a/buildTensorflowGpu.h b/buildTensorflowGpu.h new file mode 100644 index 0000000..b8d69d1 --- /dev/null +++ b/buildTensorflowGpu.h @@ -0,0 +1,11 @@ +// Check whether GPU is accessible or not +bool gpu = true; + +#include "gpu/defn.h" // Includes GPU Kernel Code Defination for Forward pass +#include "types/tensor.h" +#include "gpu/impl.h" // Includes GPU Kernel Code Implementation +#include "overloads/tensor.h" +#include "operations/operations_Impl.h" +#include "layers/dense.h" +#include "optims/sgd.h" +#include "data/celsius2faranheit.h" \ No newline at end of file diff --git a/data/celsius2faranheit.h b/data/celsius2faranheit.h new file mode 100644 index 0000000..1964abb --- /dev/null +++ b/data/celsius2faranheit.h @@ -0,0 +1,56 @@ +/* + This file defines the Celsius To Faranheit DataLoader. It's input is variables containing the + celsius numbers and the targets are the corresponding faranheit numbers. + + The way to use this dataset is as follows: + + Celsius2Faranheit dataloader; + dataloader.create(10); // Creates 10 training examples + + for(auto i: dataloader.data) { + auto inp = j.first; + auto tar = j.second; + + // And then use this above data in your model for training or inference + } + + Note that the data won't be outputted in tensors. It will simply be of the data type the user + signifies in the dataloader defination. In the above case the input and targets are both floats. + +*/ + +#include "data/dataloader.h" +#include + +#ifndef __C2F_DATASET_INCLUDED__ +#define __C2F_DATASET_INCLUDED__ + +template +class Celsius2Faranheit: public DataLoader { + + private: + int MAX_CELSIUS = 10; + + // Helper function to convert celsius to faranheit + T toFaranheit(I input) { + return (9*input)/5 + 32; + } + + public: + + // Adds a training example into the dataset + void add(I input, T target) { + this->data.push_back(make_pair(input,target)); + } + + // Populates the dataset with the number of examples specified by the user. + void create(int num_examples) { + for(int i=0; i< num_examples;i++) { + I input = rand() % MAX_CELSIUS + 1; // random int value between 1 and MAX_CELSIUS + T target = toFaranheit(input); + add(input,target); + } + } +}; + +#endif diff --git a/data/dataloader.h b/data/dataloader.h new file mode 100644 index 0000000..22d9a8e --- /dev/null +++ b/data/dataloader.h @@ -0,0 +1,23 @@ +/* + This file defines the base class of each Dataset in the project. The data is stored in a simple + vector and each object in the vector is a pari signifying input and target (ground truth). +*/ + +#include +#include + +#ifndef __DATALOADER_INCLUDED__ +#define __DATALOADER_INCLUDED__ + +template +class DataLoader { + + public: + // This variable contains all the data of the dataset + vector> data; + + // This function perfroms the operation that populates the "data" variable. + virtual void add(I input, T target) = 0; +}; + +#endif diff --git a/gpu/defn.h b/gpu/defn.h new file mode 100644 index 0000000..2d2d39f --- /dev/null +++ b/gpu/defn.h @@ -0,0 +1 @@ +#include "gpu/dot/defn.h" \ No newline at end of file diff --git a/gpu/dot.h b/gpu/dot/defn.h similarity index 62% rename from gpu/dot.h rename to gpu/dot/defn.h index a16db39..9da72d8 100644 --- a/gpu/dot.h +++ b/gpu/dot/defn.h @@ -1,5 +1,7 @@ -#ifndef __GPU_DOT_INCLUDED__ -#define __GPU_DOT_INCLUDED__ +#include "utils/common.h" + +#ifndef __GPU_DOT_DEFN_INCLUDED__ +#define __GPU_DOT_DEFN_INCLUDED__ template struct Matrix; @@ -8,4 +10,3 @@ template void dotGPU(vector &res, const Matrix* lhs, const Matrix &rhs, int start, int startRes); #endif - diff --git a/gpu/dot.cu b/gpu/dot/impl.cuh similarity index 63% rename from gpu/dot.cu rename to gpu/dot/impl.cuh index 6b9bfc8..2f7a7f7 100644 --- a/gpu/dot.cu +++ b/gpu/dot/impl.cuh @@ -1,17 +1,20 @@ -#include "utils/common.h" -#include "types/matrix.h" +#ifndef __GPU_DOT_IMPL_INCLUDED__ +#define __GPU_DOT_IMPL_INCLUDED__ + +// TODO need to refactor this to different files and +// figure out a way to link it for the GPU build template -__global__ void mm(T* a, T* b, T* c, T width) { +__global__ void mm(T* a, T* b, T* c, T width, T second) { int x = blockIdx.x; // block id int y = threadIdx.x; // thread id T temp = 0; for(int i = 0;i< width;i++) { - temp += a[x*width + i]*b[i*width+ y]; + temp += a[x*width + i]*b[i*second+ y]; } - c[x*width + y] = temp; + c[x*second + y] = temp; } template @@ -27,8 +30,8 @@ void dotGPU(vector &res, const Matrix *lhs, const Matrix &rhs, int star // Copy to CUDA memory - T* h_A = lhs->val.data(); - T* h_B = rhs.val.data(); + const T* h_A = lhs->val.data(); + const T* h_B = rhs.val.data(); T* h_C = res.data(); T *d_a, *d_b, *d_c; @@ -40,8 +43,16 @@ void dotGPU(vector &res, const Matrix *lhs, const Matrix &rhs, int star cudaMemcpy((void *)d_a, h_A + start, sizeof(T)*row1*col1, cudaMemcpyHostToDevice); cudaMemcpy((void *)d_b, h_B, sizeof(T)*row2*col2, cudaMemcpyHostToDevice); - mm<<>>(d_a,d_b,d_c,col1); + mm<<>>(d_a,d_b,d_c,col1,col2); // non blocking function // Copy back from cuda memory - cudaMemcpy(h_C+startRes, (void **)d_c, sizeof(T)*row1*col2, cudaMemcpyDeviceToHost); + cudaMemcpy(h_C+startRes, (void **)d_c, sizeof(T)*row1*col2, cudaMemcpyDeviceToHost); // waits for kernel to get over + + // Clean Up + cudaFree(d_a); + cudaFree(d_b); + cudaFree(d_c); } + +#endif + diff --git a/gpu/impl.h b/gpu/impl.h new file mode 100644 index 0000000..44e3668 --- /dev/null +++ b/gpu/impl.h @@ -0,0 +1 @@ +#include "gpu/dot/impl.cuh" \ No newline at end of file diff --git a/layers/dense.h b/layers/dense.h index e3b35a1..b323175 100644 --- a/layers/dense.h +++ b/layers/dense.h @@ -37,6 +37,9 @@ class Dense{ if(init == GLOROT) { return utils::glorotInit(fan_in, fan_out); } + + // Default return zero vector + return vector(fan_in*fan_out,0); } public: diff --git a/main.cpp b/main.cpp index 54d6f67..b67c9e9 100644 --- a/main.cpp +++ b/main.cpp @@ -1,171 +1,54 @@ #include "buildTensorflow.h" -void oldSigmoidTest() { - - Tensor w0({2},{1}); - Tensor x0({-1},{1}); - - Tensor w1({-3},{1}); - Tensor x1({-2},{1}); - - Tensor w3({-3},{1}); - - Tensor a = w0*x0; - Tensor b = w1*x1; - Tensor c = a + b; - Tensor d = w3+c; - Tensor e({-1}, {1}); - Tensor f = d*e; - Tensor g = f.exp(); - Tensor h({1}, {1}); - Tensor i = g + h; - Tensor j({1}, {1}); - Tensor k = j/i; - - vector vsl = {1}; - vector sh = {1}; - auto grad = Matrix(vsl,sh); - k.backward(grad); - - - cout< w0({2},{1}); - Tensor x0({-1},{1}); - - Tensor w1({-3},{1}); - Tensor x1({-2},{1}); - - Tensor w3({-3},{1}); - Tensor e({-1}, {1}); - Tensor h({1}, {1}); - Tensor j({1}, {1}); - - Tensor a = e*(w0*x0 + w1*x1 + w3); - Tensor k = j/(a.exp() + h); - - vector vsl = {1}; - vector sh = {1}; - auto grad = Matrix(vsl,sh); - k.backward(grad); - - cout<* w0 = new Tensor({2},{1}); - Tensor* x0= new Tensor({-1},{1}); - - Tensor* w1= new Tensor({-3},{1}); - Tensor* x1= new Tensor({-2},{1}); - - Tensor* w3= new Tensor({-3},{1}); - - auto a = tensorOps::multiply(w0,x0); - auto b = tensorOps::multiply(w1,x1); - auto c = tensorOps::add(a,b); - auto d = tensorOps::add(w3,c); - - Tensor* e = new Tensor({-1}, {1}); - auto f = tensorOps::multiply(d,e); - - auto g = tensorOps::exp(f); // exponent - - Tensor* h = new Tensor({1}, {1}); - auto i = tensorOps::add(g,h); - - Tensor* j = new Tensor({1}, {1}); - auto k = tensorOps::divide(j,i); - - auto grad = Matrix({1},{1}); - k->backward(grad); - - - cout<grad<grad<grad<grad<grad<* w0 = new Tensor({2},{1}); - Tensor* x0= new Tensor({-1},{1}); - - Tensor* w1= new Tensor({-3},{1}); - Tensor* x1= new Tensor({-2},{1}); - - Tensor* w3= new Tensor({-3},{1}); - - auto a = tensorOps::multiply(w0,x0); - auto b = tensorOps::multiply(w1,x1); - auto c = tensorOps::add(a,b); - auto d = tensorOps::add(w3,c); - - auto k = tensorOps::sigmoid(d); - k->backward(); - - cout<grad<grad<grad<grad<grad< a({2},{1}); - Tensor b({4},{1}); - - auto loss = a+b; - cout< dataset; + dataset.create(5); - oldSigmoidTest(); - newSigmoidTest(); - sigmoidPointerTest(); - updatedSigmoidtest(); + // Create Model + Dense fc1(1,1,NO_ACTIVATION); + // Initialise Optimiser + SGD sgd(0.01); + + // Train + cout<<"Training started"<({i.first}, {1,1}); + auto tar = new Tensor({i.second}, {1,1}); + + // Forward Prop + auto out = fc1.forward(inp); + + // Get Loss + auto l = new Tensor({-1}, {1,1}); + auto k = tensorOps::multiply(l,tar); + auto loss = tensorOps::add(out,k); // error in loss + auto finalLoss = tensorOps::power(loss,(float)2); + + // Compute backProp + finalLoss->backward(); + // cout<val<({cel}, {1,1}); + auto out1 = fc1.forward(test); + + cout<<"The conversion of "<val< dataset; + dataset.create(5); + + // Create Model + Dense fc1(1,1,NO_ACTIVATION); + + // Initialise Optimiser + SGD sgd(0.01); + + // Train + cout<<"Training started"<({i.first}, {1,1}); + auto tar = new Tensor({i.second}, {1,1}); + + // Forward Prop + auto out = fc1.forward(inp); + + // Get Loss + auto l = new Tensor({-1}, {1,1}); + auto k = tensorOps::multiply(l,tar); + auto loss = tensorOps::add(out,k); // error in loss + auto finalLoss = tensorOps::power(loss,(float)2); + + // Compute backProp + finalLoss->backward(); + // cout<val<({cel}, {1,1}); + auto out1 = fc1.forward(test); + + cout<<"The conversion of "<val<* forward() = 0; + + ~Operation() { + delete t1; + delete t2; + } }; diff --git a/operations/operations_Impl.h b/operations/operations_Impl.h index 7ed6af9..bc788c6 100644 --- a/operations/operations_Impl.h +++ b/operations/operations_Impl.h @@ -1,5 +1,6 @@ /* - This file includes all the operator implementations + This file includes all the operator implementations. Be sure to include your operation + implementation here for the project to be able to use your operation. */ #ifndef __OP_IMPL_INCLUDED__ @@ -11,6 +12,7 @@ #include "operations/dotOperation_Impl.h" #include "operations/exponentOperation_Impl.h" #include "operations/sigmoidOperation_Impl.h" +#include "operations/powerOperation_Impl.h" #endif diff --git a/operations/powerOperation.h b/operations/powerOperation.h new file mode 100644 index 0000000..05d32c1 --- /dev/null +++ b/operations/powerOperation.h @@ -0,0 +1,28 @@ +/* + This file defines the PowerOperation class which represents the + exponentiation of a tensor with a scalar. +*/ + +#include "operations/operation.h" + +#ifndef __OP_POWER_INCLUDED__ +#define __OP_POWER_INCLUDED__ + +template +class PowerOperation : public Operation { + public: + T pow; + + PowerOperation(Tensor *t1, T pow) { + this->t1 = t1; + this->pow = pow; + } + void backward(Matrix grad); + + Tensor forwardDeprecated(); + + Tensor* forward(); +}; + +#endif + diff --git a/operations/powerOperation_Impl.h b/operations/powerOperation_Impl.h new file mode 100644 index 0000000..f16f05e --- /dev/null +++ b/operations/powerOperation_Impl.h @@ -0,0 +1,44 @@ +/* + This file contains the implementation of the forward and backward pass of + the power operation. +*/ + +#include "operations/powerOperation.h" + +#ifndef __OP_IMPL_POWER_INCLUDED__ +#define __OP_IMPL_POWER_INCLUDED__ + +/* + Backpropogation of the power operation. + + F = x*pow is forward propogation + The gradient would be as follows: + 1. dF/dx = pow*x^(pow-1) +*/ +template +void PowerOperation::backward(Matrix grad) { + this->t1->backward(grad * (pow * matrixOps::power(this->t1->val,pow-1))); +} + +/* + Forward Propogation of the operation. Returns a tensor. + + TODO: Remove: See addition operation impl for more details +*/ +template +Tensor PowerOperation::forwardDeprecated() { + return NULL; +} + +/* + Forward Propogation of the operation. Return pointer to the tensor. + Forward propogation is simply y = x^(pow). +*/ +template +Tensor* PowerOperation::forward() { + this->t3 = new Tensor(matrixOps::power(this->t1->val, this->pow), this); + return this->t3; +} + +#endif + diff --git a/optims/optim.h b/optims/optim.h new file mode 100644 index 0000000..64eee83 --- /dev/null +++ b/optims/optim.h @@ -0,0 +1,37 @@ +/* + This file defines the Base Class for all Optimizers. +*/ + +#include "types/tensor.h" +#include "unordered_set" + +#ifndef __OPTIM_BASE_INCLUDED__ +#define __OPTIM_BASE_INCLUDED__ + +template +class Optimizer { + + public: + + // This variable contains all the tensors that need to be updated via the optimiser + unordered_set*> params; + + // The learning rate + T lr; + + Optimizer() { + + } + + // This function resets the gradients of the tensors in params to zero for the next forward pass + void zeroGrad() { + for(auto i : params) { + i->zeroGrad(); + } + } + + // This overloaded function specifes how one optimisation step will be performed + virtual void step(T learning_rate) {}; +}; + +#endif \ No newline at end of file diff --git a/optims/sgd.h b/optims/sgd.h new file mode 100644 index 0000000..7c48c9b --- /dev/null +++ b/optims/sgd.h @@ -0,0 +1,102 @@ +/* + This file defines the Stochastic Gradient Descent Optimiser. The Stochastic Gradient Descent + Optimizer takes the loss computed over a single training example or the averages of the loss + computed with multiple training examples and "minimises" the loss. + + By minimising, we mean it finds out all the updatable tensors that contributed towards + computing this loss. Once it has these parameters it performs an update step on each + parameter (Tensor) to tweak them into the right direction to minimise the overall loss. + + It performs this update step by this formula: + + val = val - learning_rate*gradient_of_val + + Where val is the value of the tensor and gradient_of_val is the partial gradient of the + tensor with respect to the loss. +*/ + +#include "optims/optim.h" +#include "queue" + +#ifndef __OPTIM_SGD_INCLUDED__ +#define __OPTIM_SGD_INCLUDED__ + + +template +class SGD : public Optimizer { + + public: + + SGD(T lr) { + this->params.clear(); + this->lr = lr; + } + + /* + This function does a full search through the computational graph of the Tensor x and + stores all the Tensor nodes of the graph in the params set. + + The params set represents all the tensors that need t be updated. + + As of now, a BFS style algorithm traverses through the graph to find out all the Tensor + nodes. + */ + void getParams(Tensor* x) { + + this->params.clear(); // Clear out old params. Should we do this ? + + queue*> q; + q.push(x); + + while(!q.empty()) { + + auto v = q.front(); + q.pop(); + auto op = v->backOp; + + if(op) { + + if(op->t1 != NULL && this->params.find(op->t1) == this->params.end()) { + q.push(op->t1); + this->params.insert(op->t1); + } + + if(op->t2 != NULL && this->params.find(op->t2) == this->params.end()) { + q.push(op->t2); + this->params.insert(op->t2); + } + } + } + } + + /* + This function is the function all users will use to perfrom the gradient descent update + for their model. It performs this operation in 3 phases. + 1. Gets all tensor parameters + 2. Updates all these parameters via the step function + 3. Clear's all the gradients of the parameters for the next step. + */ + void minimise(Tensor* x) { + + // Get all tensors in computational graph + getParams(x); + + // step through 1 parameter update + step(this->lr); + + // reset Gradients to zero + this->zeroGrad(); + + } + + // Performs 1 step of gradient descent. See top of the file to see definition of SGD. + void step(T learning_rate) { + + for(auto t: this->params) { + t->val = t->val - learning_rate*t->grad; + } + } + +}; + +#endif \ No newline at end of file diff --git a/overloads/matrix.h b/overloads/matrix.h index dfb5dc8..2ebe183 100644 --- a/overloads/matrix.h +++ b/overloads/matrix.h @@ -7,12 +7,20 @@ #ifndef __MATRIX_OPS_INCLUDED__ #define __MATRIX_OPS_INCLUDED__ -// Sigmoid + namespace matrixOps { + + // Sigmoid Operation template Matrix sigmoid(const Matrix &a) { return (T)1/((T)1 + (((T)-1)*a).exp()); } + + // Power Operation + template + Matrix power(Matrix &a, T pow) { + return a^pow; + } }; // Overloaded function for printing matrix: cout< operator + (const T t, const Matrix &rhs) { return Matrix(res, resShape); } +// Subtraction with a scalar +template +Matrix operator - (const T t, const Matrix &rhs) { + auto res = t-rhs.val; + auto resShape = rhs.shape; + return Matrix(res, resShape); +} + #endif \ No newline at end of file diff --git a/overloads/tensor.h b/overloads/tensor.h index f61733d..695ce0d 100644 --- a/overloads/tensor.h +++ b/overloads/tensor.h @@ -76,6 +76,13 @@ namespace tensorOps { return one->frontOp->forward(); } + // Power + template + Tensor* power(Tensor* one, T t) { + one->frontOp = new PowerOperation(one, t); + return one->frontOp->forward(); + } + }; #endif diff --git a/overloads/vector.h b/overloads/vector.h index 9aa14ba..8d8e7c0 100644 --- a/overloads/vector.h +++ b/overloads/vector.h @@ -62,6 +62,29 @@ vector operator + (T a, const vector &b) { return arr; } +// Subtraction +template +vector operator - (vector &a, const vector &b) { + assert("Tensors are not of the same size !" && a.size() == b.size()); + vector arr; + for(int i = 0;i +vector operator - (T a, const vector &b) { + vector arr; + for(int i = 0;i vector operator / (vector &a, const vector &b) { diff --git a/tests/dense.h b/tests/dense.h index 18b9f8b..4072a94 100644 --- a/tests/dense.h +++ b/tests/dense.h @@ -15,11 +15,14 @@ TEST(DENSE_LAYER_TESTS, SHAPE_CHECKS) { Tensor* x1 = new Tensor({1,2},{1,2}); // put 1 by 2 tensor auto m = fc1.forward(x1); // should work fine - Tensor* x2 = new Tensor({1},{1}); // put 1 by 2 tensor + delete m; ASSERT_DEATH({ + Tensor* x2 = new Tensor({1},{1}); // put 1 by 2 tensor + Dense fc1(2,5); // input - 2, output should be 5 auto m = fc1.forward(x2); // should give error as dot product will not be compatible ! }, "Shapes aren't compatible for dot product !"); + } /* @@ -41,4 +44,6 @@ TEST(DENSE_LAYER_TESTS, CORRECTNESS_CHECK) { auto expectedVal = matrixOps::sigmoid((x->val).dot(w) + b); ASSERT_TRUE(testUtils::isMatrixEqual(m->val, expectedVal)); + + delete m; } diff --git a/tests/main.cpp b/tests/main.cpp index cde9948..6065f3c 100644 --- a/tests/main.cpp +++ b/tests/main.cpp @@ -7,6 +7,7 @@ #include "tests/matrix.h" #include "tests/tensor.h" #include "tests/dense.h" +#include "tests/sgd.h" int main(int argc, char **argv) { testing::InitGoogleTest(&argc, argv); diff --git a/tests/matrix.h b/tests/matrix.h index 8aa14bd..1e4dc54 100644 --- a/tests/matrix.h +++ b/tests/matrix.h @@ -127,6 +127,28 @@ TEST( MATRIX_TESTS, MatrixOperationMultiplicationCheck) { ASSERT_TRUE(testUtils::isMatrixEqual(ans,res)); } +/* + This test tests the accuracy of the power operation between a matrix and a scalar +*/ +TEST( MATRIX_TESTS, MatrixOperationPowerCheck) { + + vector a({1,2,3}); + vector shape1({1,3}); + Matrix m1(a,shape1); + int pow = 3; + auto ans = m1^pow; // Checking barebones operation + Matrix res({1,8,27},{1,3}); + + ASSERT_TRUE(testUtils::isMatrixEqual(ans,res)); + + Matrix m2({1,2,3},{1,3}); + pow = 2; + Matrix res2({1,4,9},{1,3}); + auto ans2 = matrixOps::power(m2,pow); // Checking wrapper function + + ASSERT_TRUE(testUtils::isMatrixEqual(ans2,res2)); +} + /* This test tests the accuracy of the division operation between 2 matrices */ @@ -143,17 +165,6 @@ TEST( MATRIX_TESTS, MatrixOperationDivisionCheck) { ASSERT_TRUE(testUtils::isMatrixEqual(ans,res)); } -/* - This test tests the accuracy of the power operation between matrice and scalar -*/ -TEST( MATRIX_TESTS, MatrixOperationPowerCheck) { - vector a({1,2,3}); - vector shape1({1,3}); - Matrix m1(a,shape1); - auto ans = m1^2; - Matrix res({1,4,9},{1,3}); - ASSERT_TRUE(testUtils::isMatrixEqual(ans,res)); -} /* This test tests the accuracy of the exponent operation. diff --git a/tests/sgd.h b/tests/sgd.h new file mode 100644 index 0000000..3c40d02 --- /dev/null +++ b/tests/sgd.h @@ -0,0 +1,54 @@ +/* + This file tests the SGD Optimizer layer. +*/ + +#include +#include "optims/sgd.h" +#include "tests/utils.h" +#include "overloads/tensor.h" + +/* + Tests that the optimizer layer gets all the tensors that need to be updated. +*/ +TEST(SGD_OPTIM_TESTS, TENSOR_UPDATE_CHECK) { + Tensor* a = new Tensor({2},{1}); + Tensor* b = new Tensor({4},{1}); + auto c = tensorOps::add(a,b); + Tensor* d = new Tensor({3},{1}); + + auto e = tensorOps::multiply(c,d); + e->backward(); + + SGD sgd(0.1); + // get all paramters/tensors that need to be updated wrt to e + sgd.getParams(e); + unordered_set*> expected_res = {a,b,c,d}; + ASSERT_TRUE(sgd.params == expected_res); + + // Clean up + delete e; +} + +/* + Tests that the tensor values are updated according to gradient values and learning rate +*/ +TEST(SGD_OPTIM_TESTS, SGD_STEP_CHECK) { + Tensor* a = new Tensor({2},{1}); + Tensor* b = new Tensor({4},{1}); + auto c = tensorOps::add(a,b); + Tensor* d = new Tensor({3},{1}); + + auto e = tensorOps::multiply(c,d); + e->backward(); + + SGD sgd(1); + // get all paramters/tensors that need to be updated wrt to e + sgd.minimise(e); + + ASSERT_TRUE(a->val.val[0] == -1); // update = 2 - 1*3 + ASSERT_TRUE(b->val.val[0] == 1); // update = 4 - 1*3 + ASSERT_TRUE(d->val.val[0] == -3); // update = 3 -1*6 + + // Clean up + delete e; +} diff --git a/tests/sigmoidTests.h b/tests/sigmoidTests.h new file mode 100644 index 0000000..8d9a36a --- /dev/null +++ b/tests/sigmoidTests.h @@ -0,0 +1,155 @@ +#include "buildTensorflow.h" + +void oldSigmoidTest() { + + Tensor w0({2},{1}); + Tensor x0({-1},{1}); + + Tensor w1({-3},{1}); + Tensor x1({-2},{1}); + + Tensor w3({-3},{1}); + + Tensor a = w0*x0; + Tensor b = w1*x1; + Tensor c = a + b; + Tensor d = w3+c; + Tensor e({-1}, {1}); + Tensor f = d*e; + Tensor g = f.exp(); + Tensor h({1}, {1}); + Tensor i = g + h; + Tensor j({1}, {1}); + Tensor k = j/i; + + vector vsl = {1}; + vector sh = {1}; + auto grad = Matrix(vsl,sh); + k.backward(grad); + + + cout< w0({2},{1}); + Tensor x0({-1},{1}); + + Tensor w1({-3},{1}); + Tensor x1({-2},{1}); + + Tensor w3({-3},{1}); + Tensor e({-1}, {1}); + Tensor h({1}, {1}); + Tensor j({1}, {1}); + + Tensor a = e*(w0*x0 + w1*x1 + w3); + Tensor k = j/(a.exp() + h); + + vector vsl = {1}; + vector sh = {1}; + auto grad = Matrix(vsl,sh); + k.backward(grad); + + cout<* w0 = new Tensor({2},{1}); + Tensor* x0= new Tensor({-1},{1}); + + Tensor* w1= new Tensor({-3},{1}); + Tensor* x1= new Tensor({-2},{1}); + + Tensor* w3= new Tensor({-3},{1}); + + auto a = tensorOps::multiply(w0,x0); + auto b = tensorOps::multiply(w1,x1); + auto c = tensorOps::add(a,b); + auto d = tensorOps::add(w3,c); + + Tensor* e = new Tensor({-1}, {1}); + auto f = tensorOps::multiply(d,e); + + auto g = tensorOps::exp(f); // exponent + + Tensor* h = new Tensor({1}, {1}); + auto i = tensorOps::add(g,h); + + Tensor* j = new Tensor({1}, {1}); + auto k = tensorOps::divide(j,i); + + auto grad = Matrix({1},{1}); + k->backward(grad); + + + cout<grad<grad<grad<grad<grad<* w0 = new Tensor({2},{1}); + Tensor* x0= new Tensor({-1},{1}); + + Tensor* w1= new Tensor({-3},{1}); + Tensor* x1= new Tensor({-2},{1}); + + Tensor* w3= new Tensor({-3},{1}); + + auto a = tensorOps::multiply(w0,x0); + auto b = tensorOps::multiply(w1,x1); + auto c = tensorOps::add(a,b); + auto d = tensorOps::add(w3,c); + + auto k = tensorOps::sigmoid(d); + k->backward(); + + cout<grad<grad<grad<grad<grad< a({1,2,3,4,5,6}); vector shape1({2,4}); - Matrix m1(a,shape1); + Tensor m1(a,shape1); }, "Shape and size of vector are incompatible !"); // testing for no asserts with various dimensions that can used in nd matrix @@ -24,9 +24,9 @@ TEST( TENSOR_TESTS, TensorCreation) { vector shape1({2,3}); vector shape2({1,1,1,2,3}); vector shape3({2,3,1,1,1}); - Matrix m1(a,shape1); - m1 = Matrix(a,shape2); - m1 = Matrix(a,shape3); + Tensor m1(a,shape1); + m1 = Tensor(a,shape2); + m1 = Tensor(a,shape3); } /* @@ -42,6 +42,9 @@ TEST( TENSOR_TESTS, TensorAddOperations) { Matrix res({2,4,6,8,10},{5}); ASSERT_TRUE(testUtils::isMatrixEqual(ans->val,res)); + + // Clean up + delete ans; } @@ -53,6 +56,9 @@ TEST( TENSOR_TESTS, TensorMultiplyOperations) { Matrix res({1,4,9,16,25},{5}); ASSERT_TRUE(testUtils::isMatrixEqual(ans->val,res)); + + // Clean up + delete ans; } TEST( TENSOR_TESTS, TensorDivideOperations) { @@ -63,6 +69,9 @@ TEST( TENSOR_TESTS, TensorDivideOperations) { Matrix res({5,2,5,2,1},{5}); ASSERT_TRUE(testUtils::isMatrixEqual(ans->val,res)); + + // Clean up + delete ans; } /* @@ -90,8 +99,33 @@ TEST( TENSOR_TESTS, TensorSigmoidOperations) { Matrix resGrad({0.196611926}, {1}); ASSERT_TRUE(testUtils::isMatrixEqual(one->grad,resGrad)); // check back Propogation + + // Clean up + delete ans; } +/* + This test checks the backward pass and forward pass of the power operation. +*/ +TEST( TENSOR_TESTS, TensorPowerOperations) { + + Tensor* one = new Tensor({2,3,4},{1,3}); + float pow = 3; + auto ans = tensorOps::power(one,pow); + Matrix res({8,27,64}, {1,3}); + + ASSERT_TRUE(testUtils::isMatrixEqual(ans->val,res)); // check front Propogation + + ans->backward(); + + Matrix resGrad({12,27,48}, {1,3}); + ASSERT_TRUE(testUtils::isMatrixEqual(one->grad,resGrad)); // check back Propogation + + // Clean up + delete ans; +} + + /* Test Computational Graph by checking Pointer Values of each tensor and operation for a barebones sigmoid function @@ -211,6 +245,9 @@ TEST( TENSOR_TESTS, ComputationGraph) { ASSERT_TRUE(x1->frontOp == b->backOp); ASSERT_TRUE(x1->backOp == NULL); + // Clean up + delete k; + } /* @@ -258,4 +295,7 @@ TEST(TENSOR_TESTS, BackwardPropogation) { res = Matrix({0.196611971},{1}); ASSERT_TRUE(testUtils::isMatrixEqual(w3->grad,res)); + + // Clean up + delete k; } diff --git a/types/matrix.h b/types/matrix.h index 6c87200..74a947f 100644 --- a/types/matrix.h +++ b/types/matrix.h @@ -18,8 +18,7 @@ struct Matrix{ */ vector elemsEncounteredPerDim; - // Check whether GPU is accessible or not - bool gpu = false; + // Verifies that the shape provided and val vector provided are compatible in size bool verifyShape(const vector &val, const vector &shape) { @@ -245,6 +244,15 @@ struct Matrix{ return Matrix(res, resShape); } + // Performs elementwise subtraction + Matrix operator - (const Matrix &rhs) { + assert("Shapes aren't compatible for addition !" && + verifyShapeForElementwiseOperation(this->shape, rhs.shape)); + + auto res = this->val - rhs.val; + auto resShape = this->shape; + return Matrix(res, resShape); + } // Performs elementwise division Matrix operator / (const Matrix &rhs) { diff --git a/types/tensor.h b/types/tensor.h index bae4ec1..9c41341 100644 --- a/types/tensor.h +++ b/types/tensor.h @@ -25,26 +25,13 @@ #include "operations/exponentOperation.h" #include "operations/dotOperation.h" #include "operations/sigmoidOperation.h" +#include "operations/powerOperation.h" #ifndef __TENSOR_FLOAT_INCLUDED__ #define __TENSOR_FLOAT_INCLUDED__ template class Tensor { - private: - - /* - This function is called during the initilaisation of Tensor. It sets the value of it's gradients to zero. This is needed as - during backPropogation the same tensor can be used for different operation, hence to calculate it's partial gradients - each individual operation's gradients have to be summed up. Hence we initialise the tensor's gradients to zero. - - See constructor for it's usage. - */ - void zeroGrad() { - assert(val.shape.size() != 0 && "The value of matrix cannot be uninitialised during initialisng zeros in tensor's gradient"); - vector g(val.val.size(), 0); - this->grad = Matrix(g, val.shape); - } public: @@ -145,6 +132,19 @@ class Tensor { } } + /* + This function is called during the initilaisation of Tensor. It sets the value of it's gradients to zero. This is needed as + during backPropogation the same tensor can be used for different operation, hence to calculate it's partial gradients + each individual operation's gradients have to be summed up. Hence we initialise the tensor's gradients to zero. + + See constructor for it's usage. + */ + void zeroGrad() { + assert(val.shape.size() != 0 && "The value of matrix cannot be uninitialised during initialisng zeros in tensor's gradient"); + vector g(val.val.size(), 0); + this->grad = Matrix(g, val.shape); + } + /* From here on, we overload the operators like +, / and * to define what happens when we we add, divide and multiply tensors. We also support other operations like dot @@ -220,10 +220,14 @@ class Tensor { return this->frontOp->forwardDeprecated(); } - // Destructor + /* + Go back towards computational graph and deletes every Tensor and Op encountered + in a DFS fashion + + TODO: find better way to clear memory of all tensors and prevent memory leaks. + */ ~Tensor() { - // delete backOp; - // delete frontOp; + delete backOp; } }; diff --git a/utils/matrix.h b/utils/matrix.h index fc79d3e..1e8394e 100644 --- a/utils/matrix.h +++ b/utils/matrix.h @@ -24,6 +24,12 @@ namespace utils { return Matrix(m.val,shape); } + + template< typename T> + Matrix zerosLike(const Matrix &m) { + vector val(m.val.size(),0); + return Matrix(val,m.shape); + } } #endif \ No newline at end of file