29 changes: 29 additions & 0 deletions operations/averageOperation.h
@@ -0,0 +1,29 @@
/*
This file defines the AverageOperation class which represents the average of
multiple tensors.
*/

#include "operations/operation.h"

#ifndef __OP_AVG_INCLUDED__
#define __OP_AVG_INCLUDED__

template <typename T>
class AverageOperation : public Operation<T> {
public:

vector<Tensor<T>*> tensors;
Owner:

Thanks for the pull request!! This is a prelim review.

The representation of the average operation will need to be changed, I think. Generally the network takes its input as a single tensor of shape [batch_size, input_dim]. So the average operation should take as input t1, which would be a tensor, and an int variable called axis specifying which dimension to average along.

Hence the API would look like

loss = losses::mse(y,gt)
averageLoss = tensorOps::average(loss,axis=0)
// and then continue from here

This would be in line with how pytorch, tf, and numpy handle reductions along an axis. Hence, I think it would be better to adhere to their API.

Collaborator (Author):


Right, I missed the big picture here. Thanks for pointing this out.

Owner:


No problem, looking forward to your updates :)
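
For concreteness, a minimal sketch of the axis-based design requested above (the axis member, the constructor shape, and the comments are illustrative assumptions, not a final implementation):

template <typename T>
class AverageOperation : public Operation<T> {
public:

int axis; // dimension to reduce, e.g. 0 for the batch dimension

AverageOperation(Tensor<T>* t1, int axis) {
this->t1 = t1;
this->axis = axis;
}

// forward() would sum t1->val along axis and divide by that dimension's
// size; backward() would broadcast grad / dim_size back to every slice
// of t1 along axis.
Tensor<T>* forward();

void backward(Matrix<T> grad);

};

With that in place the call site matches the example above: averageLoss = tensorOps::average(loss, 0).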


AverageOperation(vector<Tensor<T>*>& tensors) {
this->tensors = tensors;
}

void backward(Matrix<T> grad);

Tensor<T> forwardDeprecated();

Tensor<T>* forward();

};

#endif
64 changes: 64 additions & 0 deletions operations/averageOperation_Impl.h
@@ -0,0 +1,64 @@
/*
This file contains the implementation of the forward and backward pass of
the average operation.
*/

#include "operations/averageOperation.h"

#ifndef __OP_AVG_IMPL_INCLUDED__
#define __OP_AVG_IMPL_INCLUDED__

/*
Backpropagation of the average operation. The average operation distributes the gradient: it
passes the incoming gradient to each input source after dividing it by the number of inputs
(e.g. with 4 inputs, each input receives grad / 4).
*/
template <typename T>
void AverageOperation<T>::backward(Matrix<T> grad) {
if (tensors.size() == 0) { // To avoid division by zero
return;
}

auto scaledGrad = grad / tensors.size();
for(auto t : tensors) {
t->backward(scaledGrad);
}
}

/*
Forward propagation of the average operation. Returns a tensor.

TODO: Remove: See average operation impl for more details
*/
template <typename T>
Tensor<T> AverageOperation<T>::forwardDeprecated() {
return NULL;
}

/*
Forward propagation of the operation. Returns a pointer to the tensor.
*/
template <typename T>
Tensor<T>* AverageOperation<T>::forward() {
if (tensors.size() == 0) { // To avoid division by zero
return NULL;
}

Matrix<T> sum = tensors[0]->val; // size checked above, so tensors[0] exists

for (size_t i = 1; i < tensors.size(); i++) {
sum += tensors[i]->val;
}

sum /= tensors.size();

this->t3 = new Tensor<T>(sum, this);
return this->t3;
}

#endif
28 changes: 28 additions & 0 deletions operations/subtractOperation.h
@@ -0,0 +1,28 @@
/*
This file defines the SubtractOperation class which represents the subtraction of
two tensors.
*/

#include "operations/operation.h"

#ifndef __OP_SUBTRACT_INCLUDED__
#define __OP_SUBTRACT_INCLUDED__

template <typename T>
class SubtractOperation : public Operation<T> {
public:

SubtractOperation(Tensor<T> *t1, Tensor<T> *t2) {
this->t1 = t1;
this->t2 = t2;
}

void backward(Matrix<T> grad);

Tensor<T>* forward();

Tensor<T> forwardDeprecated();

};

#endif
31 changes: 31 additions & 0 deletions operations/subtractOperation_impl.h
@@ -0,0 +1,31 @@
/*
This file contains the implementation of the forward and backward pass of
the subtract operation.
*/

#include "operations/subtractOperation.h"

#ifndef __OP_SUBTRACT_IMPL_INCLUDED__
#define __OP_SUBTRACT_IMPL_INCLUDED__

template <typename T>
Owner:

Comments on top of the function. Can remove comments inside function

void SubtractOperation<T>::backward(Matrix<T> grad) {
// Subtraction distributes the gradient: t1's gradient is backpropagated
// as is, and t2's is backpropagated with a negative sign
this->t1->backward(grad);
this->t2->backward(-1 * grad);
}

template <typename T>
Tensor<T>* SubtractOperation<T>::forward() {
this->t3 = new Tensor<T>(this->t1->val - this->t2->val, this);
return this->t3;
}

template <typename T>
Tensor<T> SubtractOperation<T>::forwardDeprecated() {
this->t3 = new Tensor<T>(this->t1->val - this->t2->val, this);
return *this->t3;
}

#endif
64 changes: 61 additions & 3 deletions overloads/tensor.h
@@ -17,13 +17,42 @@ namespace tensorOps {
return one->frontOp->forward();
}

// Addition with Scalar
// Addition with Scalar - Scalar first
template<typename T>
Tensor<T>* add(T v, Tensor<T>* two) {
auto one = new Tensor<T>(vector<T>(two->val.val.size(), v), two->val.shape);
return add(one,two);
}

// Addition with Scalar - Vector first
template<typename T>
Tensor<T>* add(Tensor<T>* two, T v) {
auto one = new Tensor<T>(vector<T>(two->val.val.size(), v), two->val.shape);
return add(one,two);
}

// Subtraction
template<typename T>
Tensor<T>* subtract(Tensor<T>* one, Tensor<T>* two) {
one->frontOp = new SubtractOperation<T>(one, two);
two->frontOp = one->frontOp;
return one->frontOp->forward();
}

// Subtraction with Scalar - Scalar first
template<typename T>
Tensor<T>* subtract(T v, Tensor<T>* two) {
auto one = new Tensor<T>(vector<T>(two->val.val.size(),v),two->val.shape);
return subtract(one,two);
}

// Subtraction with Scalar - Vector first
template<typename T>
Tensor<T>* subtract(Tensor<T>* two, T v) {
auto one = new Tensor<T>(vector<T>(two->val.val.size(),v),two->val.shape);
return subtract(one,two);
}

// Divide
template<typename T>
Tensor<T>* divide(Tensor<T>* one, Tensor<T>* two) {
@@ -32,13 +61,20 @@ namespace tensorOps {
return one->frontOp->forward();
}

// Divide Scalar
// Divide Scalar - Scalar first
template<typename T>
Tensor<T>* divide(T v, Tensor<T>* two) {
auto one = new Tensor<T>(vector<T>(two->val.val.size(), v), two->val.shape);
return divide(one,two);
}

// Divide Scalar - Vector first
template<typename T>
Tensor<T>* divide(Tensor<T>* two, T v) {
auto one = new Tensor<T>(vector<T>(two->val.val.size(), v), two->val.shape);
return divide(one,two);
}

// Multiply
template<typename T>
Tensor<T>* multiply(Tensor<T>* one, Tensor<T>* two) {
@@ -47,13 +83,20 @@ namespace tensorOps {
return one->frontOp->forward();
}

// Multiply with scalar
// Multiply with scalar - Scalar first
template<typename T>
Tensor<T>* multiply(T v, Tensor<T>* two) {
auto one = new Tensor<T>(vector<T>(two->val.val.size(), v), two->val.shape);
return multiply(one,two);
}

// Multiply with scalar - Vector first
template<typename T>
Tensor<T>* multiply(Tensor<T>* two, T v) {
auto one = new Tensor<T>(vector<T>(two->val.val.size(), v), two->val.shape);
return multiply(one,two);
}

// Dot Product
template<typename T>
Tensor<T>* dot(Tensor<T>* one, Tensor<T>* two) {
@@ -76,6 +119,21 @@ namespace tensorOps {
return one->frontOp->forward();
}

// Average
template<typename T>
Tensor<T>* average(vector<Tensor<T>*>& tensors) {
if (tensors.size() == 0) { // To avoid division by zero. Should we do this?
Owner:

Don't think this check will be needed if we adhere to the requested API

return NULL;
}

Operation<T>* op = new AverageOperation<T>(tensors);
for (auto t : tensors) {
t->frontOp = op;
}

return tensors[0]->frontOp->forward();
}

};

#endif
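
For reference, a minimal usage sketch of the new wrappers (the Tensor constructor arguments and the vector<int> shape type are assumptions based on how the scalar overloads above build constant tensors; names and values are illustrative):

auto x = new Tensor<float>(vector<float>{1, 2, 3, 4}, vector<int>{2, 2});
auto y = new Tensor<float>(vector<float>{5, 6, 7, 8}, vector<int>{2, 2});

auto diff = tensorOps::subtract(x, y); // elementwise x - y
auto scaled = tensorOps::multiply(diff, 0.5f); // vector-first scalar overload

vector<Tensor<float>*> ts = {x, y};
auto avg = tensorOps::average(ts); // elementwise mean of x and y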
23 changes: 23 additions & 0 deletions overloads/vector.h
@@ -62,6 +62,29 @@ vector<T> operator + (T a, const vector<T> &b) {
return arr;
}

// Vector Subtraction
template<typename T>
vector<T> operator - (vector<T> &a, const vector<T> &b) {
assert("Tensors are not of the same size !" && a.size() == b.size());
vector<T> arr;
for (size_t i = 0; i < a.size(); i++) {
T diff = a[i] - b[i];
arr.push_back(diff);
}
return arr;
}

// Scalar Subtraction
template<typename T>
vector<T> operator - (T a, const vector<T> &b) {
vector<T> arr;
for (size_t i = 0; i < b.size(); i++) {
T diff = a - b[i];
arr.push_back(diff);
}
return arr;
}

// Vector Divide
template<typename T>
vector<T> operator / (vector<T> &a, const vector<T> &b) {
9 changes: 9 additions & 0 deletions types/matrix.h
@@ -245,6 +245,15 @@ struct Matrix{
return Matrix(res, resShape);
}

// Performs elementwise subtraction
Matrix<T> operator - (const Matrix<T> &rhs) {
assert("Shapes aren't compatible for subtraction !" &&
verifyShapeForElementwiseOperation(this->shape, rhs.shape));

auto res = this->val - rhs.val;
auto resShape = this->shape;
return Matrix(res, resShape);
}
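
A quick usage sketch of the new operator (the Matrix constructor arguments and the vector<int> shape type are assumptions; values are illustrative):

Matrix<float> a(vector<float>{1, 2, 3, 4}, vector<int>{2, 2});
Matrix<float> b(vector<float>{4, 3, 2, 1}, vector<int>{2, 2});
Matrix<float> c = a - b; // elementwise: {-3, -1, 1, 3}, shape {2, 2}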

// Performs elementwise division
Matrix<T> operator / (const Matrix<T> &rhs) {
2 changes: 2 additions & 0 deletions types/tensor.h
@@ -20,11 +20,13 @@

#include "operations/operation.h"
#include "operations/addOperation.h"
#include "operations/subtractOperation.h"
#include "operations/multiplyOperation.h"
#include "operations/divideOperation.h"
#include "operations/exponentOperation.h"
#include "operations/dotOperation.h"
#include "operations/sigmoidOperation.h"
#include "operations/averageOperation.h"

#ifndef __TENSOR_FLOAT_INCLUDED__
#define __TENSOR_FLOAT_INCLUDED__