"""Linear (fully connected) layer implemented with CuPy."""
import copy
from typing import Tuple

import cupy as cpy

from optimizer import Optimizer


class Linear:
    """Linear layer computing y = x @ w + b."""

    def __init__(self, in_features_size: int, out_features_size: int, bias: bool = True) -> None:
        """Initialize.

        Args:
            in_features_size: size of each input sample.
            out_features_size: size of each output sample.
            bias: if set to False, the layer will not learn an additive bias. Defaults to True.
        """
        self.in_features_size = in_features_size
        # w is conventionally shaped [out_features_size, in_features_size]; the
        # transposed layout is used here so forward() can compute x @ w directly.
        self.w = cpy.zeros([in_features_size, out_features_size])
        if bias:
            self.b = cpy.zeros([out_features_size])
        else:
            self.b = None
        self.cache = dict(input=None)
        self.set_parameters()
        self.optimizer_w = None
        self.optimizer_b = None

    def set_parameters(self) -> None:
        """Initialize w and b uniformly in [-stdv, stdv], with stdv = 1 / sqrt(in_features_size)."""
        stdv = 1.0 / cpy.sqrt(self.in_features_size)
        self.w = cpy.random.uniform(-stdv, stdv, self.w.shape)
        if self.b is not None:
            self.b = cpy.random.uniform(-stdv, stdv, self.b.shape)

    def forward(self, x: cpy.ndarray) -> cpy.ndarray:
        """Forward propagation.

        Args:
            x: input array.

        Returns:
            computed linear layer output.
        """
        y = cpy.dot(x, self.w)
        if self.b is not None:
            y += self.b
        # Cache the input; backward() needs it to compute the weight gradient.
        self.cache = dict(input=x)
        return y

    def backward(self, grad: cpy.ndarray) -> cpy.ndarray:
        """Backward propagation.

        Args:
            grad: gradient w.r.t. the layer output.

        Returns:
            the gradient w.r.t. the input.
        """
        x = self.cache["input"]
        # The input gradient is grad @ w.T in both the 2D and 3D cases.
        output_grad = cpy.dot(grad, self.w.T)
        if len(grad.shape) == 3:
            # 3D input [batch, seq, in_features]: contract each sample's
            # [in, seq] @ [seq, out], then sum the per-sample results over the batch.
            self.grad_w = cpy.sum(cpy.matmul(x.transpose(0, 2, 1), grad), axis=0)
            if self.b is not None:
                self.grad_b = cpy.sum(grad, axis=(0, 1))
        else:
            # 2D input [batch, in_features].
            self.grad_w = cpy.dot(x.T, grad)
            if self.b is not None:
                self.grad_b = grad.sum(axis=0)
        return output_grad

    def set_optimizer(self, optimizer: Optimizer) -> None:
        """Set optimizer.

        Args:
            optimizer: optimizer instance; deep-copied so that w and b keep
                independent optimizer state (e.g. momentum buffers).
        """
        self.optimizer_w = copy.deepcopy(optimizer)
        self.optimizer_b = copy.deepcopy(optimizer)

    def update_weights(self) -> None:
        """Update weights based on the calculated gradients."""
        self.w = self.optimizer_w.update(self.grad_w, self.w)
        if self.b is not None:
            self.b = self.optimizer_b.update(self.grad_b, self.b)

    def __call__(self, x: cpy.ndarray) -> cpy.ndarray:
        """Enable calling the layer like a function.

        Args:
            x: input array.

        Returns:
            computed linear output.
        """
        return self.forward(x)

    def set_parameters_externally(self, w: cpy.ndarray, b: cpy.ndarray) -> None:
        """Set parameters externally. Used for testing.

        Args:
            w: weight.
            b: bias.
        """
        self.w = w
        self.b = b

    def get_grads(self) -> Tuple[cpy.ndarray, cpy.ndarray]:
        """Access gradients. Used for testing.

        Returns:
            the weight and bias gradients.
        """
        return self.grad_w, self.grad_b

    def get_weights(self) -> dict:
        """Return the parameters as host (NumPy) arrays."""
        return {"w": cpy.asnumpy(self.w), "b": cpy.asnumpy(self.b)}

    def set_weights(self, weights: dict) -> None:
        """Load parameters from a dict of arrays, moving them to the device."""
        self.w = cpy.array(weights["w"])
        self.b = cpy.array(weights["b"])
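

# A minimal usage sketch (not part of the original module): it exercises
# forward/backward and one weight update with a tiny hand-rolled SGD stand-in.
# The real Optimizer lives in optimizer.py; its interface is only assumed here
# to be `update(grad, param) -> new_param`, matching how update_weights()
# calls it above.
if __name__ == "__main__":

    class _SGD:
        """Hypothetical stand-in for optimizer.Optimizer (plain gradient descent)."""

        def __init__(self, lr: float = 0.1) -> None:
            self.lr = lr

        def update(self, grad: cpy.ndarray, param: cpy.ndarray) -> cpy.ndarray:
            # Basic SGD step: param <- param - lr * grad.
            return param - self.lr * grad

    layer = Linear(in_features_size=4, out_features_size=2)
    layer.set_optimizer(_SGD(lr=0.1))

    x = cpy.random.randn(8, 4)                   # batch of 8 samples
    y = layer(x)                                 # forward pass: shape [8, 2]
    grad_in = layer.backward(cpy.ones_like(y))   # upstream gradient of ones
    layer.update_weights()                       # apply one SGD step to w and b
    print(y.shape, grad_in.shape)                # (8, 2) (8, 4)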