optimizer.py
from abc import ABC, abstractmethod

import cupy as cpy


class Optimizer(ABC):
    """Base class for optimizers."""

    @abstractmethod
    def update(self, grad: cpy.ndarray, w: cpy.ndarray) -> cpy.ndarray:
        """Update weights based on gradient.

        Args:
            grad: gradient.
            w: weights to be updated.

        Returns:
            updated weights.
        """

class SGD(Optimizer):
    """Stochastic gradient descent optimizer."""

    def __init__(self, learning_rate: float = 0.001) -> None:
        """Initialize.

        Args:
            learning_rate: learning rate. Defaults to 0.001.
        """
        self.learning_rate = learning_rate

    def update(self, grad: cpy.ndarray, w: cpy.ndarray) -> cpy.ndarray:
        """Update weights based on gradient.

        Args:
            grad: gradient.
            w: weights to be updated.

        Returns:
            updated weights.
        """
        w -= grad * self.learning_rate
        return w

class Adam(Optimizer):
    """Implements the Adam optimizer."""

    def __init__(
        self, learning_rate: float = 0.01, beta1: float = 0.9, beta2: float = 0.999, epsilon: float = 1e-8
    ) -> None:
        """Initialize.

        Args:
            learning_rate: learning rate. Defaults to 0.01.
            beta1: exponential decay rate for the first-moment (momentum) estimate. Defaults to 0.9.
            beta2: exponential decay rate for the second-moment (RMS) estimate. Defaults to 0.999.
            epsilon: small constant for numerical stability. Defaults to 1e-8.
        """
        self.m_dw, self.rms_dw = 0, 0
        self.learning_rate = learning_rate
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        self.t = 1
    def update(self, grad: cpy.ndarray, w: cpy.ndarray) -> cpy.ndarray:
        """Update weights based on gradient.

        Args:
            grad: gradient.
            w: weights to be updated.

        Returns:
            updated weights.
        """
        # first-moment (momentum) estimate with beta1
        self.m_dw = self.beta1 * self.m_dw + (1 - self.beta1) * grad
        # second-moment (RMS) estimate with beta2
        self.rms_dw = self.beta2 * self.rms_dw + (1 - self.beta2) * (grad**2)
        # bias correction
        m_dw_corr = self.m_dw / (1 - (self.beta1**self.t))
        rms_dw_corr = self.rms_dw / (1 - (self.beta2**self.t))
        # advance the timestep so bias correction decays on subsequent updates
        self.t += 1
        # update weights
        w = w - self.learning_rate * (m_dw_corr / (cpy.sqrt(rms_dw_corr) + self.epsilon))
        return w
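

# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative only, not part of the original module).
# It assumes a CuPy-capable device and uses the toy loss 0.5 * ||w||^2, whose
# gradient is simply w; any gradient array of matching shape would work.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    w_sgd = cpy.array([1.0, -2.0, 3.0])
    w_adam = w_sgd.copy()

    sgd = SGD(learning_rate=0.1)
    adam = Adam(learning_rate=0.1)

    for _ in range(100):
        grad_sgd = w_sgd.copy()    # d/dw of 0.5 * ||w||^2 is w itself
        grad_adam = w_adam.copy()
        w_sgd = sgd.update(grad=grad_sgd, w=w_sgd)
        w_adam = adam.update(grad=grad_adam, w=w_adam)

    # both parameter vectors should be driven toward zero
    print(w_sgd, w_adam)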