-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathlogistic_regression.py
More file actions
120 lines (84 loc) · 3.45 KB
/
logistic_regression.py
File metadata and controls
120 lines (84 loc) · 3.45 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import numpy as np
def sigmoid(x):
    """Numerically stable logistic sigmoid, 1 / (1 + e^-x), applied elementwise.

    The naive form ``1 / (1 + np.exp(-x))`` overflows (with a RuntimeWarning)
    when ``x`` is a large negative number. Using ``exp(-|x|)``, which is always
    in (0, 1], avoids overflow for both signs:
    sigmoid(x) = 1 / (1 + e^-x) for x >= 0, and e^x / (1 + e^x) for x < 0.
    """
    e = np.exp(-np.abs(x))
    return np.where(x >= 0, 1.0 / (1.0 + e), e / (1.0 + e))
def entropy_loss(y_pred, y_true):
    """Mean binary cross-entropy between predicted probabilities and labels.

    Parameters:
        y_pred: predicted probabilities in [0, 1].
        y_true: ground-truth labels (0 or 1), same shape as ``y_pred``.

    Returns the average of -[y*log(p) + (1-y)*log(1-p)] over all elements.
    """
    # Clip so log() never sees 0 or 1 exactly (would give -inf).
    y_pred = np.clip(y_pred, 1e-7, 1.0 - 1e-7)
    # np.mean generalizes the original ``sum(...) / y_pred.shape[1]``: it is
    # identical for the (1, m) row vectors used in this file, and also works
    # for 1-D inputs, where shape[1] would raise IndexError.
    return -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))
def confusion_matrix(y_pred, y_true):
    """Tally binary-classification outcomes at a 0.5 decision threshold.

    Parameters:
        y_pred: predicted scores/probabilities; >= 0.5 counts as positive.
        y_true: ground-truth labels (0 or 1), same number of elements.

    Returns a (tp, tn, fp, fn) tuple of integer counts.
    """
    tp = tn = fp = fn = 0
    # Flatten both inputs so any (m,), (1, m) or (m, 1) layout is accepted.
    for score, label in zip(np.ravel(y_pred), np.ravel(y_true)):
        predicted_positive = score >= 0.5
        if predicted_positive and label == 1:
            tp += 1
        elif predicted_positive:
            fp += 1
        elif label == 0:
            tn += 1
        else:
            fn += 1
    return tp, tn, fp, fn
class LogisticRegression(object):
    """Binary logistic regression trained with full-batch gradient descent.

    ``fit`` and ``predict`` take row-major data: ``x`` of shape
    (# examples, # features) and ``y`` of shape (# examples, 1); both are
    transposed internally to the column-major (features, examples) layout.
    """

    def __init__(self, learning_rate=0.001, activation_function=sigmoid, loss_function=entropy_loss, iterations=10):
        self.learning_rate = learning_rate
        self.activation_function = activation_function
        self.loss_function = loss_function
        self.iterations = iterations

    def fit(self, x, y, validation_set_ratio=0.10):
        """Train on (x, y), holding out the last ``validation_set_ratio`` of examples.

        Records loss/accuracy on both splits every 10 iterations in
        ``_train_loss`` / ``_train_acc`` / ``_val_loss`` / ``_val_acc``.
        """
        x = x.T  # Cast to (# features, # examples)
        y = y.T  # Cast to (label, # examples)
        # Split off the trailing slice as a validation set (no shuffling, so
        # callers should pre-shuffle if the data is ordered).
        train_set_limit = x.shape[1] - int(validation_set_ratio * x.shape[1])
        val_x = x[:, train_set_limit:]
        val_y = y[:, train_set_limit:]
        x = x[:, :train_set_limit]
        y = y[:, :train_set_limit]
        # Small random weights, zero bias.
        self._w = np.random.randn(x.shape[0], 1) * 0.01
        self._b = 0
        self._val_loss = []
        self._train_loss = []
        self._val_acc = []
        self._train_acc = []
        # Number of training examples
        m = x.shape[1]
        for iteration in range(self.iterations):
            # Forward pass, then gradients of the cross-entropy loss.
            z = np.dot(self._w.T, x) + self._b
            a = self.activation_function(z)
            # BUG FIX: honor the loss_function passed to __init__
            # (it was hard-coded to entropy_loss and silently ignored).
            loss = self.loss_function(a, y)
            # NOTE: dz = a - y is the gradient specifically for
            # sigmoid + cross-entropy; other activation/loss pairs would
            # need their own backward pass.
            dz = a - y
            dw = np.dot(x, dz.T) / m
            db = np.sum(dz) / m
            if (iteration + 1) % 10 == 0:
                # BUG FIX: threshold the activations `a`, not the raw
                # logits `z`. confusion_matrix cuts at 0.5, which is the
                # decision boundary for probabilities (sigmoid(0) = 0.5),
                # not for logits — using `z` skewed training accuracy.
                tp, tn, fp, fn = confusion_matrix(a, y)
                acc = (tp + tn) / (tp + tn + fp + fn)
                val_x_predictions = self.predict(val_x.T)
                tp, tn, fp, fn = confusion_matrix(val_x_predictions, val_y)
                v_acc = (tp + tn) / (tp + tn + fp + fn)
                v_loss = self.loss_function(val_x_predictions, val_y)
                self._train_acc.append(acc)
                self._train_loss.append(loss)
                self._val_acc.append(v_acc)
                self._val_loss.append(v_loss)
                print("Iteration {} - Training loss: {} Validation loss: {}".format(iteration + 1, loss, v_loss))
            # Gradient-descent step
            self._w = self._w - self.learning_rate * dw
            self._b = self._b - self.learning_rate * db

    def plot_performance(self):
        """Plot recorded training/validation loss (top) and accuracy (bottom)."""
        import matplotlib.pyplot as plt
        fig = plt.figure()
        plt.subplot(2, 1, 1)
        plt.plot(self._val_loss, label="Validation set loss")
        plt.plot(self._train_loss, label="Training set loss")
        plt.legend()
        plt.subplot(2, 1, 2)
        plt.plot(self._val_acc, label="Validation set accuracy")
        plt.plot(self._train_acc, label="Training set accuracy")
        # BUG FIX: metrics are appended every 10 iterations in fit(),
        # so each x-axis tick is ten iterations, not a hundred.
        plt.xlabel("Iterations (in tens)")
        plt.legend()
        plt.show()

    def predict(self, x):
        """Return predicted probabilities for x of shape (# examples, # features)."""
        # Cast to (# features, # examples)
        x = x.T
        return self.activation_function(np.dot(self._w.T, x) + self._b)