-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathLogisticRegression.py
More file actions
72 lines (57 loc) · 1.53 KB
/
LogisticRegression.py
File metadata and controls
72 lines (57 loc) · 1.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import math
import pandas
import numpy as np
import matplotlib.pyplot as plt
from scipy.spatial import distance
from sklearn.metrics import roc_auc_score
def Proba(x, w):
    """Return the logistic sigmoid of the two-feature linear score.

    Computes 1 / (1 + exp(-(w[0] * x[0] + w[1] * x[1]))).

    Args:
        x: indexable sample; only x[0] and x[1] are read.
        w: indexable weights; only w[0] and w[1] are read.

    Returns:
        The sigmoid value as a float.
    """
    exponent = -w[0] * x[0] - w[1] * x[1]
    denominator = 1 + math.exp(exponent)
    return 1 / denominator
def GetSum(X, y, w, num):
    """Accumulate the per-sample gradient term for weight component `num`.

    For every sample j the term added is
        y[j] * X[j][num] * (1 - 1 / (1 + exp(-y[j] * (w[0]*X[j][0] + w[1]*X[j][1]))))

    Args:
        X: sequence of samples; X[j][0], X[j][1] and X[j][num] are read.
        y: labels indexed in step with X.
        w: indexable weights; only w[0] and w[1] are read.
        num: index of the feature whose term is being summed.

    Returns:
        The accumulated sum (0 when X is empty).
    """
    total = 0
    for j, row in enumerate(X):
        label = y[j]
        margin = -label * (w[0] * row[0] + w[1] * row[1])
        total += label * row[num] * (1 - 1 / (1 + math.exp(margin)))
    return total
def Grad(X, y, w=None, k=0.1, C=1, max_steps=10000, e=10**-5):
    """Fit two weights by gradient descent with an L2 penalty.

    Every step computes both components from the weights of the previous
    step:
        w_i <- w_i + k * (1 / l) * GetSum(X, y, w, i) - k * C * w_i
    where l = len(X).

    Args:
        X: sequence of samples with at least two features each.
        y: labels aligned with X (presumably -1/+1, judging by the update
            formula in GetSum -- confirm against the data set).
        w: starting weights; when omitted the descent starts from [0, 0].
            The argument is copied, so the caller's object (and the
            default) is never modified.
        k: step size.
        C: coefficient of the L2 penalty term.
        max_steps: upper bound on the number of update steps.
        e: stop as soon as the Euclidean distance between two successive
            weight vectors is <= e.

    Returns:
        A new list with the final weights.
    """
    # Work on a private copy: the previous in-place update of a mutable
    # default made successive calls share (and overwrite) each other's state.
    weights = [0, 0] if w is None else list(w)
    l = len(X)
    # A bounded loop replaces the `while i < max_steps` whose counter was
    # never advanced, so max_steps is now actually enforced.
    for _ in range(max_steps):
        w0 = weights[0] + k * (1 / l) * GetSum(X, y, weights, 0) - k * C * weights[0]
        w1 = weights[1] + k * (1 / l) * GetSum(X, y, weights, 1) - k * C * weights[1]
        previous = list(weights)
        weights[0] = w0
        weights[1] = w1
        if distance.euclidean(previous, weights) <= e:
            break
    return weights
def GetScores(X, w):
    """Return a list with Proba(x, w) for every sample x in X, in order."""
    return [Proba(sample, w) for sample in X]
# Load the data set (no header row): column 0 is used as the label,
# the remaining columns as the feature matrix.
data = pandas.read_csv(
    'Data/data-logistic.csv',
    index_col=False,
    header=None
)
y = np.array(data[0])
X = np.array(data.loc[:, 1:])
x1 = data[1]
x2 = data[2]

# Scatter plot of the samples: green for label 1, red for everything else.
for label, first, second in zip(y, x1, x2):
    marker = 'go' if label == 1 else 'ro'
    plt.plot(first, second, marker)

# The context manager closes the answer file even if fitting or scoring raises.
with open('Answers/Logistic.txt', 'w') as fout:
    # Fit once without (C=0) and once with (C=10) the L2 penalty.
    for penalty in (0, 10):
        # Pass a fresh start vector explicitly so the second fit never
        # inherits the weights produced by the first one.
        w = Grad(X, y, w=[0, 0], C=penalty)
        y_scores = GetScores(X, w)
        print(roc_auc_score(y, y_scores), file=fout, end=' ')
        print(w, y_scores[:5])

plt.show()