# Source: pyprobml/scripts/iris_logreg_loss_surface.py (alxsoares/pyprobml, master branch)
# Plot 2d NLL loss surface for  binary logistic regression with 1 feature
# Loosely based on
# https://peterroelants.github.io/posts/neural-network-implementation-part02/

import numpy as np
import matplotlib.pyplot as plt
import os
# Output directory for figures; the PYPROBML environment variable must be set
# (the lookup raises KeyError otherwise).
figdir = os.path.join(os.environ["PYPROBML"], "figures")


def save_fig(fname):
    """Save the current matplotlib figure as *fname* inside ``figdir``."""
    target = os.path.join(figdir, fname)
    plt.savefig(target)

from mpl_toolkits.mplot3d import axes3d, Axes3D

# Seed NumPy's global random number generator so that any random draws made
# through np.random are reproducible from run to run.
np.random.seed(0)

import sklearn
from sklearn.linear_model import LogisticRegression
from sklearn import datasets


# Load the iris data and reduce it to a binary problem with a single feature.
iris = datasets.load_iris()

X = iris["data"][:, 3:]  # petal width (the slice keeps X two-dimensional)
# 1 if Iris-Virginica, else 0.  The builtin int is used because the np.int
# alias was deprecated in NumPy 1.20 and removed in NumPy 1.24.
y = (iris["target"] == 2).astype(int)

# Unregularised fit with an explicit intercept term.
# NOTE(review): newer scikit-learn releases spell "no penalty" as
# penalty=None instead of the string 'none' -- confirm against the
# installed version before changing it.
log_reg = LogisticRegression(solver="lbfgs",  fit_intercept=True, penalty='none')
log_reg.fit(X, y)

w_mle = log_reg.coef_[0][0] # 12.947270212450366
b_mle = log_reg.intercept_[0]  # -21.125250539711022
ypred = log_reg.predict_proba(X)

# Add column of 1s to end of X to capture bias term
N = X.shape[0]
ones = np.ones((N,1))
X1 = np.hstack((X, ones))

# Same model, but with the bias learned as the weight of the constant column.
log_reg1 = LogisticRegression(solver="lbfgs", fit_intercept=False,  penalty='none')
log_reg1.fit(X1, y)

w_mle1 = log_reg1.coef_[0][0]
b_mle1 = log_reg1.coef_[0][1]
ypred1 = log_reg1.predict_proba(X1)

# Both parameterisations should agree on the weights and on the predictions.
assert np.isclose(w_mle, w_mle1)
assert np.isclose(b_mle, b_mle1)
assert np.isclose(ypred[0], ypred1[0]).all()


# Define the logistic function
def logistic(z):
    """Return the element-wise logistic sigmoid ``1 / (1 + exp(-z))`` of *z*."""
    denominator = 1 + np.exp(-z)
    return 1. / denominator

# Define the prediction function y = 1 / (1 + numpy.exp(-x*w))
def predict_prob(x, w):
    """Return the logistic of the linear scores ``x.dot(w.T)``.

    *x* holds one example per row and *w* holds the weights; the scores are
    passed through ``logistic`` to turn them into probabilities.
    """
    scores = x.dot(w.T)
    return logistic(scores)


# Define the NLL loss function (y=probability, t=binary target)
def loss(y, t):
    """Return the mean negative log-likelihood (binary cross-entropy).

    Parameters
    ----------
    y : array_like
        Predicted probabilities of the positive class, one per example.
    t : array_like
        Binary targets (0 or 1), one per example.

    Returns
    -------
    float
        ``-mean(t * log(y) + (1 - t) * log(1 - y))``.  Entries of *y* that
        are exactly 0 or 1 make ``np.log`` return ``-inf``, so the result
        can be ``inf`` or ``nan`` for saturated predictions.

    Notes
    -----
    Both inputs are flattened to 1-D before they are combined.  Without
    this, an (N, 1) column of predictions and an (N,) target vector would
    broadcast to an (N, N) array and the mean would be taken over every
    prediction/target combination instead of over matching pairs.
    """
    y = np.asarray(y, dtype=float).ravel()
    t = np.asarray(t, dtype=float).ravel()
    return -np.mean(t * np.log(y) + (1 - t) * np.log(1 - y))

# Predicted probabilities at the MLE.  The parameters are kept as a plain
# 1-D array [w, b] (not an np.matrix) so that the predictions have the same
# shape (N,) as the targets y.
params = np.array([w_mle, b_mle])
ypred2 = predict_prob(X1, params)
#assert np.isclose(ypred1[:,1], ypred2).all()

# We compute the loss on a grid of (w, b) values.
# We use for loops for simplicity.
ngrid = 50
sf = 0.5  # the grid spans [-sf, +sf] times the MLE value along each axis
ws = np.linspace(-sf*w_mle, +sf*w_mle, ngrid)
bs = np.linspace(-sf*b_mle, +sf*b_mle, ngrid)
grid_w, grid_b = np.meshgrid(ws, bs)
loss_grid = np.zeros((ngrid, ngrid))
for i in range(ngrid):
    for j in range(ngrid):
        # 1-D parameter vector -> predictions of shape (N,), matching y.
        params = np.array([grid_w[i, j], grid_b[i, j]])
        p = predict_prob(X1, params)
        loss_grid[i, j] = loss(p, y)


# Plot the loss function surface
# Figure 1: filled-contour heat map of the loss over the (w, b) grid.
heat_fig, heat_ax = plt.subplots()
filled = heat_ax.contourf(grid_w, grid_b, loss_grid, 20)
cbar = heat_fig.colorbar(filled, ax=heat_ax)
cbar.ax.set_ylabel('NLL', fontsize=12)
heat_ax.set_xlabel('$w$', fontsize=12)
heat_ax.set_ylabel('$b$', fontsize=12)
heat_ax.set_title('Loss function surface')
save_fig('logregIrisLossHeatmap.pdf')
plt.show()

# Figure 2: contour lines of the same surface.
fig, ax = plt.subplots()
CS = ax.contour(grid_w, grid_b, loss_grid, cmap='jet')
#plt.plot(b_mle, w_mle, 'x') # Plot centered at MLE
save_fig('logregIrisLossContours.pdf')
plt.show()

# Figure 3: the loss drawn as a 3-D surface.
fig, ax = plt.subplots(subplot_kw={'projection': '3d'})
surf = ax.plot_surface(grid_w, grid_b, loss_grid)
save_fig('logregIrisLossSurf.pdf')
plt.show()