-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathMultipleRegression.py
More file actions
103 lines (79 loc) · 2.78 KB
/
MultipleRegression.py
File metadata and controls
103 lines (79 loc) · 2.78 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
######################################################################
# Author: Elaheh Jamali
# Username: Jamalie
# Programming Assignment 1: Regression Model
#
# Purpose: In this assignment, We will use the gradient descent algorithm
# discussed in class to solve for the coefficients. The regression model
# is able to accept an arbitrary number of input variables.
#
# Acknowledgement: Different articles were read for this.
#
######################################################################
import numpy as np # library supporting large, multi-dimensional arrays and matrices.
import pandas as pd # library to take data and creates a Python object with rows and columns
import matplotlib.pyplot as plot # library for embedding plots
from mpl_toolkits.mplot3d import Axes3D # library for 3D model
# Load the student score dataset; expects 'Math', 'Reading', 'Writing' columns.
data = pd.read_csv('student.csv')
print(data.shape)
print(data.head())
math = data['Math'].values
reading = data['Reading'].values
writing = data['Writing'].values
# Plot the three score columns as a 3D scatter plot.
# BUG FIX: Axes3D(figure) no longer attaches the axes to the figure on
# matplotlib >= 3.4, which leaves the window blank; add_subplot with a
# 3D projection is the supported way to create 3D axes.
figure = plot.figure()
axes = figure.add_subplot(projection='3d')
axes.scatter(math, reading, writing, color='#ef5423')
plot.show()
# generating our X, Y and B
# Design matrix: bias column of ones plus the two predictors (shape m x 3).
m = len(math)
x0 = np.ones(m)
X = np.array([x0, math, reading]).T
# Initial coefficients as a float array so gradient updates keep float dtype.
B = np.zeros(3)
Y = np.array(writing)
alpha = 0.0001  # learning rate for gradient descent
# defining the cost function
def cost_function(X, Y, B):
    """Mean-squared-error cost J(B) = sum((X.B - Y)^2) / (2m).

    X is the (m x k) design matrix, Y the length-m target vector and B
    the length-k coefficient vector.
    """
    residuals = X.dot(B) - Y
    return np.sum(residuals ** 2) / (2 * len(Y))
# Baseline cost with the all-zero coefficient vector, before any training.
initialCost = cost_function(X, Y, B)
print("This is the initial cost:", initialCost)
# reducing our cost using Gradient Descent
def gradient_descent(X, Y, B, alpha, iterations):
    """Fit linear-regression coefficients by batch gradient descent.

    Starting from B, performs `iterations` full-batch updates with learning
    rate `alpha`.  Returns the final coefficient vector together with the
    list of cost values recorded after each update.
    """
    m = len(Y)
    cost_history = []
    for _ in range(iterations):
        # Prediction error for the current coefficients.
        error = X.dot(B) - Y
        # Step against the average gradient of the squared-error cost.
        B = B - alpha * (X.T.dot(error) / m)
        # Record the cost reached after this update.
        cost_history.append(cost_function(X, Y, B))
    return B, cost_history
# 100000 Iterations
# Train for 100000 iterations of batch gradient descent.
newB, cost_history = gradient_descent(X, Y, B, alpha, 100000)
# Learned coefficients.
print("this is the new value of B", newB)
# BUG FIX: this line prints the final *cost*, not B — label it correctly.
print("This is the final cost:", cost_history[-1])
# RMSE (Root Mean Square Error)
def RMSE(Y, Y_prediction):
    """Root mean square error between targets and predictions.

    Generalized to accept any equal-length array-likes (lists, tuples or
    ndarrays); values are coerced to float before the computation.
    """
    Y = np.asarray(Y, dtype=float)
    Y_prediction = np.asarray(Y_prediction, dtype=float)
    return np.sqrt(np.sum((Y - Y_prediction) ** 2) / len(Y))
# Coefficient of determination
def r2_score(Y, Y_prediction):
    """Coefficient of determination: 1 - SS_res / SS_tot.

    1.0 means a perfect fit; 0.0 means the model does no better than
    predicting the mean of Y.
    """
    residual_ss = np.sum((Y - Y_prediction) ** 2)
    total_ss = np.sum((Y - np.mean(Y)) ** 2)
    return 1 - residual_ss / total_ss
# Evaluate the trained model on the training data itself.
Y_prediction = X.dot(newB)
print("This is Root Mean Square Error:", RMSE(Y, Y_prediction))
print("This is the coefficient of determination:", r2_score(Y, Y_prediction))