-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathMultipleRegression.py
More file actions
103 lines (79 loc) · 2.78 KB
/
MultipleRegression.py
File metadata and controls
103 lines (79 loc) · 2.78 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
######################################################################
# Author: Elaheh Jamali
# Username: Jamalie
# Programming Assignment 1: Regression Model
#
# Purpose: In this assignment, We will use the gradient descent algorithm
# discussed in class to solve for the coefficients. The regression model
# is able to accept an arbitrary number of input variables.
#
# Acknowledgement: Different articles were read for this.
#
######################################################################
import numpy as np # library supporting large, multi-dimensional arrays and matrices.
import pandas as pd # library to take data and creates a Python object with rows and columns
import matplotlib.pyplot as plot # library for embedding plots
from mpl_toolkits.mplot3d import Axes3D # library for 3D model
# Load the student score dataset; expects 'Math', 'Reading', 'Writing' columns.
data = pd.read_csv('student.csv')
print(data.shape)
print(data.head())
math = data['Math'].values
reading = data['Reading'].values
writing = data['Writing'].values
# Plot the three score columns as a 3D scatter plot.
# BUG FIX: Axes3D(figure) no longer attaches the axes to the figure on
# matplotlib >= 3.4, which leaves the window blank; add_subplot with a
# 3D projection is the supported way to create 3D axes.
figure = plot.figure()
axes = figure.add_subplot(projection='3d')
axes.scatter(math, reading, writing, color='#ef5423')
plot.show()
# generating our X, Y and B
# Design matrix: bias column of ones plus the two predictors (shape m x 3).
m = len(math)
x0 = np.ones(m)
X = np.array([x0, math, reading]).T
# Initial coefficients as a float array so gradient updates keep float dtype.
B = np.zeros(3)
Y = np.array(writing)
alpha = 0.0001  # learning rate for gradient descent
# defining the cost function
def cost_function(X, Y, B):
    """Mean-squared-error cost J(B) = sum((X.B - Y)^2) / (2m).

    X is the (m x k) design matrix, Y the length-m target vector and B
    the length-k coefficient vector.
    """
    residuals = X.dot(B) - Y
    return np.sum(residuals ** 2) / (2 * len(Y))
# Baseline cost with the all-zero coefficient vector, before any training.
initialCost = cost_function(X, Y, B)
print("This is the initial cost:", initialCost)
# reducing our cost using Gradient Descent
def gradient_descent(X, Y, B, alpha, iterations):
    """Fit linear-regression coefficients by batch gradient descent.

    Starting from B, performs `iterations` full-batch updates with learning
    rate `alpha`.  Returns the final coefficient vector together with the
    list of cost values recorded after each update.
    """
    m = len(Y)
    cost_history = []
    for _ in range(iterations):
        # Prediction error for the current coefficients.
        error = X.dot(B) - Y
        # Step against the average gradient of the squared-error cost.
        B = B - alpha * (X.T.dot(error) / m)
        # Record the cost reached after this update.
        cost_history.append(cost_function(X, Y, B))
    return B, cost_history
# 100000 Iterations
# Train for 100000 iterations of batch gradient descent.
newB, cost_history = gradient_descent(X, Y, B, alpha, 100000)
# Learned coefficients.
print("this is the new value of B", newB)
# BUG FIX: this line prints the final *cost*, not B — label it correctly.
print("This is the final cost:", cost_history[-1])
# RMSE (Root Mean Square Error)
def RMSE(Y, Y_prediction):
    """Root mean square error between targets and predictions.

    Generalized to accept any equal-length array-likes (lists, tuples or
    ndarrays); values are coerced to float before the computation.
    """
    Y = np.asarray(Y, dtype=float)
    Y_prediction = np.asarray(Y_prediction, dtype=float)
    return np.sqrt(np.sum((Y - Y_prediction) ** 2) / len(Y))
# Coefficient of determination
def r2_score(Y, Y_prediction):
    """Coefficient of determination: 1 - SS_res / SS_tot.

    1.0 means a perfect fit; 0.0 means the model does no better than
    predicting the mean of Y.
    """
    residual_ss = np.sum((Y - Y_prediction) ** 2)
    total_ss = np.sum((Y - np.mean(Y)) ** 2)
    return 1 - residual_ss / total_ss
# Evaluate the trained model on the training data itself.
Y_prediction = X.dot(newB)
print("This is Root Mean Square Error:", RMSE(Y, Y_prediction))
print("This is the coefficient of determination:", r2_score(Y, Y_prediction))