-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path01_Linear_Regression.py
More file actions
73 lines (59 loc) · 2.1 KB
/
01_Linear_Regression.py
File metadata and controls
73 lines (59 loc) · 2.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# How to run this script:
#   python Rakesh_MLAsgn01.py <data_file.csv> <learning_rate> <threshold>
#   e.g. python Rakesh_MLAsgn01.py DSrandom.csv 0.0001 0.0001
import sys
import csv
# Inputs: data rows (df), weights, prevSSE, currSSE, iteration
def linearregr(df, weights, prevSSE, currSSE, iteration, learning_rate=None):
    """Run one batch gradient-descent step of linear regression.

    The model is ``f(x) = weights[0] + sum(weights[j + 1] * x[j])``. In every
    row of ``df`` the first ``len(weights) - 1`` entries are the features and
    the entry at index ``len(weights) - 1`` is the target ``y``.

    Args:
        df: Sequence of numeric rows (features followed by the target).
        weights: Mutable list of weights, intercept first. It is updated
            IN PLACE; the new weights are not returned.
        prevSSE: Ignored on input. It is overwritten with the incoming
            ``currSSE`` and is kept only so existing callers keep working.
        currSSE: Sum of squared errors produced by the previous step; it is
            handed back as the "previous" value.
        iteration: Step counter; printed, then returned incremented by one.
        learning_rate: Step size. When ``None`` the module-level ``eeta``
            set by the script is used.

    Returns:
        A tuple ``(currSSE, prevSSE, iteration)`` in exactly that order:
        the SSE measured with the weights as they were on entry, the SSE
        that was passed in as ``currSSE``, and ``iteration + 1``.
    """
    if learning_rate is None:
        # Fall back to the step size the script stored at module level.
        learning_rate = eeta
    num_weights = len(weights)
    target_index = num_weights - 1

    # The error from the previous step becomes the "previous" SSE.
    prevSSE = currSSE
    currSSE = 0.0

    # Residuals y - f(x) for every row, and their squared sum.
    errors = []
    for row in df:
        prediction = weights[0]  # intercept term (x0 = 1)
        for j in range(target_index):
            prediction += weights[j + 1] * row[j]
        error = row[target_index] - prediction
        errors.append(error)
        currSSE += error * error
    print(iteration, weights, currSSE)

    # gradient[j] = sum over rows of x_j * error, with x_0 = 1 for the
    # intercept. Adding it (scaled) to the weights moves towards lower SSE.
    gradient = [0.0] * num_weights
    for row, error in zip(df, errors):
        gradient[0] += error
        for j in range(1, num_weights):
            gradient[j] += row[j - 1] * error

    # In-place update so the caller's list sees the new weights.
    for j in range(num_weights):
        weights[j] += learning_rate * gradient[j]

    return currSSE, prevSSE, iteration + 1
####### MAIN EXECUTION STARTS HERE ######
# Reads the command-line arguments, loads the numeric CSV into a list of
# rows, then repeats gradient-descent steps until the sum of squared errors
# (SSE) stops improving by more than the threshold.
if len(sys.argv) != 4:
    sys.exit(
        "usage: python {} <data_file.csv> <learning_rate> <threshold>".format(
            sys.argv[0]
        )
    )

# These names stay at module level: linearregr looks up module-level names
# (such as eeta) at call time.
data_file = sys.argv[1]
eeta = float(sys.argv[2])       # learning rate
threshold = float(sys.argv[3])  # minimum SSE improvement needed to continue

# newline='' is what the csv module asks for when it is given a file object.
# QUOTE_NONNUMERIC makes the reader convert every unquoted field to float.
with open(data_file, 'r', newline='') as infile:
    reader = csv.reader(infile, quoting=csv.QUOTE_NONNUMERIC)
    df = [row for row in reader if row]  # blank lines come back as []

if not df:
    sys.exit("no data rows found in " + data_file)

num_rows = len(df)     # number of rows in the file
num_cols = len(df[0])  # number of columns (features plus the target)

# initialize the variables here
weights = [0.0] * num_cols  # one weight per feature plus the intercept
iteration = 0
# Start from infinity so the first step always counts as an improvement.
currSSE = float("inf")
prevSSE = currSSE

while True:
    # linearregr returns (current SSE, previous SSE, next iteration), in that
    # order; unpacking in the same order keeps the names truthful.
    currSSE, prevSSE, iteration = linearregr(
        df, weights, prevSSE, currSSE, iteration
    )
    # Stop once the improvement is no larger than the threshold. Written as
    # "not (... > ...)" so that divergence (negative improvement) and NaN
    # also end the loop instead of spinning forever.
    if not (prevSSE - currSSE > threshold):
        break