-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathkMeansDataPoint.py
More file actions
84 lines (73 loc) · 3.08 KB
/
kMeansDataPoint.py
File metadata and controls
84 lines (73 loc) · 3.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import numpy as np
# A data point class to use for the clustering algorithm
class kDataPoint:
    """A single observation (one window of market data) for k-means clustering.

    Each point summarises a window of data with four features:

    * ``marketVar``  - variance over every value in the window,
    * ``marketMean`` - mean over every value in the window,
    * ``stockVars``  - variance along axis 1 (one value per row of ``data``),
    * ``stockMeans`` - mean along axis 1 (one value per row of ``data``).

    ``kCluster`` holds the id of the cluster the point is currently assigned
    to, or ``-1`` when it is unassigned.
    """

    # Distance reported when a point cannot be compared with a cluster.
    _UNREACHABLE_DIST = 1e+9
    # Upper bound a distance must beat for a cluster to be selected.  It is
    # deliberately smaller than _UNREACHABLE_DIST so that clusters which
    # could not be compared are never chosen by reassignCluster.
    _MAX_ASSIGN_DIST = 1e+8

    def __init__(self, data, windowSize, day):
        """Compute the window statistics for ``data``.

        Args:
            data: values for the window.  When ``windowSize > 1`` it must be
                reducible along axis 1 (per the original comments, one row
                per stock - TODO confirm against the caller).
            windowSize: number of observations per row in the window.
            day: the date/index of the window, kept for back-tracking.
        """
        # The market variance and mean return for the current window.
        self.marketVar = np.var(data)
        self.marketMean = np.mean(data)

        if windowSize > 1:
            # Per-row (per-stock) variance and mean across the window.
            self.stockVars = np.var(data, axis=1)
            self.stockMeans = np.mean(data, axis=1)
        else:
            # A single observation per row has no spread, and the
            # observation itself is used as the per-row mean.
            self.stockVars = np.zeros(len(data))
            self.stockMeans = data

        # Id of the cluster; should be between 0 and numClusters - 1 once
        # assigned, -1 means "not assigned yet".
        self.kCluster = -1
        self.windowSize = windowSize
        # Need the date to check back to the start (helps with back-tracking).
        self.day = day

    def calcMarketVar(self, data):
        """Recompute and store ``marketVar`` from ``data``; prints the value."""
        self.marketVar = np.var(data)
        print(self.marketVar)

    def calcMarkMean(self, data):
        """Recompute and store ``marketMean`` from ``data``."""
        self.marketMean = np.mean(data)

    def calcStockVar(self, data):
        """Recompute and store ``stockVars`` (variance along axis 1)."""
        self.stockVars = np.var(data, axis=1)

    def calcStockMean(self, data):
        """Recompute and store ``stockMeans`` (mean along axis 1)."""
        self.stockMeans = np.mean(data, axis=1)

    def setUpData(self, data):
        """Recompute all four statistics from ``data``.

        The ``calc*`` helpers store their result on ``self`` and return
        ``None``, so their return values must not be assigned back to the
        attributes (doing so would overwrite every statistic with ``None``).
        """
        self.calcMarketVar(data)
        self.calcMarkMean(data)
        self.calcStockVar(data)
        self.calcStockMean(data)
        print("I set up")

    def calcDist(self, cluster):
        """Return the distance between this point and ``cluster``.

        The distance is the sum of absolute differences over all features:
        market mean, market variance, every per-row mean and every per-row
        variance.

        Args:
            cluster: object exposing ``marketMean``, ``marketVar``,
                ``stockMeans`` and ``stockVars``.

        Returns:
            The distance, or ``1e+9`` when ``cluster.marketMean`` is ``None``
            or the features cannot be compared (e.g. mismatched shapes or
            missing values).
        """
        if cluster.marketMean is None:
            return self._UNREACHABLE_DIST
        try:
            distMarketMean = np.abs(self.marketMean - cluster.marketMean)
            distMarketVar = np.abs(self.marketVar - cluster.marketVar)
            stockMeanDiffs = np.abs(self.stockMeans - cluster.stockMeans)
            stockVarDiffs = np.abs(self.stockVars - cluster.stockVars)
            return (
                distMarketMean
                + distMarketVar
                + np.sum(stockMeanDiffs)
                + np.sum(stockVarDiffs)
            )
        except Exception:
            # Best-effort: an incomparable cluster is treated as unreachable
            # rather than aborting the clustering pass.  ``Exception`` (not a
            # bare ``except``) lets KeyboardInterrupt/SystemExit propagate.
            return self._UNREACHABLE_DIST

    def reassignCluster(self, clusters):
        """Assign this point to the nearest cluster in ``clusters``.

        Args:
            clusters: iterable of cluster objects accepted by ``calcDist``,
                each with an ``id`` attribute convertible to ``int``.

        Sets ``kCluster`` to the id of the closest cluster, or to ``-1`` when
        no cluster is closer than ``1e+8`` (which includes every cluster for
        which ``calcDist`` returned its ``1e+9`` fallback).
        """
        minDist = self._MAX_ASSIGN_DIST
        bestCluster = -1
        for cluster in clusters:
            dist = self.calcDist(cluster)
            if dist < minDist:
                minDist = dist
                bestCluster = int(cluster.id)
        self.kCluster = bestCluster

    def printPoint(self):
        """Print the four statistics of this point to stdout."""
        print("K Means Data Point, following att:\n")
        print("MarketVariance: " + str(self.marketVar))
        print("MarketMean: " + str(self.marketMean))
        print("StockMean: " + str(self.stockMeans))
        print("StockVariance: " + str(self.stockVars))