-
Notifications
You must be signed in to change notification settings - Fork 8
Expand file tree
/
Copy pathLearning.py
More file actions
113 lines (82 loc) · 3.94 KB
/
Learning.py
File metadata and controls
113 lines (82 loc) · 3.94 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import configparser
import logging
import os
import sys
from datetime import datetime, timedelta
from time import gmtime

import couchdb
from numpy import *
from sklearn.ensemble import RandomForestClassifier as ClassificationModel
from sklearn.ensemble import RandomForestRegressor as RegressionModel
from sklearn.externals import joblib
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

import clairelib.couch.ViewDefinitions as ViewDefinitions
from clairelib.HomeState import HomeState
from clairelib.NetworkService import NetworkService
# Load the runtime configuration. The path is relative, so it is resolved
# against the current working directory.
config = configparser.ConfigParser()
config.read('config.cfg')

# ConfigParser.read() silently ignores files it cannot open, so a missing
# config.cfg only shows up here as a missing section. Abort with a non-zero
# exit status so callers (shell scripts, cron) can detect the failure.
if "General" not in config.sections():
    print("Missing general section in configuration file. Please check config.cfg exists.")
    sys.exit(1)

# Configuration
home_name = config.get("General", "home_name")
couchdb_server = config.get("CouchDB", "url")
couchdb_name = config.get("CouchDB", "db")
logfile = config.get("Log","logfile")
# NOTE: the "ouput" spelling is the key name read from config.cfg; it must
# match the file, so it is kept as-is.
ouput_log_to_console = config.getboolean("Log","ouput_log_to_console")
# Connect to CouchDB
couch = couchdb.Server(couchdb_server)
try:
    # Bound to `db` (not `couchdb`) so the imported couchdb module is not
    # shadowed by the database handle.
    db = couch[couchdb_name]
except couchdb.http.ResourceNotFound:
    # Nothing below can run without the database, so stop instead of
    # continuing without a database handle.
    print("Error database %(couchdb_name)s does not exist" % {"couchdb_name": couchdb_name})
    sys.exit(1)

# Sync all views
ViewDefinitions.sync(db)

# Load all home states in the database
home_states = HomeState.view(db, "_design/home_state/_view/by_time")

# Fetch a single home state; it is only used to enumerate the output devices.
first_rows = HomeState.view(db, "_design/home_state/_view/by_time", limit=1).rows
if not first_rows:
    # An empty view would otherwise fail with an IndexError on rows[0].
    print("No home states found in database %(couchdb_name)s; nothing to train on" % {"couchdb_name": couchdb_name})
    sys.exit(1)
first_home_state = first_rows[0]

# Use this code if you only want to train on a subset of data
#home_states = home_states[str(datetime.now()-timedelta(days=14)):str(datetime.now())]

# Get all output devices in this home
output_devices = first_home_state.output_devices()
# Build the training data for every output device. Both dictionaries are
# keyed by device id: Xs holds the feature vectors, ys the matching outputs.
Xs = {}
ys = {}
for device in output_devices:
    print("Generating training set for", device['name'])
    device_id = device['device_id']

    # One (features, output) pair per home state, collected in a single pass
    # so both vectors of a state are produced together.
    samples = [
        (
            state.feature_vector_for_output_device( device ),
            state.output_vector_for_device_id( device_id ),
        )
        for state in home_states
    ]

    # Split the pairs back into the two parallel lists.
    Xs[device_id] = [features for features, _ in samples]
    ys[device_id] = [output for _, output in samples]
# Encode the time values (weekday, hour and minute) as categorical features
# in a one-of-k (aka one-hot) scheme. Only feature columns 0, 1 and 2 are
# one-hot encoded.
# NOTE(review): assumes those three columns are the time values in every
# device's feature vector - confirm against HomeState.
encoder = OneHotEncoder(categorical_features=[0,1,2], sparse=False)

# joblib.dump() does not create missing directories, so make sure the
# output directory exists before any training time is spent.
os.makedirs("models", exist_ok=True)

for device in output_devices:
    print("Training model for", device['name'], "with type", device['type'])
    X = Xs[device['device_id']]
    y = ys[device['device_id']]

    # Encode time values using encoder.
    # NOTE(review): the encoder is re-fitted for every device, so the single
    # encoder file written at the end holds the fit from the last device only.
    X = encoder.fit_transform(X)

    # Split into random training and test set. The seed is taken from the
    # current second, so the split differs between runs.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=gmtime().tm_sec)

    # Binary switches are modelled as a classification problem, every other
    # device type as a regression problem.
    is_binary_switch = device['type'] == 'BinaryPowerSwitchDevice'
    if is_binary_switch:
        model = ClassificationModel(n_estimators=100,max_features='auto',n_jobs=-1, class_weight='balanced_subsample')
    else:
        model = RegressionModel(n_estimators=10,max_features='log2',n_jobs=-1)

    print("Cross Validation Score: ", round(mean(cross_val_score(model, X, y))*100,2))

    # Fit on the training split only. Fitting on all of X, y before scoring
    # on X_test would leak the test samples into the model and make the
    # scores below meaningless.
    model.fit(X_train, y_train)
    y_predictions = model.predict(X_test)

    # Score predictions on the held-out test set
    if is_binary_switch:
        print("Accuracy Score: {} %".format(round(accuracy_score(y_test, y_predictions, normalize=True)*100, 2)))
    else:
        print("Mean Sq. Error Score: {}".format(round(mean_squared_error(y_test, y_predictions),2)))

    # Evaluation is done; refit on the complete data set so the stored model
    # is still trained on every available sample.
    model.fit(X, y)

    # Store the model
    joblib.dump(model, "models/random_forest_model_device_{}.pkl".format(device['device_id']))

# Store encoder
joblib.dump(encoder, "models/feature_vector_encoder.pkl")