diff --git a/challenge1/analysis/tur/ch1.html b/challenge1/analysis/tur/ch1.html new file mode 100644 index 000000000..13221695d --- /dev/null +++ b/challenge1/analysis/tur/ch1.html @@ -0,0 +1,13532 @@ + + +
import numpy as np
import pandas as pd
from sklearn.preprocessing import PolynomialFeatures, LabelEncoder, MinMaxScaler

# Load the training set and drop the 'ID' and 'Label' columns, which none of
# the models below read.  (An earlier experiment, now disabled, restricted the
# rows to months 4-8.)
df = pd.read_csv('../../data/training_dataset_500.csv').drop(columns=['ID', 'Label'])

# Pairwise correlation of the remaining columns; as the last expression of a
# notebook cell its value is rendered as the cell output.
df.corr()
def preprocessing(df, feature_cols=('House', 'Month', 'Temperature', 'Daylight'),
                  target_col='EnergyProduction'):
    """Split a data frame into a feature matrix and a target vector.

    Args:
        df: pandas DataFrame containing every column named in
            ``feature_cols`` as well as ``target_col``.
        feature_cols: Names of the columns used as model inputs, in the
            order they should appear in the returned matrix.  Defaults to
            the four columns the models in this notebook are trained on.
        target_col: Name of the column to predict.

    Returns:
        Tuple ``(X, y)`` where ``X`` is a 2-D array with one column per
        entry of ``feature_cols`` and ``y`` is a 1-D array of the target
        values, both in the row order of ``df``.

    Raises:
        KeyError: If a requested column is missing from ``df``.
    """
    features = df[list(feature_cols)]
    # Selecting the target as a Series yields a flat (n,) array directly,
    # so no reshape from (n, 1) is needed.
    target = df[target_col]
    return np.array(features), np.array(target)
+
def MAPE(y_true, y_pred):
    """Return the mean absolute percentage error, expressed in percent.

    Both inputs are flattened before they are compared.  Without this, a
    flat target of shape (n,) combined with column-vector predictions of
    shape (n, 1) would be broadcast to an (n, n) matrix and the mean would
    be taken over every pairing instead of over matching elements.

    Args:
        y_true: Array-like of observed values.
        y_pred: Array-like of predicted values with the same number of
            elements as ``y_true``, or a single value that is compared
            against every observation.

    Returns:
        ``mean(|y_true - y_pred| / |y_true|) * 100`` as a float.  NaN is
        returned for empty input.  Zeros in ``y_true`` are not guarded
        against and yield ``inf`` or ``nan``.

    Raises:
        ValueError: If the inputs differ in size and ``y_pred`` is not a
            single value.
    """
    y_true = np.asarray(y_true, dtype=float).ravel()
    y_pred = np.asarray(y_pred, dtype=float).ravel()
    if y_pred.size not in (1, y_true.size):
        raise ValueError(
            'y_true and y_pred must have the same number of elements '
            '(got {} and {})'.format(y_true.size, y_pred.size)
        )
    if y_true.size == 0:
        # Mean of nothing is undefined; return NaN without NumPy's warning.
        return float('nan')
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
from sklearn import linear_model
from sklearn import svm

# Baseline: one ordinary-least-squares model fitted on all training rows.
# (An RBF-kernel SVR with C=8, gamma=5e-5 was tried as an alternative and is
# currently disabled.)
X, y = preprocessing(df)
reg = linear_model.LinearRegression().fit(X, y)

# Score the baseline on the held-out test file; the MAPE is the cell output.
dft = pd.read_csv('../../data/test_dataset_500.csv')
Xt, yt = preprocessing(dft)
MAPE(yt, reg.predict(Xt))
def individual(houseId, df=df):
    """Return the features and target belonging to a single house.

    Args:
        houseId: Value of the 'House' column identifying the house.
        df: Data frame to select from.  The default is the module-level
            ``df`` as it exists when this function is defined.

    Returns:
        Tuple ``(X, y)``: ``X`` is a 2-D array holding the 'Month',
        'Temperature' and 'Daylight' columns of the matching rows, and
        ``y`` is a 2-D array of shape (rows, 1) holding their
        'EnergyProduction' values.
    """
    is_house = df.House == houseId
    features = df.loc[is_house, ['Month', 'Temperature', 'Daylight']]
    # Keep the target as a one-column frame so the result stays 2-D.
    target = df.loc[is_house, ['EnergyProduction']]
    return np.array(features), np.array(target)
# individual prognoses
# Fit a separate linear model per house on that house's training rows, then
# predict and score the first test row of the same house.
ireg = linear_model.LinearRegression()
y_pred, y_true = [], []
with open('predicted_energy_production.csv', 'w') as f:
    f.write('House, EnergyProduction\n')
    # sorted() makes the row order of the output file deterministic; iterating
    # a bare set leaves the order unspecified.
    for i in sorted(set(dft.House.values)):
        # Loop-local names keep the module-level training target `y` intact.
        x_train, y_train = individual(i)
        ireg.fit(x_train, y_train)
        # Select the house's test rows once instead of once per use.
        x_test, y_test = individual(i, dft)
        # Targets are (rows, 1), so predictions are too: [0][0] is the scalar
        # belonging to the first test row.
        p = ireg.predict(x_test)[0][0]
        y_pred.append(p)
        y_true.append(y_test[0][0])
        f.write('{}, {}\n'.format(i, p))
with open('mape.txt', 'w') as f:
    f.write(str(MAPE(y_true, y_pred)))
# neural network
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation

# Fully connected regressor: 4 inputs -> 40 ReLU -> 20 ReLU -> 1 linear output.
model = Sequential([
    Dense(units=40, activation='relu', input_dim=4),
    Dense(units=20, activation='relu'),
    Dense(1),
])
model.compile(optimizer='adam', loss='mse', metrics=['mape'])

# Rebuild the training arrays so X and y are the full training set here.
X, y = preprocessing(df)
model.fit(X, y, epochs=12, verbose=3)

# Loss and MAPE on the test set; the result is the cell output.
Xt, yt = preprocessing(dft)
model.evaluate(Xt, yt)
+#LTSE
|  | House | Year | Month | Temperature | Daylight | EnergyProduction |
|---|---|---|---|---|---|---|
| House | 1.000000e+00 | 0.000000 | -1.816873e-18 | 0.000881 | 0.001583 | -0.008303 |
| Year | 0.000000e+00 | 1.000000 | -6.340757e-01 | -0.356800 | 0.524603 | 0.267481 |
| Month | -1.816873e-18 | -0.634076 | 1.000000e+00 | 0.353837 | -0.276307 | -0.232748 |
| Temperature | 8.810764e-04 | -0.356800 | 3.538369e-01 | 1.000000 | -0.053363 | 0.272789 |
| Daylight | 1.582656e-03 | 0.524603 | -2.763068e-01 | -0.053363 | 1.000000 | 0.531577 |
| EnergyProduction | -8.302696e-03 | 0.267481 | -2.327484e-01 | 0.272789 | 0.531577 | 1.000000 |
import pandas as pd
+df = pd.read_csv('../../data/total_watt.csv', header=None)
+df.columns=['datetime','consumption']
+df = df.set_index('datetime')
+%matplotlib inline
+plt.figure()
+df.plot(title='Energy Consumption for every 30 min', figsize=(16, 6))
+plt.xticks(rotation=30)
+plt.ylabel('Energy Consumption, W')
+plt.xlabel('Date')
import datetime as dt

# NOTE(review): `plt` (matplotlib.pyplot) is not imported anywhere in this
# section; it is presumably imported elsewhere in the notebook - confirm.

# Re-read the raw readings and parse the timestamps with an explicit format.
# This replaces `date_parser=` with `pd.datetime.strptime`: `pd.datetime` was
# removed in pandas 2.0 and `date_parser` is deprecated.
df = pd.read_csv('../../data/total_watt.csv', header=None)
df.columns = ['datetime', 'consumption']
df['datetime'] = pd.to_datetime(df['datetime'], format='%Y-%m-%d %H:%M:%S')

# Total consumption per calendar day.  Only 'consumption' is summed, since
# newer pandas refuses to sum a datetime column instead of dropping it.
df = df.groupby(by=df['datetime'].dt.date)[['consumption']].sum()

# Draw on an explicitly created figure; a bare plt.figure() before
# df.plot.bar() would leave an empty extra figure behind.
fig, ax = plt.subplots(figsize=(16, 6))
df.plot.bar(ax=ax, title='Energy Consumption per day')
plt.xticks(rotation=30)
plt.ylabel('Energy Consumption, W')
plt.xlabel('Date')
# Plot clusters
# NOTE(review): `KMeans` and `plt` are not imported anywhere in this section;
# presumably sklearn.cluster.KMeans and matplotlib.pyplot are imported
# elsewhere in the notebook - confirm.
# Assumes `df` still holds the per-day totals from the preceding cell, with
# the day as index and a 'consumption' column.
labels = KMeans(n_clusters=3, random_state=0).fit_predict(df)
# The size must be passed to plt.figure(); assigning `plt.figsize = ...` only
# sets an unused attribute on the pyplot module and never resizes the figure.
plt.figure(figsize=(16, 6))
plt.scatter(df.index, df.consumption, c=labels, s=50, cmap='viridis')
plt.title('Clustering Energy Consumption')
plt.xticks(rotation=30)
plt.ylabel('Energy Consumption, W')
plt.xlabel('Date')
+
+