Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13,532 changes: 13,532 additions & 0 deletions challenge1/analysis/tur/ch1.html

Large diffs are not rendered by default.

362 changes: 362 additions & 0 deletions challenge1/analysis/tur/ch1.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,362 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"df = pd.read_csv('../../data/training_dataset_500.csv')"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>House</th>\n",
" <th>Year</th>\n",
" <th>Month</th>\n",
" <th>Temperature</th>\n",
" <th>Daylight</th>\n",
" <th>EnergyProduction</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>House</th>\n",
" <td>1.000000e+00</td>\n",
" <td>0.000000</td>\n",
" <td>-1.816873e-18</td>\n",
" <td>0.000881</td>\n",
" <td>0.001583</td>\n",
" <td>-0.008303</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Year</th>\n",
" <td>0.000000e+00</td>\n",
" <td>1.000000</td>\n",
" <td>-6.340757e-01</td>\n",
" <td>-0.356800</td>\n",
" <td>0.524603</td>\n",
" <td>0.267481</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Month</th>\n",
" <td>-1.816873e-18</td>\n",
" <td>-0.634076</td>\n",
" <td>1.000000e+00</td>\n",
" <td>0.353837</td>\n",
" <td>-0.276307</td>\n",
" <td>-0.232748</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Temperature</th>\n",
" <td>8.810764e-04</td>\n",
" <td>-0.356800</td>\n",
" <td>3.538369e-01</td>\n",
" <td>1.000000</td>\n",
" <td>-0.053363</td>\n",
" <td>0.272789</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Daylight</th>\n",
" <td>1.582656e-03</td>\n",
" <td>0.524603</td>\n",
" <td>-2.763068e-01</td>\n",
" <td>-0.053363</td>\n",
" <td>1.000000</td>\n",
" <td>0.531577</td>\n",
" </tr>\n",
" <tr>\n",
" <th>EnergyProduction</th>\n",
" <td>-8.302696e-03</td>\n",
" <td>0.267481</td>\n",
" <td>-2.327484e-01</td>\n",
" <td>0.272789</td>\n",
" <td>0.531577</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" House Year Month Temperature Daylight \\\n",
"House 1.000000e+00 0.000000 -1.816873e-18 0.000881 0.001583 \n",
"Year 0.000000e+00 1.000000 -6.340757e-01 -0.356800 0.524603 \n",
"Month -1.816873e-18 -0.634076 1.000000e+00 0.353837 -0.276307 \n",
"Temperature 8.810764e-04 -0.356800 3.538369e-01 1.000000 -0.053363 \n",
"Daylight 1.582656e-03 0.524603 -2.763068e-01 -0.053363 1.000000 \n",
"EnergyProduction -8.302696e-03 0.267481 -2.327484e-01 0.272789 0.531577 \n",
"\n",
" EnergyProduction \n",
"House -0.008303 \n",
"Year 0.267481 \n",
"Month -0.232748 \n",
"Temperature 0.272789 \n",
"Daylight 0.531577 \n",
"EnergyProduction 1.000000 "
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.preprocessing import PolynomialFeatures, LabelEncoder, MinMaxScaler\n",
"df = df.drop(columns=['ID','Label'])\n",
"#df = df[df.Month.isin([4,5,6,7,8])]\n",
"df.corr()"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"def preprocessing(df):\n",
" #from keras.utils import np_utils\n",
" X = df[['House','Month','Temperature','Daylight']]\n",
" #X = X[X.Month.isin([4,5,6,7,8])]\n",
" y = df[['EnergyProduction']]\n",
" #enc = LabelEncoder()\n",
" #house = X.House.values.reshape(-1,1)\n",
" #X['House'] = enc.fit(house).transform(house).toarray()\n",
" #sc = MinMaxScaler()\n",
" #X = sc.fit_transform(X)\n",
" return np.array(X), np.array(y).reshape(len(y),)\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"def MAPE(y_true, y_pred): \n",
" y_true, y_pred = np.array(y_true), np.array(y_pred)\n",
" return np.mean(np.abs((y_true - y_pred) / y_true))*100"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn import linear_model\n",
"from sklearn import svm\n",
"reg = linear_model.LinearRegression()\n",
"#reg = svm.SVR(kernel='rbf', C=8, gamma=5e-5)\n",
"X, y = preprocessing(df)\n",
"reg.fit(X, y)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"dft = pd.read_csv('../../data/test_dataset_500.csv')\n",
"Xt, yt = preprocessing(dft)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"14.450698807918974"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"MAPE(yt, reg.predict(Xt))"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"def individual(houseId, df=df):\n",
" X = df[['House','Month','Temperature','Daylight']]\n",
" X = X[X.House==houseId].drop(columns=['House'])\n",
" y = df[['House','EnergyProduction']]\n",
" y = y[y.House==houseId].drop(columns=['House'])\n",
" return np.array(X), np.array(y)"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [],
"source": [
"# individual prognoses\n",
"ireg = linear_model.LinearRegression()\n",
"y_pred, y_true = [],[]\n",
"with open('predicted_energy_production.csv','w') as f:\n",
" f.write('House, EnergyProduction\\n')\n",
" for i in set(dft.House.values):\n",
" x,y = individual(i)\n",
" ireg.fit(x,y)\n",
" p = ireg.predict(individual(i,dft)[0])[0][0]\n",
" y_pred.append(p)\n",
" y_true.append((individual(i,dft)[1])[0][0])\n",
" f.write('{}, {}\\n'.format(i,p))\n",
"with open('mape.txt','w') as f:\n",
" f.write(str(MAPE(y_true, y_pred)))\n"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"# neural network"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"from keras.models import Sequential\n",
"from keras.layers import Dense, Dropout, Activation"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 1/12\n",
"Epoch 2/12\n",
"Epoch 3/12\n",
"Epoch 4/12\n",
"Epoch 5/12\n",
"Epoch 6/12\n",
"Epoch 7/12\n",
"Epoch 8/12\n",
"Epoch 9/12\n",
"Epoch 10/12\n",
"Epoch 11/12\n",
"Epoch 12/12\n",
"500/500 [==============================] - 0s 138us/step\n"
]
},
{
"data": {
"text/plain": [
"[21690.6206796875, 17.908103942871094]"
]
},
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model = Sequential()\n",
"model.add(Dense(units=40, activation='relu', input_dim=4))\n",
"model.add(Dense(units=20, activation='relu'))\n",
"model.add(Dense(1))\n",
"model.compile(optimizer='adam',\n",
" loss='mse',\n",
" metrics=['mape'])\n",
"\n",
"X, y = preprocessing(df)\n",
"model.fit(X, y, epochs=12, verbose=3)\n",
"\n",
"Xt, yt = preprocessing(dft)\n",
"model.evaluate(Xt, yt)"
]
},
{
"cell_type": "code",
"execution_count": 787,
"metadata": {},
"outputs": [],
"source": [
"#LTSE"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Binary file added challenge1/analysis/tur/ch1.pdf
Binary file not shown.
1 change: 1 addition & 0 deletions challenge1/analysis/tur/mape.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
7.955755908782016
Loading