diff --git a/Analysis project.ipynb b/Analysis project.ipynb
new file mode 100644
index 0000000..714066e
--- /dev/null
+++ b/Analysis project.ipynb
@@ -0,0 +1,3001 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Importing libraries and overview of data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 415,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "import seaborn as sns\n",
+ "import matplotlib.pyplot as plt\n",
+ "from statsmodels.formula.api import ols"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 416,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Id | \n",
+ " MSSubClass | \n",
+ " MSZoning | \n",
+ " LotFrontage | \n",
+ " LotArea | \n",
+ " Street | \n",
+ " Alley | \n",
+ " LotShape | \n",
+ " LandContour | \n",
+ " Utilities | \n",
+ " ... | \n",
+ " PoolArea | \n",
+ " PoolQC | \n",
+ " Fence | \n",
+ " MiscFeature | \n",
+ " MiscVal | \n",
+ " MoSold | \n",
+ " YrSold | \n",
+ " SaleType | \n",
+ " SaleCondition | \n",
+ " SalePrice | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " 60 | \n",
+ " RL | \n",
+ " 65.0 | \n",
+ " 8450 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " Reg | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 2008 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 208500 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " 20 | \n",
+ " RL | \n",
+ " 80.0 | \n",
+ " 9600 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " Reg | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 5 | \n",
+ " 2007 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 181500 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 3 | \n",
+ " 60 | \n",
+ " RL | \n",
+ " 68.0 | \n",
+ " 11250 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " IR1 | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 9 | \n",
+ " 2008 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 223500 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 4 | \n",
+ " 70 | \n",
+ " RL | \n",
+ " 60.0 | \n",
+ " 9550 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " IR1 | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 2006 | \n",
+ " WD | \n",
+ " Abnorml | \n",
+ " 140000 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 5 | \n",
+ " 60 | \n",
+ " RL | \n",
+ " 84.0 | \n",
+ " 14260 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " IR1 | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 12 | \n",
+ " 2008 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 250000 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 1455 | \n",
+ " 1456 | \n",
+ " 60 | \n",
+ " RL | \n",
+ " 62.0 | \n",
+ " 7917 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " Reg | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 8 | \n",
+ " 2007 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 175000 | \n",
+ "
\n",
+ " \n",
+ " | 1456 | \n",
+ " 1457 | \n",
+ " 20 | \n",
+ " RL | \n",
+ " 85.0 | \n",
+ " 13175 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " Reg | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " MnPrv | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 2010 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 210000 | \n",
+ "
\n",
+ " \n",
+ " | 1457 | \n",
+ " 1458 | \n",
+ " 70 | \n",
+ " RL | \n",
+ " 66.0 | \n",
+ " 9042 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " Reg | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " GdPrv | \n",
+ " Shed | \n",
+ " 2500 | \n",
+ " 5 | \n",
+ " 2010 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 266500 | \n",
+ "
\n",
+ " \n",
+ " | 1458 | \n",
+ " 1459 | \n",
+ " 20 | \n",
+ " RL | \n",
+ " 68.0 | \n",
+ " 9717 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " Reg | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 4 | \n",
+ " 2010 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 142125 | \n",
+ "
\n",
+ " \n",
+ " | 1459 | \n",
+ " 1460 | \n",
+ " 20 | \n",
+ " RL | \n",
+ " 75.0 | \n",
+ " 9937 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " Reg | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 6 | \n",
+ " 2008 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 147500 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
1460 rows × 81 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Id MSSubClass MSZoning LotFrontage LotArea Street Alley LotShape \\\n",
+ "0 1 60 RL 65.0 8450 Pave NaN Reg \n",
+ "1 2 20 RL 80.0 9600 Pave NaN Reg \n",
+ "2 3 60 RL 68.0 11250 Pave NaN IR1 \n",
+ "3 4 70 RL 60.0 9550 Pave NaN IR1 \n",
+ "4 5 60 RL 84.0 14260 Pave NaN IR1 \n",
+ "... ... ... ... ... ... ... ... ... \n",
+ "1455 1456 60 RL 62.0 7917 Pave NaN Reg \n",
+ "1456 1457 20 RL 85.0 13175 Pave NaN Reg \n",
+ "1457 1458 70 RL 66.0 9042 Pave NaN Reg \n",
+ "1458 1459 20 RL 68.0 9717 Pave NaN Reg \n",
+ "1459 1460 20 RL 75.0 9937 Pave NaN Reg \n",
+ "\n",
+ " LandContour Utilities ... PoolArea PoolQC Fence MiscFeature MiscVal \\\n",
+ "0 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
+ "1 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
+ "2 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
+ "3 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
+ "4 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
+ "... ... ... ... ... ... ... ... ... \n",
+ "1455 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
+ "1456 Lvl AllPub ... 0 NaN MnPrv NaN 0 \n",
+ "1457 Lvl AllPub ... 0 NaN GdPrv Shed 2500 \n",
+ "1458 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
+ "1459 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
+ "\n",
+ " MoSold YrSold SaleType SaleCondition SalePrice \n",
+ "0 2 2008 WD Normal 208500 \n",
+ "1 5 2007 WD Normal 181500 \n",
+ "2 9 2008 WD Normal 223500 \n",
+ "3 2 2006 WD Abnorml 140000 \n",
+ "4 12 2008 WD Normal 250000 \n",
+ "... ... ... ... ... ... \n",
+ "1455 8 2007 WD Normal 175000 \n",
+ "1456 2 2010 WD Normal 210000 \n",
+ "1457 5 2010 WD Normal 266500 \n",
+ "1458 4 2010 WD Normal 142125 \n",
+ "1459 6 2008 WD Normal 147500 \n",
+ "\n",
+ "[1460 rows x 81 columns]"
+ ]
+ },
+ "execution_count": 416,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_trains = pd.read_csv('house-prices-advanced-regression-techniques/train.csv')\n",
+ "df_trains"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 417,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " count mean std min 25% \\\n",
+ "Id 1460.0 730.500000 421.610009 1.0 365.75 \n",
+ "MSSubClass 1460.0 56.897260 42.300571 20.0 20.00 \n",
+ "LotFrontage 1201.0 70.049958 24.284752 21.0 59.00 \n",
+ "LotArea 1460.0 10516.828082 9981.264932 1300.0 7553.50 \n",
+ "OverallQual 1460.0 6.099315 1.382997 1.0 5.00 \n",
+ "OverallCond 1460.0 5.575342 1.112799 1.0 5.00 \n",
+ "YearBuilt 1460.0 1971.267808 30.202904 1872.0 1954.00 \n",
+ "YearRemodAdd 1460.0 1984.865753 20.645407 1950.0 1967.00 \n",
+ "MasVnrArea 1452.0 103.685262 181.066207 0.0 0.00 \n",
+ "BsmtFinSF1 1460.0 443.639726 456.098091 0.0 0.00 \n",
+ "BsmtFinSF2 1460.0 46.549315 161.319273 0.0 0.00 \n",
+ "BsmtUnfSF 1460.0 567.240411 441.866955 0.0 223.00 \n",
+ "TotalBsmtSF 1460.0 1057.429452 438.705324 0.0 795.75 \n",
+ "1stFlrSF 1460.0 1162.626712 386.587738 334.0 882.00 \n",
+ "2ndFlrSF 1460.0 346.992466 436.528436 0.0 0.00 \n",
+ "LowQualFinSF 1460.0 5.844521 48.623081 0.0 0.00 \n",
+ "GrLivArea 1460.0 1515.463699 525.480383 334.0 1129.50 \n",
+ "BsmtFullBath 1460.0 0.425342 0.518911 0.0 0.00 \n",
+ "BsmtHalfBath 1460.0 0.057534 0.238753 0.0 0.00 \n",
+ "FullBath 1460.0 1.565068 0.550916 0.0 1.00 \n",
+ "HalfBath 1460.0 0.382877 0.502885 0.0 0.00 \n",
+ "BedroomAbvGr 1460.0 2.866438 0.815778 0.0 2.00 \n",
+ "KitchenAbvGr 1460.0 1.046575 0.220338 0.0 1.00 \n",
+ "TotRmsAbvGrd 1460.0 6.517808 1.625393 2.0 5.00 \n",
+ "Fireplaces 1460.0 0.613014 0.644666 0.0 0.00 \n",
+ "GarageYrBlt 1379.0 1978.506164 24.689725 1900.0 1961.00 \n",
+ "GarageCars 1460.0 1.767123 0.747315 0.0 1.00 \n",
+ "GarageArea 1460.0 472.980137 213.804841 0.0 334.50 \n",
+ "WoodDeckSF 1460.0 94.244521 125.338794 0.0 0.00 \n",
+ "OpenPorchSF 1460.0 46.660274 66.256028 0.0 0.00 \n",
+ "EnclosedPorch 1460.0 21.954110 61.119149 0.0 0.00 \n",
+ "3SsnPorch 1460.0 3.409589 29.317331 0.0 0.00 \n",
+ "ScreenPorch 1460.0 15.060959 55.757415 0.0 0.00 \n",
+ "PoolArea 1460.0 2.758904 40.177307 0.0 0.00 \n",
+ "MiscVal 1460.0 43.489041 496.123024 0.0 0.00 \n",
+ "MoSold 1460.0 6.321918 2.703626 1.0 5.00 \n",
+ "YrSold 1460.0 2007.815753 1.328095 2006.0 2007.00 \n",
+ "SalePrice 1460.0 180921.195890 79442.502883 34900.0 129975.00 \n",
+ "\n",
+ " 50% 75% max \n",
+ "Id 730.5 1095.25 1460.0 \n",
+ "MSSubClass 50.0 70.00 190.0 \n",
+ "LotFrontage 69.0 80.00 313.0 \n",
+ "LotArea 9478.5 11601.50 215245.0 \n",
+ "OverallQual 6.0 7.00 10.0 \n",
+ "OverallCond 5.0 6.00 9.0 \n",
+ "YearBuilt 1973.0 2000.00 2010.0 \n",
+ "YearRemodAdd 1994.0 2004.00 2010.0 \n",
+ "MasVnrArea 0.0 166.00 1600.0 \n",
+ "BsmtFinSF1 383.5 712.25 5644.0 \n",
+ "BsmtFinSF2 0.0 0.00 1474.0 \n",
+ "BsmtUnfSF 477.5 808.00 2336.0 \n",
+ "TotalBsmtSF 991.5 1298.25 6110.0 \n",
+ "1stFlrSF 1087.0 1391.25 4692.0 \n",
+ "2ndFlrSF 0.0 728.00 2065.0 \n",
+ "LowQualFinSF 0.0 0.00 572.0 \n",
+ "GrLivArea 1464.0 1776.75 5642.0 \n",
+ "BsmtFullBath 0.0 1.00 3.0 \n",
+ "BsmtHalfBath 0.0 0.00 2.0 \n",
+ "FullBath 2.0 2.00 3.0 \n",
+ "HalfBath 0.0 1.00 2.0 \n",
+ "BedroomAbvGr 3.0 3.00 8.0 \n",
+ "KitchenAbvGr 1.0 1.00 3.0 \n",
+ "TotRmsAbvGrd 6.0 7.00 14.0 \n",
+ "Fireplaces 1.0 1.00 3.0 \n",
+ "GarageYrBlt 1980.0 2002.00 2010.0 \n",
+ "GarageCars 2.0 2.00 4.0 \n",
+ "GarageArea 480.0 576.00 1418.0 \n",
+ "WoodDeckSF 0.0 168.00 857.0 \n",
+ "OpenPorchSF 25.0 68.00 547.0 \n",
+ "EnclosedPorch 0.0 0.00 552.0 \n",
+ "3SsnPorch 0.0 0.00 508.0 \n",
+ "ScreenPorch 0.0 0.00 480.0 \n",
+ "PoolArea 0.0 0.00 738.0 \n",
+ "MiscVal 0.0 0.00 15500.0 \n",
+ "MoSold 6.0 8.00 12.0 \n",
+ "YrSold 2008.0 2009.00 2010.0 \n",
+ "SalePrice 163000.0 214000.00 755000.0 \n",
+ "\n",
+ "RangeIndex: 1460 entries, 0 to 1459\n",
+ "Data columns (total 81 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 Id 1460 non-null int64 \n",
+ " 1 MSSubClass 1460 non-null int64 \n",
+ " 2 MSZoning 1460 non-null object \n",
+ " 3 LotFrontage 1201 non-null float64\n",
+ " 4 LotArea 1460 non-null int64 \n",
+ " 5 Street 1460 non-null object \n",
+ " 6 Alley 91 non-null object \n",
+ " 7 LotShape 1460 non-null object \n",
+ " 8 LandContour 1460 non-null object \n",
+ " 9 Utilities 1460 non-null object \n",
+ " 10 LotConfig 1460 non-null object \n",
+ " 11 LandSlope 1460 non-null object \n",
+ " 12 Neighborhood 1460 non-null object \n",
+ " 13 Condition1 1460 non-null object \n",
+ " 14 Condition2 1460 non-null object \n",
+ " 15 BldgType 1460 non-null object \n",
+ " 16 HouseStyle 1460 non-null object \n",
+ " 17 OverallQual 1460 non-null int64 \n",
+ " 18 OverallCond 1460 non-null int64 \n",
+ " 19 YearBuilt 1460 non-null int64 \n",
+ " 20 YearRemodAdd 1460 non-null int64 \n",
+ " 21 RoofStyle 1460 non-null object \n",
+ " 22 RoofMatl 1460 non-null object \n",
+ " 23 Exterior1st 1460 non-null object \n",
+ " 24 Exterior2nd 1460 non-null object \n",
+ " 25 MasVnrType 1452 non-null object \n",
+ " 26 MasVnrArea 1452 non-null float64\n",
+ " 27 ExterQual 1460 non-null object \n",
+ " 28 ExterCond 1460 non-null object \n",
+ " 29 Foundation 1460 non-null object \n",
+ " 30 BsmtQual 1423 non-null object \n",
+ " 31 BsmtCond 1423 non-null object \n",
+ " 32 BsmtExposure 1422 non-null object \n",
+ " 33 BsmtFinType1 1423 non-null object \n",
+ " 34 BsmtFinSF1 1460 non-null int64 \n",
+ " 35 BsmtFinType2 1422 non-null object \n",
+ " 36 BsmtFinSF2 1460 non-null int64 \n",
+ " 37 BsmtUnfSF 1460 non-null int64 \n",
+ " 38 TotalBsmtSF 1460 non-null int64 \n",
+ " 39 Heating 1460 non-null object \n",
+ " 40 HeatingQC 1460 non-null object \n",
+ " 41 CentralAir 1460 non-null object \n",
+ " 42 Electrical 1459 non-null object \n",
+ " 43 1stFlrSF 1460 non-null int64 \n",
+ " 44 2ndFlrSF 1460 non-null int64 \n",
+ " 45 LowQualFinSF 1460 non-null int64 \n",
+ " 46 GrLivArea 1460 non-null int64 \n",
+ " 47 BsmtFullBath 1460 non-null int64 \n",
+ " 48 BsmtHalfBath 1460 non-null int64 \n",
+ " 49 FullBath 1460 non-null int64 \n",
+ " 50 HalfBath 1460 non-null int64 \n",
+ " 51 BedroomAbvGr 1460 non-null int64 \n",
+ " 52 KitchenAbvGr 1460 non-null int64 \n",
+ " 53 KitchenQual 1460 non-null object \n",
+ " 54 TotRmsAbvGrd 1460 non-null int64 \n",
+ " 55 Functional 1460 non-null object \n",
+ " 56 Fireplaces 1460 non-null int64 \n",
+ " 57 FireplaceQu 770 non-null object \n",
+ " 58 GarageType 1379 non-null object \n",
+ " 59 GarageYrBlt 1379 non-null float64\n",
+ " 60 GarageFinish 1379 non-null object \n",
+ " 61 GarageCars 1460 non-null int64 \n",
+ " 62 GarageArea 1460 non-null int64 \n",
+ " 63 GarageQual 1379 non-null object \n",
+ " 64 GarageCond 1379 non-null object \n",
+ " 65 PavedDrive 1460 non-null object \n",
+ " 66 WoodDeckSF 1460 non-null int64 \n",
+ " 67 OpenPorchSF 1460 non-null int64 \n",
+ " 68 EnclosedPorch 1460 non-null int64 \n",
+ " 69 3SsnPorch 1460 non-null int64 \n",
+ " 70 ScreenPorch 1460 non-null int64 \n",
+ " 71 PoolArea 1460 non-null int64 \n",
+ " 72 PoolQC 7 non-null object \n",
+ " 73 Fence 281 non-null object \n",
+ " 74 MiscFeature 54 non-null object \n",
+ " 75 MiscVal 1460 non-null int64 \n",
+ " 76 MoSold 1460 non-null int64 \n",
+ " 77 YrSold 1460 non-null int64 \n",
+ " 78 SaleType 1460 non-null object \n",
+ " 79 SaleCondition 1460 non-null object \n",
+ " 80 SalePrice 1460 non-null int64 \n",
+ "dtypes: float64(3), int64(35), object(43)\n",
+ "memory usage: 924.0+ KB\n",
+ "None\n"
+ ]
+ }
+ ],
+ "source": [
+ "# checking data measures and info\n",
+ "print(df_trains.describe().T)\n",
+ "print(df_trains.info() ) "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 418,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " MSSubClass | \n",
+ " MSZoning | \n",
+ " LotFrontage | \n",
+ " LotArea | \n",
+ " Street | \n",
+ " Alley | \n",
+ " LotShape | \n",
+ " LandContour | \n",
+ " Utilities | \n",
+ " LotConfig | \n",
+ " ... | \n",
+ " PoolArea | \n",
+ " PoolQC | \n",
+ " Fence | \n",
+ " MiscFeature | \n",
+ " MiscVal | \n",
+ " MoSold | \n",
+ " YrSold | \n",
+ " SaleType | \n",
+ " SaleCondition | \n",
+ " SalePrice | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 60 | \n",
+ " RL | \n",
+ " 65.0 | \n",
+ " 8450 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " Reg | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " Inside | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 2008 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 208500 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 20 | \n",
+ " RL | \n",
+ " 80.0 | \n",
+ " 9600 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " Reg | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " FR2 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 5 | \n",
+ " 2007 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 181500 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 60 | \n",
+ " RL | \n",
+ " 68.0 | \n",
+ " 11250 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " IR1 | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " Inside | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 9 | \n",
+ " 2008 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 223500 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 70 | \n",
+ " RL | \n",
+ " 60.0 | \n",
+ " 9550 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " IR1 | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " Corner | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 2006 | \n",
+ " WD | \n",
+ " Abnorml | \n",
+ " 140000 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 60 | \n",
+ " RL | \n",
+ " 84.0 | \n",
+ " 14260 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " IR1 | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " FR2 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 12 | \n",
+ " 2008 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 250000 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 1455 | \n",
+ " 60 | \n",
+ " RL | \n",
+ " 62.0 | \n",
+ " 7917 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " Reg | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " Inside | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 8 | \n",
+ " 2007 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 175000 | \n",
+ "
\n",
+ " \n",
+ " | 1456 | \n",
+ " 20 | \n",
+ " RL | \n",
+ " 85.0 | \n",
+ " 13175 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " Reg | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " Inside | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " MnPrv | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 2010 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 210000 | \n",
+ "
\n",
+ " \n",
+ " | 1457 | \n",
+ " 70 | \n",
+ " RL | \n",
+ " 66.0 | \n",
+ " 9042 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " Reg | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " Inside | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " GdPrv | \n",
+ " Shed | \n",
+ " 2500 | \n",
+ " 5 | \n",
+ " 2010 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 266500 | \n",
+ "
\n",
+ " \n",
+ " | 1458 | \n",
+ " 20 | \n",
+ " RL | \n",
+ " 68.0 | \n",
+ " 9717 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " Reg | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " Inside | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 4 | \n",
+ " 2010 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 142125 | \n",
+ "
\n",
+ " \n",
+ " | 1459 | \n",
+ " 20 | \n",
+ " RL | \n",
+ " 75.0 | \n",
+ " 9937 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " Reg | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " Inside | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 6 | \n",
+ " 2008 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 147500 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
1460 rows × 80 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " MSSubClass MSZoning LotFrontage LotArea Street Alley LotShape \\\n",
+ "0 60 RL 65.0 8450 Pave NaN Reg \n",
+ "1 20 RL 80.0 9600 Pave NaN Reg \n",
+ "2 60 RL 68.0 11250 Pave NaN IR1 \n",
+ "3 70 RL 60.0 9550 Pave NaN IR1 \n",
+ "4 60 RL 84.0 14260 Pave NaN IR1 \n",
+ "... ... ... ... ... ... ... ... \n",
+ "1455 60 RL 62.0 7917 Pave NaN Reg \n",
+ "1456 20 RL 85.0 13175 Pave NaN Reg \n",
+ "1457 70 RL 66.0 9042 Pave NaN Reg \n",
+ "1458 20 RL 68.0 9717 Pave NaN Reg \n",
+ "1459 20 RL 75.0 9937 Pave NaN Reg \n",
+ "\n",
+ " LandContour Utilities LotConfig ... PoolArea PoolQC Fence MiscFeature \\\n",
+ "0 Lvl AllPub Inside ... 0 NaN NaN NaN \n",
+ "1 Lvl AllPub FR2 ... 0 NaN NaN NaN \n",
+ "2 Lvl AllPub Inside ... 0 NaN NaN NaN \n",
+ "3 Lvl AllPub Corner ... 0 NaN NaN NaN \n",
+ "4 Lvl AllPub FR2 ... 0 NaN NaN NaN \n",
+ "... ... ... ... ... ... ... ... ... \n",
+ "1455 Lvl AllPub Inside ... 0 NaN NaN NaN \n",
+ "1456 Lvl AllPub Inside ... 0 NaN MnPrv NaN \n",
+ "1457 Lvl AllPub Inside ... 0 NaN GdPrv Shed \n",
+ "1458 Lvl AllPub Inside ... 0 NaN NaN NaN \n",
+ "1459 Lvl AllPub Inside ... 0 NaN NaN NaN \n",
+ "\n",
+ " MiscVal MoSold YrSold SaleType SaleCondition SalePrice \n",
+ "0 0 2 2008 WD Normal 208500 \n",
+ "1 0 5 2007 WD Normal 181500 \n",
+ "2 0 9 2008 WD Normal 223500 \n",
+ "3 0 2 2006 WD Abnorml 140000 \n",
+ "4 0 12 2008 WD Normal 250000 \n",
+ "... ... ... ... ... ... ... \n",
+ "1455 0 8 2007 WD Normal 175000 \n",
+ "1456 0 2 2010 WD Normal 210000 \n",
+ "1457 2500 5 2010 WD Normal 266500 \n",
+ "1458 0 4 2010 WD Normal 142125 \n",
+ "1459 0 6 2008 WD Normal 147500 \n",
+ "\n",
+ "[1460 rows x 80 columns]"
+ ]
+ },
+ "execution_count": 418,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#Removing id Column since there is no relevant information there\n",
+ "df_trains.drop('Id', axis=1, inplace=True)\n",
+ "df_trains"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 419,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "LotFrontage - 259\n",
+ "Alley - 1369\n",
+ "MasVnrType - 8\n",
+ "MasVnrArea - 8\n",
+ "BsmtQual - 37\n",
+ "BsmtCond - 37\n",
+ "BsmtExposure - 38\n",
+ "BsmtFinType1 - 37\n",
+ "BsmtFinType2 - 38\n",
+ "Electrical - 1\n",
+ "FireplaceQu - 690\n",
+ "GarageType - 81\n",
+ "GarageYrBlt - 81\n",
+ "GarageFinish - 81\n",
+ "GarageQual - 81\n",
+ "GarageCond - 81\n",
+ "PoolQC - 1453\n",
+ "Fence - 1179\n",
+ "MiscFeature - 1406\n"
+ ]
+ }
+ ],
+ "source": [
+ "# since no all information had been displayed, a piece of code to show null values was created\n",
+ "for i in df_trains.columns:\n",
+ " if df_trains[i].isnull().sum() > 0:\n",
+ " print(i, ' - ', df_trains[i].isnull().sum())\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Creating dictionaries and setting numeric values"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 420,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0 0\n",
+ "1 0\n",
+ "2 0\n",
+ "3 0\n",
+ "4 0\n",
+ " ..\n",
+ "1455 0\n",
+ "1456 0\n",
+ "1457 0\n",
+ "1458 0\n",
+ "1459 0\n",
+ "Name: PoolQC, Length: 1460, dtype: int32"
+ ]
+ },
+ "execution_count": 420,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#LotShape\n",
+ "dict_lotShape = {3:'Reg', 2:'IR1', 1:'IR2', 0:'IR3'}\n",
+ "df_trains.LotShape = df_trains.LotShape.str.replace('Reg','3').str.replace('IR1','2').str.replace('IR2','1').str.replace('IR3','0')\n",
+ "df_trains.LotShape.astype('int')\n",
+ "#ExterQual\n",
+ "dict_ExterQual = {4:'Ex', 3:'Gd', 2:'TA', 1:'Fa', 0:'Po'}\n",
+ "df_trains.ExterQual = df_trains.ExterQual.str.replace('Ex','4').str.replace('Gd','3').str.replace('TA','2').str.replace('Fa','1').str.replace('Po','0')\n",
+ "df_trains.ExterQual.astype('int')\n",
+ "#ExterCond\n",
+ "dict_ExterCond = {4:'Ex', 3:'Gd', 2:'TA', 1:'Fa', 0:'Po'}\n",
+ "df_trains.ExterCond = df_trains.ExterCond.str.replace('Ex','4').str.replace('Gd','3').str.replace('TA','2').str.replace('Fa','1').str.replace('Po','0')\n",
+ "df_trains.ExterCond.astype('int')\n",
+ "#BsmtQual\n",
+ "dict_BsmtQual = {5:'Ex', 4:'Gd', 3:'TA', 2:'Fa', 1:'Po', 0:'NA'}\n",
+ "df_trains.BsmtQual = df_trains.BsmtQual.str.replace('Ex','5').str.replace('Gd','4').str.replace('TA','3').str.replace('Fa','2').str.replace('Po','1').str.replace('NA','0')\n",
+ "df_trains.BsmtQual = df_trains.BsmtQual.fillna('0')\n",
+ "df_trains.BsmtQual.astype('int')\n",
+ "#BsmtCond\n",
+ "dict_BsmtCond = {5:'Ex', 4:'Gd', 3:'TA', 2:'Fa', 1:'Po', 0:'NA'}\n",
+ "df_trains.BsmtCond = df_trains.BsmtCond.str.replace('Ex','5').str.replace('Gd','4').str.replace('TA','3').str.replace('Fa','2').str.replace('Po','1').str.replace('NA','0')\n",
+ "df_trains.BsmtCond = df_trains.BsmtCond.fillna('0')\n",
+ "df_trains.BsmtCond.astype('int')\n",
+ "#BsmtExposure\n",
+ "dict_BsmtExposure = {4:'Gd', 3:'Av', 2:'Mn', 1:'No', 0:'NA'}\n",
+ "df_trains.BsmtExposure = df_trains.BsmtExposure.str.replace('Gd','4').str.replace('Av','3').str.replace('Mn','2').str.replace('No','1').str.replace('NA','0')\n",
+ "df_trains.BsmtExposure = df_trains.BsmtExposure.fillna('0')\n",
+ "df_trains.BsmtExposure.astype('int')\n",
+ "#HeatingQC\n",
+ "dict_HeatingQC = {4:'Ex', 3:'Gd', 2:'TA', 1:'Fa', 0:'Po'}\n",
+ "df_trains.HeatingQC = df_trains.HeatingQC.str.replace('Ex','4').str.replace('Gd','3').str.replace('TA','2').str.replace('Fa','1').str.replace('Po','0')\n",
+ "df_trains.HeatingQC.astype('int')\n",
+ "#CentralAir\n",
+ "dict_CentralAir = {1:'Y', 0:'N'}\n",
+ "df_trains.CentralAir = df_trains.CentralAir.str.replace('Y','1').str.replace('N','0')\n",
+ "df_trains.CentralAir.astype('int')\n",
+ "#KitchenQual\n",
+ "dict_KitchenQual = {4:'Ex', 3:'Gd', 2:'TA', 1:'Fa', 0:'Po'}\n",
+ "df_trains.KitchenQual = df_trains.KitchenQual.str.replace('Ex','4').str.replace('Gd','3').str.replace('TA','2').str.replace('Fa','1').str.replace('Po','0')\n",
+ "df_trains.KitchenQual.astype('int')\n",
+ "#FireplaceQu\n",
+ "dict_FireplaceQu = {5:'Ex', 4:'Gd', 3:'TA', 2:'Fa', 1:'Po', 0:'NA'}\n",
+ "df_trains.FireplaceQu = df_trains.FireplaceQu.str.replace('Ex','5').str.replace('Gd','4').str.replace('TA','3').str.replace('Fa','2').str.replace('Po','1').str.replace('NA','0')\n",
+ "df_trains.FireplaceQu = df_trains.FireplaceQu.fillna('0')\n",
+ "df_trains.FireplaceQu.astype('int')\n",
+ "#GarageFinish\n",
+ "dict_GarageFinish = {3:'Fin', 2:'RFn', 1:'Unf', 0:'NA'}\n",
+ "df_trains.GarageFinish = df_trains.GarageFinish.str.replace('Fin','3').str.replace('RFn','2').str.replace('Unf','1').str.replace('NA','0')\n",
+ "df_trains.GarageFinish = df_trains.GarageFinish.fillna('0')\n",
+ "df_trains.GarageFinish.astype('int')\n",
+ "#GarageQual\n",
+ "dict_GarageQual = {5:'Ex', 4:'Gd', 3:'TA', 2:'Fa', 1:'Po', 0:'NA'}\n",
+ "df_trains.GarageQual = df_trains.GarageQual.str.replace('Ex','5').str.replace('Gd','4').str.replace('TA','3').str.replace('Fa','2').str.replace('Po','1').str.replace('NA','0')\n",
+ "df_trains.GarageQual = df_trains.GarageQual.fillna('0')\n",
+ "df_trains.GarageQual.astype('int')\n",
+ "#GarageCond\n",
+ "dict_GarageCond = {5:'Ex', 4:'Gd', 3:'TA', 2:'Fa', 1:'Po', 0:'NA'}\n",
+ "df_trains.GarageCond = df_trains.GarageCond.str.replace('Ex','5').str.replace('Gd','4').str.replace('TA','3').str.replace('Fa','2').str.replace('Po','1').str.replace('NA','0')\n",
+ "df_trains.GarageCond = df_trains.GarageCond.fillna('0')\n",
+ "df_trains.GarageCond.astype('int')\n",
+ "#PoolQC\n",
+ "dict_PoolQC = {4:'Ex', 3:'Gd', 2:'TA', 1:'Fa', 0:'NA'}\n",
+ "df_trains.PoolQC = df_trains.PoolQC.str.replace('Ex','4').str.replace('Gd','3').str.replace('TA','2').str.replace('Fa','1').str.replace('NA','0')\n",
+ "df_trains.PoolQC = df_trains.PoolQC.fillna('0')\n",
+ "df_trains.PoolQC.astype('int')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 421,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "LotFrontage - 259\n",
+ "Alley - 1369\n",
+ "MasVnrType - 8\n",
+ "MasVnrArea - 8\n",
+ "BsmtFinType1 - 37\n",
+ "BsmtFinType2 - 38\n",
+ "Electrical - 1\n",
+ "GarageType - 81\n",
+ "GarageYrBlt - 81\n",
+ "Fence - 1179\n",
+ "MiscFeature - 1406\n"
+ ]
+ }
+ ],
+ "source": [
+ "for i in df_trains.columns:\n",
+ " if df_trains[i].isnull().sum() > 0:\n",
+ " print(i, ' - ', df_trains[i].isnull().sum())\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Checking this information we can do following assumptions:\n",
+ "Alley:\\\n",
+ " It is not relevant since there are more recors with no alley access to the ones that have, so this information could be converted into outliers \\\n",
+ " Grvl\tGravel \\\n",
+ " Pave\tPaved \\\n",
+ " NA \tNo alley access\n",
+ "\n",
+ "Records with MasVnrType, MasVnrArea has 8 nulls in there, so those records can be deleted\n",
+ "\n",
+ "Electrical: \\\n",
+ " it is not relevant, there is just 1 record null, so this record can be deleted\n",
+ "\n",
+ "GarageType: \\\n",
+ " NA values could be set as NA instead of nan\n",
+ "\n",
+ "GarageYrBlt: \\\n",
+ " since there are 81 null values, year build values will be considered to fill the missing ones.\n",
+ "\n",
+ "Fence: \\\n",
+ "cannot assign numeric value in there since values are not as logical as thought, so this column should be deleted\n",
+ " \n",
+ "MiscFeature: \\\n",
+ "since there are several records with null value, has been decided to remove this column because it can generate outliers. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 422,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "C:\\Users\\David\\AppData\\Local\\Temp/ipykernel_7488/2391990203.py:17: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame\n",
+ "\n",
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " df_trains['GarageYrBlt'][i]= df_trains['YearBuilt'] [i]\n"
+ ]
+ }
+ ],
+ "source": [
+ "#removing columns\n",
+ "df_trains.drop(['Alley','MiscFeature','Fence','LotFrontage'], axis=1, inplace=True)\n",
+ "\n",
+ "#Removing rows\n",
+ "\n",
+ "#Electrical\n",
+ "df_trains.drop(df_trains[df_trains['Electrical'].isnull()].index, inplace=True)\n",
+ "# There is a relation between MasVnrType and MasVnrArea, so if one of them is removed the other one too\n",
+ "df_trains.drop(df_trains[df_trains['MasVnrType'].isnull()].index, inplace=True)\n",
+ "#there is a raltion between BsmtinType1 and BsmtinType2 however, there was just one more value set as null in BsmtFinType2 so this column has been considered to be removed.\n",
+ "df_trains.drop(df_trains[df_trains['BsmtFinType2'].isnull()].index, inplace=True)\n",
+ "#adding values in GarageYrBlt and GarageType\n",
+ "df_trains['GarageType'] = df_trains['GarageType'].fillna('NA')\n",
+ "#df_trains.reset_index(inplace=True)\n",
+ "df_trains[df_trains['GarageYrBlt'].isnull()]['GarageYrBlt'].index\n",
+ "for i in df_trains[df_trains['GarageYrBlt'].isnull()]['GarageYrBlt'].index:\n",
+ " df_trains['GarageYrBlt'][i]= df_trains['YearBuilt'] [i]\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 423,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "for i in df_trains.columns:\n",
+ " if df_trains[i].isnull().sum() > 0:\n",
+ " print(i, ' - ', df_trains[i].isnull().sum())"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "There are some column names that starts with number, so a character will be added at the beginning in order to complete every analysis needed"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 424,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "for i in df_trains.columns:\n",
+ " if i[0].isdigit():\n",
+ " df_trains.rename(columns={i:'_'+i},inplace=True)\n",
+ " "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Have checked if there are no more columns with nulls in there, there were not.\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now, will check which distribution would fit it"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 425,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 425,
+ "metadata": {},
+ "output_type": "execute_result"
+ },
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAA3sAAAFzCAYAAACHARCnAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8/fFQqAAAACXBIWXMAAAsTAAALEwEAmpwYAABJY0lEQVR4nO3dd5hcZd3/8c93Zna2l2xJdpMtqaSQQAIhoSugoYoNFESxoOij/ixYHtHH9qiP2FCxoFixodIRQULvpAAhCUk2vewmW5NsbzNz//7Y2bAJm2ST7OyZ8n5d17nmzH3OzHx2zmay37nvc25zzgkAAAAAkFx8XgcAAAAAAIw8ij0AAAAASEIUewAAAACQhCj2AAAAACAJUewBAAAAQBKi2AMAAACAJBTwOsCxKC4udhMnTvQ6BgAAAAB44sUXX2xyzpUMtS2hi72JEydq+fLlXscAAAAAAE+Y2baDbWMYJwAAAAAkIYo9AAAAAEhCFHsAAAAAkIQo9gAAAAAgCVHsAQAAAEASotgDAAAAgCREsQcAAAAASYhiDwAAAACSEMUeAAAAACQhij0AAAAASEIxK/bM7Pdm1mBmqwe1/cPMVkSXrWa2Ito+0cy6Bm37VaxyAQAAAEAqCMTwuf8o6eeS/jTQ4Jx798C6mf1IUsug/Tc55+bGMA8AAAAApIyYFXvOuafMbOJQ28zMJL1L0rmxen0AAAAASGVenbN3lqR659yGQW2TzOxlM3vSzM462APN7FozW25myxsbG2OfFAmjorJKZnZUS0VlldfxAQAAgBEVy2Gch3KlpNsG3d8lqdI512xmJ0u6x8yOd861HvhA59wtkm6RpPnz57tRSYuEULNju25cXH1Uj71u0fQRTgMAAAB4a9R79swsIOkdkv4x0Oac63HONUfXX5S0SdJxo50NAAAAAJKFF8M43yRpnXOuZqDBzErMzB9dnyxpmqTNHmQDAAAAgKQQy6kXbpP0vKTpZlZjZtdEN12h/YdwStLZklZGp2K4Q9LHnHO7Y5UNAAAAAJJdLK/GeeVB2j8wRNudku6MVRYAAAAASDVeXY0TAAAAABBDFHsAAAAAkIQo9gAAAAAgCVHsAQAAAEASotgDAAAAgCREsQcAAAAASYhiDwAAAACSEMUeIEnmk5kd9VJRWeX1TwAAAADsJ2aTqgMJxUV04+Lqo374dYumj2AYAAAA4NjRswcAAAAASYhiDwAAAACSEMUeAAAAACQhij0AAAAASEIUewAAAACQhCj2AAAAACAJUewBAAAAQBKi2AMAAACAJESxBwAAAABJiGIPAAAAAJIQxR4AAAAAJCGKPQAAAABIQhR7AAAAAJCEKPYAAAAAIAlR7AEAAABAEqLYAwAAAIAkRLEHAAAAAEmIYg8AAAAAkhDFHgAAAAAkIYo9AAAAAEhCFHsAAAAAkIQo9gAAAAAgCVHsAQAAAEASotgDAAAAgCREsQcAAAAASYhiDwAAAACSUMyKPTP7vZk1mNnqQW3fMLNaM1sRXS4atO16M9toZtVmdn6scgEAAABAKohlz94fJV0wRPuPnXNzo8sDkmRmsyRdIen46GN+aWb+GGYDAAAAgKQWiNUTO+eeMrOJw9z9rZL+7pzrkbTFzDZKWiDp+VjlAw4Ujjg1tfeouy+snlBEPX0R5WQENLEoS2bmdTwAAADgiMSs2DuET5rZ1ZKWS/qcc26PpAmSXhi0T020DRgVaUUV+sfyHWps63ndtqLsoE6ZWKhp43Lko+gDAABAghjtYu9mSd+S5KK3P5L0oSN5AjO7VtK1klRZWTnS+ZBinHN6paZFpe//idq7Q3rTzLEqyAoqI+BTMOBT7d4uLduyR/95tU5LtqTp7ONKNLEo2+vYAAAAwGGN6tU4nXP1zrmwcy4i6TfqH6opSbWSKgbtWh5tG+o5bnHOzXfOzS8pKYltYCS17r6w7lmxU0+ub1T3tld01cJKHT8+XxMKMlWUk67cjDTNKM3Te0+t1EVzSmVmuu+VnVq7q9Xr6AAAAMBhjWqxZ2Zlg+6+XdLAlTrvk3SFmaWb2SRJ0yQtHc1sSC3OOT2ytl41ezp1zvQSNd75v8pOH7qj28w0bWyu3j2/QuUFmVq8pl4rduwd3cAAAADAEYrZME4zu03SGyUVm1mNpK9LeqOZzVX/MM6tkj4qSc65V83sn5LWSApJ+oRzLhyrbMCq2hZtauzQWdOKdUJ5wbAeEwz4dOnc8frP6jo9ub5RPX1hLZhUyMVbAAAAEJdieTXOK4do/t0h9v+OpO/EKg8woKm9R09taFJVYZbmVRQc0WMDPp8uml2mR9bV64Utu2VmWjCpMDZBAQAAgGPgxdU4Ac+EwhH9Z3Wdgn6f3jxr3FH1yvl8pjfPHCfnpOc3N6skNz0GSQEAAIBjM6rn7AFee3pjk5o7enX+8eMOeo7ecJiZzp0xViU56Xro1ToFCsoO/yAAAABgFFHsIWXUtXRrZU2L5lUWqGoEpk9I8/t08QllMkklb/+yOntDxx4SAAAAGCEUe0gZS7fuVkbAp1MnFY3Yc+ZnpumC2aVKK6nSl+5cJefciD03AAAAcCwo9pASGtt6tKWpQ/MqxygYGNlf+6qibO196s+675Wd+vuyHSP63AAAAMDRothDSli2dbeCfp9OLM+PyfO3vnCHTp9SpO/8e61q93bF5DUAAACAI0Gxh6S3u6NXGxradWJFvtLT/DF6FafvvfMERZzTl+5cyXBOAAAAeI5iD0lv2dbdCvhMc49wTr0jVVGYpesvmqmnNzQd0XDOisoqmdlRLxWVVTH8qQAAAJComGcPSW1vZ6+q69s0t6JAWcHY/7pftaBSD67ape/8e63OPq5EEwoyD/uYmh3bdePi6qN+zesWTT/qxwIAACB50bOHpPbitj3ymemkyjGj8no+nzGcEwAAAHGBYg9Jq7svrLW72jSrLE85xzCB+pEaPJzznhW1o/a6AAAAwGAUe0ha6+vbFHZOsyfkjfprX7WgUidWFOg7/16nlq6+UX99AAAAgGIPSWvtrjYV5QRVkpM+6q/t85m+/dbZau7o0Y8fXj/qrw8AAABQ7CEp7enoVV1rt2aW5snMPMkwpzxf711YpT89v1Wra1s8yQAAAIDURbGHpLS2rlUmaUZprqc5Pr9ousZkBfXVe1crEuFiLQAAABg9FHtIOs45rd3VpsqiLGWP4oVZhpKflabrL5qpl7fv1e0vDn/uPQAAAOBYUewh6dTs6VJ7T0gzS0f/wixDeedJE3TKxDG64UEu1gIAAIDRQ7GHpLO2rlVBv09TSrK9jiJJMjN949LjtberTz97dIPXcQAAAJAiKPaQVPrCEW1saNe0cTkK+OPn1/v48fl69/wK/fG5rdrc2O51HAAAAKSA+PlrGBgBGxva1Rd2cTOEc7DPLZqujDS//u+BdV5HAQAAQAqg2ENSWVfXpryMgMYXZHgd5XVKctP1iXOm6pG19XpmQ5PXcQAAAJDkKPaQNCw9WzV7OjVtXK5nc+sdzgfPmKjyMZn61v1rFApHvI4DAACAJEaxh6SROflkRZzi5sIsQ8lI8+vLF81UdX2b/r6MqRgAAAAQOxR7SBpZUxcqM82vcXnxN4RzsAtnl2rBxEL9+OH1au8JeR0HAAAASYpiD0mhNxRR5uSTNak4W744HcI5wMx0/UUz1NzRq1ue2ux1HAAAACQpij0khWVbd8uXkRPXQzgHm1c5RhfPKdNvn94sX3aB13EAAACQhCj2kBQeXlOvSF+PKgqzvI4ybF84f7p6QxEVnPEer6MAAAAgCVHsIeE55/Twmnp1b12htDiaSP1wJhZn6z0LK5Vz4vna09HrdRwAAAAkmcT5yxg4iHV1bard26WujS94HeWIfeq8aXJ9PXp2E/PuAQAAYGRR7CHhPbymXmZS58ZlXkc5YsU56Wpdepc2NXZoV0uX13EAAACQRCj2kPAeWVuvuRUFinTu9TrKUWlddo+ygn49u7FZzjmv4wAAACBJUOwhodW1dGtlTYveNHOc11GOmuvr1oKJhard26Xtuzu9jgMAAIAkQbGHhPbounpJ0qJZHhd75pOZHdUiScdPyFNuRkDPb6Z3DwAAACMj4HUA4Fg8vq5BFYWZmjo2x9sgLqIbF1cf1UOvWzRdAZ9PCycV6pG1Ddrc1KEpJR7/PAAAAEh49OwhYfWFI3ph826dPa1kXw9ZIptZmqeCzDR69wAAADAiYlbsmdnvzazBzFYPavuBma0zs5VmdreZFUTbJ5pZl5mtiC6/ilUuJI8VO/aqvSeks6YVex1lRPh8plMnF6m5vVfr69u9jgMAAIAEF8uevT9KuuCAtoclzXbOnSBpvaTrB23b5JybG10+FsNcSBJPb2iSz6TTpiRHsSdJx43LUVFOUC9sblYkQu8eAAAAjl7Mij3n3FOSdh/Qttg5F4refUFSeaxeH8nvmQ2NOqG8QPmZaV5HGTFmptMmF2lvV5/W1rV6HQcAAAAJzMtz9j4k6cFB9yeZ2ctm9qSZneVVKCSG1u4+vVLTkjRDOAebXJytsbnpWrZ1D717AAAAOGqeFHtm9hVJIUl/jTbtklTpnJsn6TpJfzOzvIM89lozW25myxsbG0cnMOLO85uaFY44nTk1+Yo9M9PCSYVq6epTdX2b13EAAACQoEa92DOzD0i6RNJVLnrJQedcj3OuObr+oqRNko4b6vHOuVucc/Odc/NLSkpGKTXizTMbmpQV9Gte5Rivo8TEpOJsFecEtXTLbnr3AAAAcFRGtdgzswskfVHSpc65zkHtJWbmj65PljRN0ubRzIbE8szGJi2cVKhgIDlnD+nv3es/d299A717AAAAOHKxnHrhNknPS5puZjVmdo2kn0vKlfTwAVMsnC1ppZmtkHSHpI8553YP9bxAzZ5ObWnq0JnTkrtnd0pJtooGeveYdw8AAABHKBCrJ3bOXTlE8+8Osu+dku6MVRYkl2c2NElSUl6cZTAz08KJhXpgdZ02NrTruHG5XkcCAABAAknOMXBIak9vbNK4vHRNG5vjdZSYmzo2R4XZ/b17jt49AAAAHAGKPSSUSMTpuY1NOmNqsczM6zgxZ2ZaMLFQzR292tTY4XUcAAAAJBCKPSSUV3e2ak9nX9IP4Rxs2tgc5Wemafk2evcAAAAwfBR7SCjPbuo/X++MJJxf72B8PtNJlQWqb+1R7d4ur+MAAAAgQVDsIaEs3bJbU0qyNTY3w+soo2pWWZ6ygn4t37rH6ygAAABIEBR7SBjhiNOyrbu1YFKh11FGXcDv09yKAm3b3anGth6v4wAAACABUOwhYVTXtamtO5SSxZ4knTAhX0G/T8u3MQUlAAAADo9iD3GjorJKZnbQ5dS3XCVJuvwNJw25Pdmlp/k1Z0K+NtS3q6Wrz+s4AAAAiHMxm1QdOFI1O7brxsXVB93+71W7VN/arR/c+cyQ269bND1W0eLG3MoCrdixVy9u26NzZ4z1Og4AAADiGD17SAjOOdXu6dKEgkyvo3gqJz2gGWW5WrOrVZ29Ia/jAAAAII5R7CEh7O3sU1dfOOWLPUk6qXKMwhGnVTUtXkcBAABAHKPYQ0IYmF+OYk8qzA6qqihLK2tbFIpEvI4DAACAOEWxh4RQu7dLmWl+FWSleR0lLsyrKFBnb1jr69q9jgIAAIA4RbGHhFC7t0sTxmSmxFU3h6OyMEtF2UG9vINJ1gEAADA0ij3EvdauPrV1hxjCOYiZaV5lgZrae5VReYLXcQAAABCHKPYQ93Zyvt6Qpo/LVWaaX7mnvM3rKAAAAIhDFHuIe7V7uxQM+FSUE/Q6SlwJ+H06oTxfWVMXaFMj5+4BAABgfxR7iHu1e7s0Pj9DPs7Xe505E/LlQn36w7NbvI4CAACAOEOxh7jW2RvSns4+TRjDEM6hZKcH1LHmCd35Yq1auvq8jgMAAIA4QrGHuMb8eofX9tL96uoL666XaryOAgAAgDhCsYe4tnNPtwI+09jcDK+jxK3e+k2aW1GgP7+wTc45r+MAAAAgTlDsIa7V7u1SaX6G/D7O1zuUq0+r0ubGDj23qdnrKAAAAIgTFHuIWz19YTW29zCEcxgumlOmMVlp+vPz27yOAgAAgDhBsYe4tbOlWxLn6w1HRppf7zqlQg+vrdeuli6v4wAAACAOUOwhbtXu7ZLPpNJ8ztcbjvcurFLEOd22dIfXUQAAABAHKPYQt3bu7dK4vAyl+fk1HY6KwiydM32sblu6Xb2hiNdxAAAA4DH+ikZc6gtHVN/azRDOI/S+U6vU2NajxWvqvI4CAAAAj1HsIS7VtXQr4qTxFHtH5OzjSlRRmMmFWgAAAECxh/g0MJn6+ALO1zsSfp/pvQurtGTLblXXtXkdBwAAAB6i2ENcqt3bpZLcdKUH/F5HSTiXz69QMODTX16gdw8AACCVUewh7oQjTnUtnK93tAqzg7rkhDLd9VKN2ntCXscBAACARyj2EHca2roVijiKvWNw9WkT1dEb1t0v1XgdBQAAAB6h2EPcqd3D+XrH6sTyfM2ZkK8/v7BNzjmv4wAAAMADFHuIO7V7uzQmK01ZwYDXURKWmel9p1ZpfX27lm7Z7XUcAAAAeIBiD3El4px2cr7eiHjLieOVn5mmP3GhFgAAgJREsYe40tzeq95QRBPGUOwdq8ygX5efXK6HVtepobXb6zgAAAAYZTEt9szs92bWYGarB7UVmtnDZrYhejsm2m5mdpOZbTSzlWZ2UiyzIT4NzK9Hz97IuOrUKoUiTn9ftsPrKAAAABhlse7Z+6OkCw5o+5KkR51z0yQ9Gr0vSRdKmhZdrpV0c4yzIQ7V7ulSXkZAuRlpXkdJCpOKs3XWtGLdtnS7QuGI13EAAAAwimJa7DnnnpJ04NUh3irp1uj6rZLeNqj9T67fC5IKzKwslvkQf2r3dmk8vXpHxnwys4Mud37vs9rV0q28Gae/bltFZZXX6QEAABAjXlzucJxzbld0vU7SuOj6BEmDx5rVRNt2DWqTmV2r/p4/VVZWxjYpRlWgcIK6+sIM4TxSLqIbF1cfdHMk4vSH57aq6iPf1dvmTdhv23WLpsc6HQAAADzi6QVaXP8EYEc0CZhz7hbn3Hzn3PySkpIYJYMXMipmSxIXZxlhPp/p+PF52ra7Uy1dfV7HAQAAwCgZVrFnZmcMp22Y6geGZ0ZvG6LttZIqBu1XHm1DikgvP15ZQb8KMjlfb6TNHp8vM2lVbYvXUQAAADBKhtuz97Nhtg3HfZLeH11/v6R7B7VfHb0q56mSWgYN90QKyKiYrQkFmTIzr6MknZyMgCYXZ2vNzlaFIlyoBQAAIBUc8pw9MztN0umSSszsukGb8iT5D/fkZnabpDdKKjazGklfl3SDpH+a2TWStkl6V3T3ByRdJGmjpE5JHzyinwQJrWZPpwL5Y7k4SwzNmZCvTY0d2tjQrhmleV7HAQAAQIwd7gItQUk50f1yB7W3SrrscE/unLvyIJvOG2JfJ+kTh3tOJKelW/ov2srFWWKnsjBL+ZlpWlXTQrEHAACQAg5Z7DnnnpT0pJn90Tm3bZQyIQUt27pb4e52FeUEvY6StMxMcybk65mNTWpq71FxTrrXkQAAABBDwz1nL93MbjGzxWb22MAS02RIKUu27FZPzRr5OF8vpmaNz5PfZ1pVw4VaAAAAkt1w59m7XdKvJP1WUjh2cZCKGtt6tLmxQz07VnsdJellpvk1bWyO1tW16YypxV7HAQAAQAwNt9gLOedujmkSpKxlW/vP1+um2BsVJ5Tna11dm6rr27yOAgAAgBga7jDOf5nZx82szMwKB5aYJkPKWLpltzLT/Oqt3+R1lJRQmpeh4pwgQzkBAACS3HB79gbmxfvCoDYnafLIxkEqWrJlt06qKtC6CCOER8PAhVoer25UcPx0r+MAAAAgRobVs+ecmzTEQqGHY9bS1ad1da1aMLHI6ygpZUZpntL8pty5F3odBQAAADEyrJ49M7t6qHbn3J9GNg5SzYvbdss5acEkRgWPpmDApxmleXplxlna29mrgiymvAAAAEg2wz1n75RBy1mSviHp0hhlQgpZsmW30vymeZUFXkdJOXMm5MuXlq47XqzxOgoAAABiYFg9e865/zf4vpkVSPp7LAIhtSzdslsnlBcoI83vdZSUU5Kbru6aNfrrkmx96IxJ8vmY4xAAACCZDLdn70AdkiaNZBCkns7ekFbVtDCE00PtLz+gLU0demZjk9dRAAAAMMKGVeyZ2b/M7L7o8m9J1ZLujm00JLsV2/cqFHEUex7qqH5GxTlB3frcVq+jAAAAYIQNd+qFHw5aD0na5pzjRB8ckyVbdstn0slVY7yOkrrCIb1nYZV+9tgGbW3q0MTibK8TAQAAYIQMd+qFJyWtk5QraYyk3liGQmpYumW3Zo3PU15GmtdRUtp7F1bKb6Zbn9/qdRQAAACMoOEO43yXpKWSLpf0LklLzOyyWAZDcusNRfTS9j3MrxcHxuZl6OITynT78hq194S8jgMAAIARMtwLtHxF0inOufc7566WtEDSV2MXC8luVe1e9YQiWjCJIZzx4INnTFJ7T0h3Mg0DAABA0hhusedzzjUMut98BI8FXmfJlt2SpFMmcnGWeDC3okBzKwp063NbFYk4r+MAAABgBAy3YPuPmT1kZh8wsw9I+rekB2IXC8lu2Zbdmjo2R0U56V5HQdQHz5iozU0dempDo9dRAAAAMAIOWeyZ2VQzO8M59wVJv5Z0QnR5XtIto5APSSgccVq+dQ9TLsSZC2eXqSQ3XX94dqvXUQAAADACDtez9xNJrZLknLvLOXedc+469c+x95PYRkOyWrurVW09IS2k2IsrwYBP7zu1Sk+ub9SG+jav4wAAAOAYHa7YG+ecW3VgY7RtYkwSIekt5Xy9uPXeU6uUkebTLU9t9joKAAAAjtHhir2CQ2zLHMEcSCFLt+xW+ZhMjS/gVyjeFGYH9e75FbpnRa3qWrq9jgMAAIBjcLhib7mZfeTARjP7sKQXYxMJySwScVqypVkLJzG/Xrz68FmTFY44/eG5LV5HAQAAwDEIHGb7ZyTdbWZX6bXibr6koKS3xzAXktT6hjbt6ezTaVMo9uJVRWGWLppTpr+9sF2fOGeq8jLSvI4EAACAo3DInj3nXL1z7nRJ35S0Nbp80zl3mnOuLvbxkGye39QsSVycJc599OwpausJ6bYl272OAgAAgKN0uJ49SZJz7nFJj8c4C1LAC5ubVT4mUxWFWV5HwSHMKc/X6VOK9Ptnt+iDZ0xSMDDcKTkBAAAQL/gLDqOm/3y93TptMkM4E8FH3zBF9a09undFrddRAAAAcBQo9jBq1tW1aW9nn06l2EsIZ08r1ozSXP3qyU0KR5zXcQAAAHCEKPYwal7Y3H++3qlcnCUhmJk+ee5UbWrs0P0rd3odBwAAAEeIYg+j5oXNzaoszNIE5tdLGBfNLtNx43J006Mb6N0DAABIMBR7GBUD5+udOpmrcCYSn8/06fOOo3cPAAAgAVHsYVSsrWtVSxfz6yWiC2eXavq4XP2U3j0AAICEQrGHUfHa/HoUe4nG5zN9+k3TtLmxQ/96hd49AACAREGxh1HxwubdqirK0njO10tIFxxfqhmlubrp0Q0KhSNexwEAAMAwUOwh5sIRp6VbmplfL4H5fKbPvGmaNjd16D569wAAABLCqBd7ZjbdzFYMWlrN7DNm9g0zqx3UftFoZ0NsrN3VqtbuEPPrJbhFs0o1syxPNz68Xj2hsNdxAAAAcBijXuw556qdc3Odc3MlnSypU9Ld0c0/HtjmnHtgtLMhNvbNr0exl9B8PtOXL5qhmj1duvW5rV7HAQAAwGF4PYzzPEmbnHPbPM6BGHp2Y5MmFWerND/D6yg4RmdNK9Ebp5foZ49t1O6OXq/jAAAA4BC8LvaukHTboPufNLOVZvZ7MxvjVSiMnN5QREu27NaZU4u9joIR8uWLZqqjJ6SbHt3gdRQAAAAcgmfFnpkFJV0q6fZo082SpkiaK2mXpB8d5HHXmtlyM1ve2Ng4GlFxDFbs2KvO3rDOoNhLGseNy9UVCyr1lxe2aXNju9dxAAAAcBBe9uxdKOkl51y9JDnn6p1zYedcRNJvJC0Y6kHOuVucc/Odc/NLSkpGMS6OxjMbm+QzMZl6kvnsm45TesCnGx5c53UUAAAAHISXxd6VGjSE08zKBm17u6TVo54II+6ZDY06obxA+ZlpXkfBCCrJTdfHz5mqxWvq912ABwAAAPHFk2LPzLIlvVnSXYOav29mq8xspaRzJH3Wi2wYOa3dfXqlpoXz9ZLUNWdO0oSCTH3t3tXqDTHROgAAQLzxpNhzznU454qccy2D2t7nnJvjnDvBOXepc26XF9kwcpZs3q1wxHG+XpLKSPPrf996vNbXt+s3T2/2Og4AAAAO4PXVOJHEnt3YpIw0n06qKvA6CmLkvJnjdOHsUt306AZta+7wOg4AAAAGodhDzDyzsUkLJhUpPeD3Ogpi6OtvOV5pfp/+557Vcs55HQcAAABRFHuIibqWbm1saNeZU7kKZ7Irzc/QF86frqc3NOm+V3Z6HQcAAABRFHuIiWc2NkkS5+uliPeeWqUTyvP1rfvXqKWzz+s4AAAAEMUeYuTZjU0qyg5qZmme11EwCvw+0/+9fY72dPbpa/cxawoAAEA8oNjDiHPO6ZmNTTp9arF8PvM6DkbJ7An5+tS503Tvip0M5wQAAIgDFHsYcRsa2tXY1sP5einoE+dM0bzKAv3P3au0q6XL6zgAAAApjWIPI+6p9Y2SOF8vFQX8Pv34XXPVF3b6/O2vKBLh6pwAAABeodjDiHtyfaOmlGSrfEyW11HggYnF2frqJbP07MZm/fG5rV7HAQAASFkUexhRnb0hLdm8W+dMH+t1FHjoygUVOm/GWN3wn3Vas7N1yH0qKqtkZke9VFRWjfJPBQAAkFgCXgdAcnluY7N6wxG9kWIvpZmZvnfZCbrkpmf0X399Ufd98kzlZ6btt0/Nju26cXH1Ub/GdYumH2tMAACApEbPHkbUE+sblBX065RJY7yOguEwX8x614pz0vWLq+apdk+XvnD7K3KO8/cAAABGEz17GDHOOT1R3ajTpxQpPeD3Og6Gw0Vi2rt2clWhrr9opr51/xrd8tRmffQNU476tQAAAHBk6NnDiNnU2KGaPV16A0M4MciHzpioi+eU6fsPVWvJ5mav4wAAAKQMij2MmCeqGyRJbzyuxOMkGDXDGAbq8/l084fOVHfjDl1244MK5JXIzLxODgAAkPQYxokR8+T6Rk0dm6OKQqZcSBlHMAy0ub1H/1xeo3n/fZsuP7lc/33hzBiHAwAASG307GFEDEy5QK8eDqYoJ10XzC5VU1uPHnq1ThK9ewAAALFEsYcR8fwmplzA4U0qztaZ04q1qbFD+Wdd5XUcAACApEaxhxHxRHUjUy5gWOZVFGhWWZ4KTr9C6+qGnnAdAAAAx45iD8fMOafHqxuYcgHDYmY6d8ZYdW9fpUfWNKhmT6fXkQAAAJISxR6OGVMu4Ej5fabGu7+j/Mw0/WvlLjW193gdCQAAIOlQ7OGYPbK2XpJ03gyKPQxfpLtdb503Xml+070rdqqtu8/rSAAAAEmFYg/HbPGrdZo9IU/jCzK9joIEk5eRpreeOEG9oYjuWbFT3X1hryMBAAAkDYo9HJPGth69vGOv3jyz1OsoSFAluem65IQy7e3s1b9W7lQoHPE6EgAAQFKg2MMxeXRtvZyTFh0/zusoSGAVhVlaNKtUO/d266FX6xVxzutIAAAACY9iD8dk8Zp6lY/J1IzSXK+jIMFNL83VWdOKtbGxXU+tb5Sj4AMAADgmFHs4ah09IT2zsUlvnjVOZuZ1HCSBkyrHaF5lgV6padGL2/Z4HQcAACChUezhqD29oVG9oYgWzeo/X6+iskpmdtQLIElnTS3WceNy9OymZq3ZyaTrAAAARyvgdQDEl4rKKtXs2D6sfYsu+qwyp56i06eNlVz/RTVuXFx91K993aLpR/1YJA8z05tnjVNXX1iPrK1XMODT1LE5XscCAABIOBR72E/Nju3DKtgiEaffPL1ZE4uzdd1DayVRrGHkBHw+XTJnvO5+uVb/WV2nS+eOV2VhltexAAAAEgrDOHFUdrZ0qTsU0eSSbK+jIEkFAz69de54FWSn6f6VO7WrpcvrSAAAAAmFYg9HZVNjh/w+U1UhxR5iJyPNr7fPnaCsYED3rtippvYeryMBAAAkDIo9HDHnnDY3tqtiTKaCAX6FEFvZ6QG9Y94Epfl9uvvlWu3t7PU6EgAAQELgL3Ucsca2HrV2hzSlhItmYHTkZabp7fMmKOKc7nq5Vu3dIa8jAQAAxD2KPRyx9Q3t8pk0hSskYhQVZgf1trkT1NMX0d0v18qXmed1JAAAgLhGsYcj4pzThvo2VRRmKTPN73UcpJhxeRl6y4llaunu09jLv6m27j6vIwEAAMQtz4o9M9tqZqvMbIWZLY+2FZrZw2a2IXo7xqt8GFpDdAjnNHr14JHyMVm6eE6ZgmMn6cO3Lld3X9jrSAAAAHHJ6569c5xzc51z86P3vyTpUefcNEmPRu8jjmyojw7h5Hw9eGhScbaa/n2jlm7drY//9SX1hSNeRwIAAIg7Xhd7B3qrpFuj67dKept3UXAg55zWN7SpsjBLGQzhhMc61z6lb79tth5b16DP3/6KIhHndSQAAIC44mWx5yQtNrMXzezaaNs459yu6HqdpHEHPsjMrjWz5Wa2vLGxcbSyQlJ9a4/aukM6blyu11EASdJVC6v03xfM0L0rduqr966WcxR8AAAAAwIevvaZzrlaMxsr6WEzWzd4o3POmdnr/nJzzt0i6RZJmj9/Pn/ZjaL1DW3ym2lyMROpI3781xunqLW7Tzc/sUnBgE9fu2SWzMzrWAAAAJ7zrNhzztVGbxvM7G5JCyTVm1mZc26XmZVJavAqH/bXfxXOdlUWZSmdIZyIM188f7p6+iL6/bNbFPT79KULZ1DwAQCAlOfJME4zyzaz3IF1SYskrZZ0n6T3R3d7v6R7vciH19vV0q32npCO4yqciENmpq9eMlPvO7VKv35qs258eL3XkQAAADznVc/eOEl3R795D0j6m3PuP2a2TNI/zewaSdskvcujfDjAhoZ2+X2mSSUM4UR8MjN989LjFYpE9LPHNirN79OnzpvmdSwAAADPeFLsOec2SzpxiPZmSeeNfiIcSiQ6kfrEoiylBxjCifjl85m+87Y56gs73fjwegX8po+/carXsQAAADzh5QVakCB27O5UR29Y00u5Cifin89n+t47T1BfOKLv/6daQb9PHz5rstexAAAARh3FHg5r7a42pQd8msRVOJEg/D7Tjy4/UaGw07f/vVZpfp/ef/pEr2MBAACMKoo9HFJPKKxNje2aWZangM/LaRmBIxPw+/STK+aqLxzR1+97VT6f6X2nVnkdCwAAYNTw1zsOaWNDu0IRp5llDOFE4knz+/Tz95ykN80cq6/es1q/e2aL15EAAABGDcUeDmntrjYVZKapNC/D6yjAUQkGfPrlVSfrwtml+tb9a3TzE5u8jgQAADAqKPZwUK1dfard26WZZXlMUI2EFgz49LMr5+nSE8fre/9Zp588sl7OOa9jAQAAxBTn7OGg1tW1SZJmcBVOJIGA36cfv3uuggGffvLIBvWGIvrC+dP5IgMAACQtij0MyTmntbtaVV6QqbzMNK/jACPC7zN9/50nKM3v0y+f2KSeUET/c/FMCj4AAJCUKPYwpLrWbu3t6tP8iWO8jgKMKJ/P9H9vn630gE+/e2aLekJh/e+ls+XzUfABAIDkQrGHIa3Z1aqAzzRtLEM4kXzMTF9/yyylB3z69VOb1RuK6LvvOEF+Cj4AAJBEKPbwOr2hiNbXtWva2BwFA1zDB8nJzPSlC2coPeDTTY9tVEdPWDe++0SlB/xeRwMAABgRFHt4nfX1beoNRzR7Qr7XUYCYMjNdt2i6cjPS9J0H1qq1u0+/eu/Jyk7noxEAACQ+um3wOqtqW1SUHVRZPnPrITlVVFbJzPYt175hipoe+Imeqq7X1I/8VP7MvP22H7hUVFZ5/SMAAAAcFl9fYz/B0qlqaOvRG48r4QqFSFo1O7brxsXVr2vf1NiuB1cHdOL1d+pt88YrN2PoK9Fet2h6rCMCAAAcM3r2sJ+cuRcq4DPNKOPCLEg9U0py9La549XeE9LtL9ZoT2ev15EAAACOGsUe9mnr7lP2zLN13LhcLlKB+Ge+Qw61PNRyKOVjsvTOkyYoFHa6fXmNGlq7R+kHAgAAGFkM48Q+96zYKV8wU3O4MAsSgYsMORRzOA43DHNsXoYun1+uu1+u1Z0v1eotJ5apfEzWUb0WAACAV+jZgyTJOae/Ldmu3vpNGpeX7nUcwHNjsoK6/ORy5aQHdM/LO7Whvs3rSAAAAEeEYg+SpJd37NXaXa1qW/EfLswCROVmpOmy+eUam5euB1bXacWOvV5HAgAAGDaKPUiS/vz8NmUH/epY84TXUYC4kpnm1zvmTdDk4mw9ub5Rz2xs8joSAADAsFDsQXUt3frXKzv1rlMq5Hq7vI4DxJ2A36eLTyjTnAn5enHbHhVdfJ16QxGvYwEAABwSxR70p+e3KuycPnj6JK+jAHHLZ6ZzppfotMlFypl9rq65dZnae0JexwIAADgoir0U19kb0l+XbNf5s0pVWcTVBoFDMTMtmFSopgd+quc2NeuKW55XY1uP17EAAACGRLGX4u58qVYtXX265ix69YDh6lj1sH5z9cna1NChd9z8rDY3tnsdCQAA4HUo9lJYJOL0+2e26MTyfM2vGuN1HCChnDtjnG679lR19IT1jpuf05LNzV5HAgAA2A/FXgp7vLpBW5o6dM1Zk5luATgKcysKdPfHT1dhdlDv/d0S3f1yjdeRAAAA9qHYS2G/fXqLyvIzdOHsUq+jAInFfDIzmZkmFufoya9corbNK/TZf7yigjOv2rdtqKWissrr9AAAIEUEvA4Ab7y6s0XPb27W9RfOUJqfmh84Ii6iGxdX79cUjjg9uq5ea898j0697KM6b+ZYBXyv/7d13aLpo5USAACkOIq9FPWLxzcqNz2gKxZUeh0FSAp+n+nNM8epICuo5zc1q7W7T5ecMF6ZaX6vowEAgBRFl04K2lDfpgdX1+n9p09Ufmaa13GApGFmWjCxUBccX6r61h79c9kO7ens9ToWAABIURR7Kejnj29UZppfHzqT6RaAWJhemqt3zJugnlBE/1y2Q9t3d3odCQAApCCKvRSzubFd/3plp953WpUKs4NexwGS1viCTL1rfrmy0wO65+VavbR9j5xzXscCAAAphHP2UswvHt+kYMCnj5w12esoQNIryArqXfMrtHhNnZ7e0KSGth5ZgC9ZAADA6KBnL4Vsb+7UPStq9Z4FVSrOSfc6DpASggGfLp5TptMmF6m6rk3jrvq+tjZ1eB0LAACkAIq9FPLLJzbK7zN99A306gGjycy0YFKhLj1xvAL543TJz57Rfa/s9DoWAABIcqNe7JlZhZk9bmZrzOxVM/t0tP0bZlZrZiuiy0WjnS2Z7djdqTtfqtEVp1RoXF6G13GAlDSpOFu7/vApTS/N1adue1nX37VSXb3hUXv9isqqQ074friFCeEBAEgsXpyzF5L0OefcS2aWK+lFM3s4uu3HzrkfepAp6f1wcbX8PtN/vXGK11GAlBZua9Tfrz1VP354vX75xCYt27pHP7z8RM2tKIj5a9fs2P66yeCPBBPCAwCQWEa9Z885t8s591J0vU3SWkkTRjtHKlld26J7V+zUh86YpLL8TK/jACkvze/TFy+YoT9fs0AdPSG945fP6oYH16m7b/R6+QAAQPLz9Jw9M5soaZ6kJdGmT5rZSjP7vZmNOchjrjWz5Wa2vLGxcbSiJiznnP7vgbUak5Wmj9GrB8SVs6aV6KHPnq13n1KhXz25SRff9LSWb93tdSwAAJAkPCv2zCxH0p2SPuOca5V0s6QpkuZK2iXpR0M9zjl3i3NuvnNufklJyWjFTVhPrm/Uc5ua9anzpikvI83rOAAOkJeRpu++4wT96UML1N0X0WW/el6fuu1l7dzb5XU0AACQ4Dwp9swsTf2F3l+dc3dJknOu3jkXds5FJP1G0gIvsiWTcMTphgfXqbIwS1ct5MIKQDw7+7gSPXzd2frUedP00Kt1OvdHT+jHD69XZ2/I62gAACBBeXE1TpP0O0lrnXM3DmovG7Tb2yWtHu1syebul2u1rq5NXzh/uoIBZtkA4l1WMKDr3nycHv3cG/SmmeP000c36IwbHtMvHt+otu4+r+MBAIAE40UFcIak90k694BpFr5vZqvMbKWkcyR91oNsSaOzN6QfLa7WieX5unhO2eEfACBulI/J0s/fc5Lu+vjpmltRoB88VK0zbnhMNy6uVkNrt9fxAABAghj1qRecc89IsiE2PTDaWZLZTx/doF0t3frZlfPk8w31dgOIdydVjtEfPrhAq2pa9LPHNuimxzbql09s0ptnjdN7FlbqjCnF/PsGAAAH5cU8e4ix6ro2/e7pLXrX/HLNn1jodRwAx2hOeb5uuXq+Nje267al23XHizV6cHWdKguz9JYTy3TxnPGaWZar/lHyAAAA/Sj2koxzTl+9Z7VyMgL60oUzvY4DYARNLsnRVy6epc8tmq6HXq3THS/W6FdPbtYvHt+kySXZunB2qc6bOU4nlhfIT48fAAApj2IvydzxYo2Wbt2t771zjgqzg17HARADGWl+vXXuBL117gQ1t/fooVfrdf/KnfsKv8LsoN44vUTnzRins44rZtoVAABSFMVeEtnT0avvPrhOJ1eN0eUnV3gdB8AoKMpJ13sWVuo9Cyu1t7NXT65v1OPrGvTYugbd9VKtAj7TKRMLdd7MsQqMGe91XAAAMIoo9pLIDQ+uU0tXn779ttlctAFIQQVZwX09fqFwRC/v2KvH1jXosbUN+va/12rCtbfoj89t1aTibE0qztaEgkyGewIAkMQo9pLEY+vq9Y/lO/TRN0zWzLI8r+MA8FjA79MpEwt1ysRC/fcFM1Szp1Mzz7tcVVd+XqtqW7Rix14F/T5VFmZpYnGWJhZlKzud/xIAAEgm/M+eBJrbe/TFO1ZpRmmurnvzcV7HARCHysdkqf3lB/S27/1YfeGIduzu1JamDm1p7tDGxnZJUkVhpmaW5mlKSY6CAS+mYQUAACOJYi/BOed0/V2r1NrVpz9fs0DpAb/XkQAcivmOaYqE8opK7di+7ZgipPl9mlySo8klOXLOqam9Vxsb2lVd36bFa+oV8DVoytgczRmfr/EFGUzpAABAgqLYS3C3L6/R4jX1+vJFMxi+CSQCF9GNi6uP+uHXLZo+gmEkM1NJbrpKctN16uRC7Wrp1tq6Vq2vb1d1XZuKsoOaMyFfM8pyR/R1AQBA7FHsJbDtzZ365r9e1amTC/XhMyd7HQfAaDjGnsFDPrWZxhdkanxBps6eVqLq+jatqmnRE+sb9eymJo055xrt3Nul8QWZMXl9AAAwsij2ElR3X1ifvO0l+cz0w8tP5OqbQKo4hp7BI+kVTPP7NHt8vmaPz1dda7dWbN+r3vmX6uzvP65LTxyva98wWTNKGU0AAEA84wz8BOSc01fuXq2VNS360btOVPmYLK8jAUhipXkZumB2qWp//RG977QqPbi6Thf85Gl94A9L9dymJjnnvI4IAACGQLGXgG59bqvufKlGnz5vmhYdX+p1HAApItzaoK+/5Xg9f/25+vyi47S6tkXv+c0SvfUXz+rfK3cpHKHoAwAgnlDsJZjnNzXrW/9eqzfNHKdPnzfN6zgAUlBBVlCfPHeanvnvc/V/b5+jtu6QPvG3l/SmG5/U35duV08o7HVEAAAgir2EUrOnU5/420uaWJSlH7+b8/QAeCsjza/3LKzUI9e9Qb+86iTlpAf0pbtW6azvPa5bntqk9p6Q1xEBAEhpFHsJoqm9R1f/bqn6whHdcvV85WakeR0JACRJfp/pojlluu+TZ+gv1yzUtHE5+r8H1un07z6qHy2uVnN7j9cRAQBISVyNMwG0dPXp6t8t1c6WLv3lmoWaUpLjdSQAqegIpn0Ilk5Tx6mX6aauXv30oVelbcv1n5u+pFnjuYInAACjhWIvznX1hvXhW5dpQ0ObfnP1fM2fWHjI/Ssqq1SzY/sopQOQUo5i2ofdHb16efserdTJuuimp7VgUqGuPq1Kb541TukBf4yCAgAAiWIvrvWEwvrYX17U8m179LMr5+mN08ce9jE1O7Yf9Rxc0pHNwwUAh1OYHdR5M8fpwS9eqF8tXqlbn9+qT/7tZeVnpumtc8frspPLNWdCfswmigcAIJVR7MWp9p6QPvrn5Xp2Y7O++445uuSE8V5HAoCjFunp0EfOnqwPnTlJz25s0h0v1ugfy3boT89v0+SSbJ1/fKkWzRqnE8sLYnLxqWMZ9VBeUakd27eNcCIAAGKPYi8ONbX36AN/WKq1u9r0w8tP1GUnl3sdCQBGhN9nOvu4Ep19XIlauvr075W79MCqXfrNU5t18xObNC4vXWdPK9HCyUVaOKlQFYVZI/K6xzLqgREPAIBERbEXZ7Y3d+qsr/1DLiNPTffcoMu/u9zrSAAQE/mZaXrPwkq9Z2GlWjr79Fh1vRa/Wq+H19br9hdrJEkTCjI1Z0K+ZpblaUZZrmaW5ml8QYYCfi4mDQDA4VDsxZElm5v1ib+9rEggQ1eeNlVlF/z1iJ+Db6ABJKL8rDS9fV653j6vXJGIU3V9m5ZsbtayrXu0ZlerHlpTJ+f69/X7TGX5GSofk6nx+ZnKz0pTQWZQ+ZmBfet5mWnKz0xTXmZAOen8VwcASE38DxgHnHP69VOb9YOHqlVZmKVX/vpFlV3yqNexAMATPp9pZlmeZpbl6QNnTJIkdfSEVF3fpvV1barZ06WaPZ2q2dOlJVt2q6Wr77ATuFd+4V7d/OQmBf0+BQO+/W7TAqZ0v1/BgE9ZQX90CSg73c+cpgCAhEax57GWzj597vZX9Mjael08p0w3vHOO8r5Q43UsAIgr2ekBnVQ5RidVjhlye184otauPrUcsLR2h9TRE9L1X/2G3vCuj6knHFZfyKk3HFFvKKL2npB6QxH1hiPqC0XkDnheM2n8R3+r9/52iSoKs1RVlKWqwixVFmVpUnG2soL8NwoAiF/8L+Whx6sb9D93r1Z9a7e+/pZZ+sDpE7n8OAAMYSTmEH3DV796yO0R59TdF1ZHT1idvSG194TU0tWnJ159Qm09M/Sf1bu0p7Nv3/5m0sSibM0ozdWM0tfOKSwfkxmTK4oCAHCkKPY80Nzeo2/dv0b3rNipKSXZuv1jp2neQb6tBgCMzhyiPjNlBQPR3rr0fe133H+j7vvXDyVJFsxSoKBUaWPKlFZUofaxk7S+ZKIeGFMms/6LxkR6OtXbtE19DVvUW79JBdapDUsfV0Yak8gDAEYXxd4oCoUjuvOlGt3w4Dq194T06fOm6ePnTFF6gD8AACBuuchhC82+cETN7b1qau/pX8YVqbHqePWGIpKkWV/7j6aU5GjW+DwdPz5Ps8ryNWt8ngqzg6PxEwAAUhTF3ihwzmnxmnr94KFqbWxo18lVY/Tdd8zRceNyvY4GAKPDfEk9TD3N71NpfoZK8zP2tTnn1Nod0vc//xF9++d/0JpdrVq6ZbfuXbFz3z5l+RmaFZ1WYtrYXE0dm6MpJTnKDPIlIADg2FHsxVAk4vTkhkb99JENWrFjryaXZOtX7z1J5x9fmtR/9ADA6wyjd+xQEnFaGTNTfmaaujY8v1/+3R29WrurVa/ubNGana16dWernljfqHDERR/XP7/g1LE5mjY2R1PH5mhiUbYqi7I0LjeD8wEBAMNGsRcD3X1h3fVSrX7/7BZtbGhXWX6GvvfOOXrnSeVMBAwAqWY4vZq+QP95gMWV/ecCFpVrc1GlHi8qlwVeG+oZDPhUPiaz/4qghVmqiC7j8zNVVpChouwgXyYCAPah2BthD67apa/cs1q7O3p1/Pg8/fjdJ+riOeMVDFDkAUBKOoZezevOn6mtja3avrtT25o7tWN3p7ZHl+Vb96jtgPkFgwGfyvIzVJafsa8ALMvP1PiB2/xM5WUGKAgBIEVQ7I2w8jFZal63TPVP36ZtO1brAa8DAQAS2sTinINu82XkKlAwTv7cEgXyiuXPLdaevBJV5xb3388pkvn2P/8v0tulcFuTQq2N/bdtTQq3NSvc1qxQe/9tpKtV5RWV2rF9W6x/PABADFHsjbA55fna/revpNy5KQCAGDjWcx3Pn6lv3rtS7T0htXWH1N4dUltPSG3dxWrvmaq27pA6e8Ove5zfTD17dumym5/TuPwMleb1L4PXx+alM50EAMQ5ij0AAJKViyg3I025GWkqyx96l3DE7ZtEvr0npI6esNp7Qnpy9WMKnDRLa3a26rG1Derqe31RmJcRUFFOugqzgyrMDqooeluYHVRRTlCF2en7tVEcAsDoirtiz8wukPRTSX5Jv3XO3eBxJAAAkpbfZ/sKwsHuuv9H+nt0MvmBaSTqW7tV19KtutZu1bd0q6m9R80dvdrd0asduzu1Ysde7enoVSh6ZdEDuVCfIr2divR0KtLTIdfbpUhPhyI9nXL72juVn5Wun/zwBmWm+ZUZ9Cszza+sYECZQZ8ygwFlRdvTA8k9pceRqqisUs2O7Uf1WIbtIlWk2r+TuCr2zMwv6ReS3iypRtIyM7vPObfG22QAAKSugWkk8jPTDjtHrHNOrV0hFZdP1Gd+/YC6+sLq6g2rqy+snlBEvaGIesPR21BEPaHwa+vhiFy0Trzun68MI5eihaBfGdHb1xeI/tftk5HWXyimp/mUHoiuB/zR+4Pa0nxK8/vkN5PPZ/L7LLou+cz2tR/sfQhHnEKR124j+92PKBR26gv3vx99A+vR96cvNERbdOm/37+tL/Ta4ztnvUUXf/7dwzyo/cN1fdGf4bHbbtZPHlmvNL9PAZ8p4PcpzW8K+HwK+G3f+v5tA/tGn8f636OBW//A++R77XUG3j9/tN3MNFCvD7yTAwX8a/cHttt+93WY7Yd9Pr4oSEk1O7YPa3i8c05h52Tq/x01SZ87f0bsA46wuCr2JC2QtNE5t1mSzOzvkt4qiWIPAIDRNJwpIw5jfEHmEe3vXH8x9OXLz5QFgrK0dFlahnxp6QesZ8gC6bJghloC0fvRfXxpGfIFM6RA+muPC7z22FgYKALNJOekcLTQizUX6pML98lFQlIkrMypC7W5qeN1+x14FJ36c0aciy5S3qmX6yePbIh55njlXEQKh+XCva+9rwO34T65UK8UPqA91KesjKA+9P73KRjo/6Ig6PcpGOhfAn6fgtGiOC1aPL+2vv/9YCBaTAf6C2gz7SugfdZfyJpP++77bKAAOeD+KBSwLvqNjBv0K+4O3Kb+3699X3iEo194uMH3IwpH+guqUPjAL0eiX3aEIvt9IRIa+MJj0JcdfeGI+iKD93X7vhgJhd1+X5T0hZxK3/dD3bZ0+77Xjuy77R/WHokWeW6If8JV/32/uvvCCTUkPd6KvQmSdgy6XyNpoUdZAABIXcd6cZijuNiYWX8PUri9+Zhfe6jHO+fUF37tj8kDe97CEadff+Va3XH3veoJhdXTF1FPKLLvj9b+22iRdOAfik6KRJzMTIFoL2DAZ/L7B+779m+PLsHAwB/7Pl10/iJ94vu39veEDer9GugRG9xr5hviD/vrFk3X5452mo9F0xUOR9QX7XEMhd2+9b5wRKHIwB/ar/0hHhpojzhdcOFF+vC3fy3nFF363xMnt6+wHCgyndu/7d5ffVc33nhj9BhFj5XcAfd10O1f/vKXddGHrtt/R+2/3wE3g7ZLi//ycy266pP7ivRwZP/fkf0W99rvSjjitLepUXe9VKPecP/vylAFwmgaKBJN/bf7Kv0D3jdpqPd20LYD9ol3wYHiOfrvKc03aP2AojvS06ns9IB8pkG99YN6pAf1PpvPpEG/xw/95ecK+C70+sc9Iubi6Cia2WWSLnDOfTh6/32SFjrnPjlon2slXRu9O11Ss6Sm0c6KI1YsjlMi4DjFP45RYuA4JQaOU/zjGCUGjpO3qpxzJUNtiLeevVpJFYPul0fb9nHO3SLploH7ZrbcOTd/dOLhaHGcEgPHKf5xjBIDxykxcJziH8coMXCc4pfP6wAHWCZpmplNMrOgpCsk3edxJgAAAABIOHHVs+ecC5nZJyU9pP6pF37vnHvV41gAAAAAkHDiqtiTJOfcA5IeOIKH3HL4XRAHOE6JgeMU/zhGiYHjlBg4TvGPY5QYOE5xKq4u0AIAAAAAGBnxds4eAAAAAGAEJHSxZ2YXmFm1mW00sy95nScZmdnvzazBzFYPais0s4fNbEP0dky03czspujxWGlmJw16zPuj+28ws/cPaj/ZzFZFH3OTRScNOthrYGhmVmFmj5vZGjN71cw+HW3nWMUJM8sws6Vm9kr0GH0z2j7JzJZE39d/RC9OJTNLj97fGN0+cdBzXR9trzaz8we1D/mZeLDXwMGZmd/MXjaz+6P3OU5xxsy2Rj+TVpjZ8mgbn3lxxMwKzOwOM1tnZmvN7DSOUXwxs+nRf0MDS6uZfYbjlET6J7ZMvEX9F3DZJGmypKCkVyTN8jpXsi2SzpZ0kqTVg9q+L+lL0fUvSfpedP0iSQ+qfwrPUyUtibYXStocvR0TXR8T3bY0uq9FH3vhoV6D5aDHqUzSSdH1XEnrJc3iWMXPEn3fcqLraZKWRN/Pf0q6Itr+K0n/FV3/uKRfRdevkPSP6Pqs6OdduqRJ0c9B/6E+Ew/2GiyHPF7XSfqbpPsP9R5ynDw9RlslFR/QxmdeHC2SbpX04eh6UFIBxyh+l+jnU52kKo5T8iyeBzjq4NJpkh4adP96Sdd7nSsZF0kTtX+xVy2pLLpeJqk6uv5rSVceuJ+kKyX9elD7r6NtZZLWDWrft9/BXoNl2MfsXklv5ljF5yIpS9JLkhaqfxLaQLR93+ea+q9KfFp0PRDdzw78rBvY72CfidHHDPkaLAc9PuWSHpV0rqT7D/Uecpw8PU5b9fpij8+8OFkk5Uvaouj1IThG8b9IWiTpWY5Tci2JPIxzgqQdg+7XRNsQe+Occ7ui63WSxkXXD3ZMDtVeM0T7oV4DhxEdRjZP/T1HHKs4Eh0auEJSg6SH1d/Ds9c5F4ruMvh93XcsottbJBXpyI9d0SFeA0P7iaQvSopE7x/qPeQ4ecdJWmxmL5rZtdE2PvPixyRJjZL+YP1Don9rZtniGMWzKyTdFl3nOCWJRC72EAdc/9cxLtFfI1mYWY6kOyV9xjnXOngbx8p7zrmwc26u+nuOFkia4W0iHMjMLpHU4Jx70essOKwznXMnSbpQ0ifM7OzBG/nM81xA/aeB3OycmyepQ/1D9fbhGMWP6DnCl0q6/cBtHKfElsjFXq2kikH3y6NtiL16MyuTpOhtQ7T9YMfkUO3lQ7Qf6jVwEGaWpv5C76/OubuizRyrOOSc2yvpcfUP1Ssws4E5Twe/r/uORXR7vqRmHfmxaz7Ea+D1zpB0qZltlfR39Q/l/Kk4TnHHOVcbvW2QdLf6v0DhMy9+1Eiqcc4tid6/Q/3FH8coPl0o6SXnXH30PscpSSRysbdM0jTrv3pZUP1dz/d5nClV3Cfp/dH196v//LCB9qujV2o6VVJLtHv+IUmLzGxM9EpLi9R/LsouSa1mdmr0ykxXH/BcQ70GhhB9/34naa1z7sZBmzhWccLMSsysILqeqf5zKteqv+i7LLrbgcdo4H29TNJj0W8+75N0hfVfBXKSpGnqP/l9yM/E6GMO9ho4gHPueudcuXNuovrfw8ecc1eJ4xRXzCzbzHIH1tX/WbVafObFDedcnaQdZjY92nSepDXiGMWrK/XaEE6J45Q8vD5p8FgW9V8RaL36z3v5itd5knFR/z/8XZL61P8t3TXqP7fkUUkbJD0iqTC6r0n6RfR4rJI0f9DzfEjSxujywUHt89X/H/QmST9X9ETug70Gy0GP05nqH/6wUtKK6HIRxyp+FkknSHo5eoxWS/patH2y+ouAjeofPpMebc+I3t8Y3T550HN9JXocqhW9qlm0fcjPxIO9Bsthj9kb9drVODlOcbRE36tXosurA+8jn3nxtUiaK2l59HPvHvVfpZFjFGeLpGz1jy7IH9TGcUqSZeDNBgAAAAAkkUQexgkAAAAAOAiKPQAAAABIQhR7AAAAAJCEKPYAAAAAIAlR7AEAAABAEqLYAwCkDDP7ipm9amYrzWyFmS08xL5/NLPLDrZ90D5bos/1kpmddpD9/tfM3nSs+QEAOBIBrwMAADAaooXYJZJOcs71mFmxpOAIPPUXnHN3mNkiSb9W/5yKg1/X75z72gi8DgAAR4SePQBAqiiT1OSc65Ek51yTc26nmX3NzJaZ2Wozu8XM7MAHmtnJZvakmb1oZg+ZWdkQz/+UpKnR/bea2ffM7CVJlw/uJTSzU8zsOTN7xcyWmlmumfnN7AfRHCvN7KOxexsAAKmCYg8AkCoWS6ows/Vm9ksze0O0/efOuVOcc7MlZaq/928fM0uT9DNJlznnTpb0e0nfGeL53yJp1aD7zc65k5xzfx/0XEFJ/5D0aefciZLeJKlL0jWSWpxzp0g6RdJHzGzSCPzMAIAUxjBOAEBKcM61m9nJks6SdI6kf5jZlyS1mdkXJWVJKpT0qqR/DXrodEmzJT0c7fTzS9o1aPsPzOx/JDWqv2gb8I8hYkyXtMs5tyyaqVWSokNATxh0jmC+pGmSthz9TwwASHUUewCAlOGcC0t6QtITZrZK0kfVf47dfOfcDjP7hqSMAx5mkl51zg158RVFz9kbor3jCKKZpP/nnHvoCB4DAMAhMYwTAJASzGy6mU0b1DRXUnV0vcnMciQNdfXNakklA1faNLM0Mzv+KGNUSyozs1Oiz5VrZgFJD0n6r+iQUZnZcWaWfZSvAQCAJHr2AACpI0fSz8ysQFJI0kZJ10raK2m1pDpJyw58kHOuNzq88iYzy1f//50/Uf9wzyMSfa53R3Nkqv98vTdJ+q2kiZJeil4gplHS2470+QEAGMycc15nAAAAAACMMIZxAgAAAEASotgDAAAAgCREsQcAAAAASYhiDwAAAACSEMUeAAAAACQhij0AAAAASEIUewAAAACQhCj2AAAAACAJ/X+D413KL5f41wAAAABJRU5ErkJggg==",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "plt.figure(figsize=(15,6))\n",
+ "sns.histplot(df_trains['SalePrice'], kde=True)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Aa we can see, it seems to be a poisson distribution.\\\n",
+ "So next step would be check p-values in order to know if attributes are relevant against SalePrice"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 426,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['MSSubClass', 'MSZoning', 'LotArea', 'Street', 'LotShape',\n",
+ " 'LandContour', 'Utilities', 'LotConfig', 'LandSlope', 'Neighborhood',\n",
+ " 'Condition1', 'Condition2', 'BldgType', 'HouseStyle', 'OverallQual',\n",
+ " 'OverallCond', 'YearBuilt', 'YearRemodAdd', 'RoofStyle', 'RoofMatl',\n",
+ " 'Exterior1st', 'Exterior2nd', 'MasVnrType', 'MasVnrArea', 'ExterQual',\n",
+ " 'ExterCond', 'Foundation', 'BsmtQual', 'BsmtCond', 'BsmtExposure',\n",
+ " 'BsmtFinType1', 'BsmtFinSF1', 'BsmtFinType2', 'BsmtFinSF2', 'BsmtUnfSF',\n",
+ " 'TotalBsmtSF', 'Heating', 'HeatingQC', 'CentralAir', 'Electrical',\n",
+ " '_1stFlrSF', '_2ndFlrSF', 'LowQualFinSF', 'GrLivArea', 'BsmtFullBath',\n",
+ " 'BsmtHalfBath', 'FullBath', 'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr',\n",
+ " 'KitchenQual', 'TotRmsAbvGrd', 'Functional', 'Fireplaces',\n",
+ " 'FireplaceQu', 'GarageType', 'GarageYrBlt', 'GarageFinish',\n",
+ " 'GarageCars', 'GarageArea', 'GarageQual', 'GarageCond', 'PavedDrive',\n",
+ " 'WoodDeckSF', 'OpenPorchSF', 'EnclosedPorch', '_3SsnPorch',\n",
+ " 'ScreenPorch', 'PoolArea', 'PoolQC', 'MiscVal', 'MoSold', 'YrSold',\n",
+ " 'SaleType', 'SaleCondition', 'SalePrice'],\n",
+ " dtype='object')"
+ ]
+ },
+ "execution_count": 426,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_trains.columns\n",
+ " "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 427,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "sig_col_prob = {}\n",
+ "df_categ = []\n",
+ "string = 'SalePrice ~'\n",
+ "for i in df_trains.columns:\n",
+ " string_model = ''\n",
+ " if df_trains[i].dtypes == 'object':\n",
+ " model = ols(f'SalePrice ~ C({i})', data = df_trains).fit()\n",
+ " df_categ.append(i)\n",
+ " string_model = f' C({i}) +'\n",
+ " else:\n",
+ " model = ols(f'SalePrice ~ {i}', data = df_trains).fit()\n",
+ " string_model = ' '+ i + ' +'\n",
+ " if model.f_pvalue < 0.05 and i != 'SalePrice':\n",
+ " sig_col_prob[i] = model.f_pvalue\n",
+ " string = string + string_model\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As we can see in sig_col_prob variable, most of the data are significat relevants for SalePrice\n",
+ "Sales price column should be deleted from them since p value is 0 and is the value to compare with other columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 428,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'MSSubClass': 0.0019668997898389627,\n",
+ " 'MSZoning': 1.5774716114609725e-35,\n",
+ " 'LotArea': 4.188494058573459e-24,\n",
+ " 'LotShape': 1.2291837166364689e-23,\n",
+ " 'LandContour': 1.7873973312019462e-08,\n",
+ " 'LotConfig': 2.3104813531408423e-06,\n",
+ " 'Neighborhood': 7.944897112489736e-215,\n",
+ " 'Condition1': 1.6009219573634665e-07,\n",
+ " 'Condition2': 0.03953280209407048,\n",
+ " 'BldgType': 4.308280969293859e-08,\n",
+ " 'HouseStyle': 1.0205088692647994e-24,\n",
+ " 'OverallQual': 7.0904967242108365e-298,\n",
+ " 'OverallCond': 0.0004263317175746006,\n",
+ " 'YearBuilt': 4.090600212307172e-98,\n",
+ " 'YearRemodAdd': 2.4778743399914053e-90,\n",
+ " 'RoofStyle': 1.1401534170341191e-18,\n",
+ " 'RoofMatl': 8.1385685613492e-08,\n",
+ " 'Exterior1st': 2.089890352267967e-42,\n",
+ " 'Exterior2nd': 7.891304017791327e-41,\n",
+ " 'MasVnrType': 2.9479832665389148e-61,\n",
+ " 'MasVnrArea': 3.019275242084859e-80,\n",
+ " 'ExterQual': 2.7034189043674756e-192,\n",
+ " 'ExterCond': 4.180159538400305e-06,\n",
+ " 'Foundation': 4.157463082303885e-85,\n",
+ " 'BsmtQual': 3.2687659316175138e-183,\n",
+ " 'BsmtCond': 1.692626862971258e-08,\n",
+ " 'BsmtExposure': 5.724344667880617e-41,\n",
+ " 'BsmtFinType1': 1.929029875513618e-62,\n",
+ " 'BsmtFinSF1': 1.0213727645941967e-46,\n",
+ " 'BsmtFinType2': 0.021048762692804365,\n",
+ " 'BsmtUnfSF': 7.932120134626303e-13,\n",
+ " 'TotalBsmtSF': 2.2083254456346077e-145,\n",
+ " 'Heating': 0.002473827878914888,\n",
+ " 'HeatingQC': 2.0295271499167507e-62,\n",
+ " 'CentralAir': 3.3396741377559114e-18,\n",
+ " 'Electrical': 9.426298309197317e-16,\n",
+ " '_1stFlrSF': 1.4979097874823512e-147,\n",
+ " '_2ndFlrSF': 1.5058392150966874e-34,\n",
+ " 'GrLivArea': 4.4019696964122803e-218,\n",
+ " 'BsmtFullBath': 2.3232454159882026e-15,\n",
+ " 'FullBath': 1.9532215024769195e-125,\n",
+ " 'HalfBath': 4.7812069711283865e-25,\n",
+ " 'BedroomAbvGr': 1.2055786109175643e-10,\n",
+ " 'KitchenAbvGr': 6.803353238903545e-05,\n",
+ " 'KitchenQual': 7.207797634752796e-182,\n",
+ " 'TotRmsAbvGrd': 2.2183533266083552e-110,\n",
+ " 'Functional': 0.0012342972262940743,\n",
+ " 'Fireplaces': 5.785107942885739e-76,\n",
+ " 'FireplaceQu': 7.90173630716701e-103,\n",
+ " 'GarageType': 1.0658261450802592e-80,\n",
+ " 'GarageYrBlt': 4.1061923281445587e-91,\n",
+ " 'GarageFinish': 9.900255473029922e-108,\n",
+ " 'GarageCars': 6.995346991827569e-165,\n",
+ " 'GarageArea': 1.617669668252519e-151,\n",
+ " 'GarageQual': 1.870218396971908e-23,\n",
+ " 'GarageCond': 1.1222351417909253e-22,\n",
+ " 'PavedDrive': 2.8134604962488835e-16,\n",
+ " 'WoodDeckSF': 1.963043699407181e-34,\n",
+ " 'OpenPorchSF': 8.470965811353925e-32,\n",
+ " 'EnclosedPorch': 3.9911095702792166e-07,\n",
+ " 'ScreenPorch': 4.604602126747757e-05,\n",
+ " 'PoolArea': 0.0004801124771611024,\n",
+ " 'PoolQC': 8.554588306049216e-07,\n",
+ " 'SaleType': 7.253528527759462e-40,\n",
+ " 'SaleCondition': 2.2710946808736094e-41}"
+ ]
+ },
+ "execution_count": 428,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "sig_col_prob"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Will take all those variables in our matrix\n",
+ "so next step will be OLS method"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 429,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " OLS Regression Results \n",
+ "==============================================================================\n",
+ "Dep. Variable: SalePrice R-squared: 0.929\n",
+ "Model: OLS Adj. R-squared: 0.916\n",
+ "Method: Least Squares F-statistic: 69.26\n",
+ "Date: Wed, 09 Feb 2022 Prob (F-statistic): 0.00\n",
+ "Time: 22:07:50 Log-Likelihood: -16073.\n",
+ "No. Observations: 1413 AIC: 3.260e+04\n",
+ "Df Residuals: 1187 BIC: 3.378e+04\n",
+ "Df Model: 225 \n",
+ "Covariance Type: nonrobust \n",
+ "===============================================================================================\n",
+ " coef std err t P>|t| [0.025 0.975]\n",
+ "-----------------------------------------------------------------------------------------------\n",
+ "Intercept -9.814e+05 1.4e+05 -7.024 0.000 -1.26e+06 -7.07e+05\n",
+ "C(MSZoning)[T.FV] 3.742e+04 1.21e+04 3.091 0.002 1.37e+04 6.12e+04\n",
+ "C(MSZoning)[T.RH] 2.819e+04 1.2e+04 2.343 0.019 4579.932 5.18e+04\n",
+ "C(MSZoning)[T.RL] 3.004e+04 1.03e+04 2.915 0.004 9820.460 5.03e+04\n",
+ "C(MSZoning)[T.RM] 2.706e+04 9588.138 2.822 0.005 8248.474 4.59e+04\n",
+ "C(LotShape)[T.1] -2200.1829 9540.418 -0.231 0.818 -2.09e+04 1.65e+04\n",
+ "C(LotShape)[T.2] -6849.3731 8994.745 -0.761 0.447 -2.45e+04 1.08e+04\n",
+ "C(LotShape)[T.3] -5321.9323 9049.707 -0.588 0.557 -2.31e+04 1.24e+04\n",
+ "C(LandContour)[T.HLS] 8875.6057 5224.029 1.699 0.090 -1373.755 1.91e+04\n",
+ "C(LandContour)[T.Low] -7143.7490 6537.156 -1.093 0.275 -2e+04 5681.919\n",
+ "C(LandContour)[T.Lvl] 4574.3476 3773.285 1.212 0.226 -2828.703 1.2e+04\n",
+ "C(LotConfig)[T.CulDSac] 7984.4736 3255.886 2.452 0.014 1596.541 1.44e+04\n",
+ "C(LotConfig)[T.FR2] -7881.9487 4079.475 -1.932 0.054 -1.59e+04 121.837\n",
+ "C(LotConfig)[T.FR3] -1.478e+04 1.27e+04 -1.160 0.246 -3.98e+04 1.02e+04\n",
+ "C(LotConfig)[T.Inside] -1379.0496 1797.435 -0.767 0.443 -4905.554 2147.455\n",
+ "C(Neighborhood)[T.Blueste] 987.2761 1.92e+04 0.051 0.959 -3.68e+04 3.87e+04\n",
+ "C(Neighborhood)[T.BrDale] -373.0447 1.11e+04 -0.034 0.973 -2.22e+04 2.15e+04\n",
+ "C(Neighborhood)[T.BrkSide] -1831.8944 9586.521 -0.191 0.848 -2.06e+04 1.7e+04\n",
+ "C(Neighborhood)[T.ClearCr] -1.271e+04 9462.242 -1.344 0.179 -3.13e+04 5850.691\n",
+ "C(Neighborhood)[T.CollgCr] -8437.4333 7373.875 -1.144 0.253 -2.29e+04 6029.848\n",
+ "C(Neighborhood)[T.Crawfor] 1.623e+04 8698.840 1.865 0.062 -840.438 3.33e+04\n",
+ "C(Neighborhood)[T.Edwards] -1.785e+04 8163.026 -2.187 0.029 -3.39e+04 -1839.270\n",
+ "C(Neighborhood)[T.Gilbert] -8477.6664 7793.449 -1.088 0.277 -2.38e+04 6812.804\n",
+ "C(Neighborhood)[T.IDOTRR] -7822.0452 1.09e+04 -0.717 0.474 -2.92e+04 1.36e+04\n",
+ "C(Neighborhood)[T.MeadowV] -7152.3107 1.13e+04 -0.630 0.529 -2.94e+04 1.51e+04\n",
+ "C(Neighborhood)[T.Mitchel] -1.764e+04 8330.643 -2.118 0.034 -3.4e+04 -1297.379\n",
+ "C(Neighborhood)[T.NAmes] -1.539e+04 7926.908 -1.942 0.052 -3.09e+04 160.870\n",
+ "C(Neighborhood)[T.NPkVill] 1.235e+04 1.42e+04 0.867 0.386 -1.56e+04 4.03e+04\n",
+ "C(Neighborhood)[T.NWAmes] -1.602e+04 8125.454 -1.971 0.049 -3.2e+04 -76.225\n",
+ "C(Neighborhood)[T.NoRidge] 2.732e+04 8551.259 3.195 0.001 1.05e+04 4.41e+04\n",
+ "C(Neighborhood)[T.NridgHt] 1.867e+04 7622.734 2.449 0.014 3715.333 3.36e+04\n",
+ "C(Neighborhood)[T.OldTown] -1.15e+04 9770.899 -1.177 0.239 -3.07e+04 7669.256\n",
+ "C(Neighborhood)[T.SWISU] -5686.7763 9821.168 -0.579 0.563 -2.5e+04 1.36e+04\n",
+ "C(Neighborhood)[T.Sawyer] -8397.8147 8243.455 -1.019 0.309 -2.46e+04 7775.551\n",
+ "C(Neighborhood)[T.SawyerW] -2214.8204 7952.487 -0.279 0.781 -1.78e+04 1.34e+04\n",
+ "C(Neighborhood)[T.Somerst] 124.6180 9139.365 0.014 0.989 -1.78e+04 1.81e+04\n",
+ "C(Neighborhood)[T.StoneBr] 3.894e+04 8420.487 4.624 0.000 2.24e+04 5.55e+04\n",
+ "C(Neighborhood)[T.Timber] -1.205e+04 8255.331 -1.460 0.145 -2.82e+04 4144.479\n",
+ "C(Neighborhood)[T.Veenker] 2821.7828 1.06e+04 0.266 0.791 -1.8e+04 2.37e+04\n",
+ "C(Condition1)[T.Feedr] 6002.4176 5165.818 1.162 0.245 -4132.734 1.61e+04\n",
+ "C(Condition1)[T.Norm] 1.459e+04 4292.563 3.399 0.001 6166.996 2.3e+04\n",
+ "C(Condition1)[T.PosA] 9998.1984 1.01e+04 0.992 0.321 -9776.357 2.98e+04\n",
+ "C(Condition1)[T.PosN] 1.158e+04 7515.926 1.540 0.124 -3168.350 2.63e+04\n",
+ "C(Condition1)[T.RRAe] -1.358e+04 9602.918 -1.414 0.158 -3.24e+04 5263.447\n",
+ "C(Condition1)[T.RRAn] 1.253e+04 7022.796 1.784 0.075 -1248.493 2.63e+04\n",
+ "C(Condition1)[T.RRNe] 27.4000 1.76e+04 0.002 0.999 -3.46e+04 3.46e+04\n",
+ "C(Condition1)[T.RRNn] 3973.1493 1.29e+04 0.308 0.758 -2.14e+04 2.93e+04\n",
+ "C(Condition2)[T.Feedr] -4028.4922 2.42e+04 -0.167 0.868 -5.14e+04 4.34e+04\n",
+ "C(Condition2)[T.Norm] -5459.8097 2.1e+04 -0.260 0.795 -4.67e+04 3.58e+04\n",
+ "C(Condition2)[T.PosA] 4.422e+04 3.79e+04 1.168 0.243 -3.01e+04 1.19e+05\n",
+ "C(Condition2)[T.PosN] -2.309e+05 2.83e+04 -8.161 0.000 -2.86e+05 -1.75e+05\n",
+ "C(Condition2)[T.RRAe] -7.997e+04 4.42e+04 -1.810 0.071 -1.67e+05 6714.071\n",
+ "C(Condition2)[T.RRAn] -1.404e+04 3.23e+04 -0.435 0.664 -7.74e+04 4.93e+04\n",
+ "C(Condition2)[T.RRNn] 2049.8201 2.78e+04 0.074 0.941 -5.25e+04 5.66e+04\n",
+ "C(BldgType)[T.2fmCon] -2378.0050 1.26e+04 -0.189 0.850 -2.7e+04 2.22e+04\n",
+ "C(BldgType)[T.Duplex] -1.361e+04 7514.510 -1.811 0.070 -2.84e+04 1134.801\n",
+ "C(BldgType)[T.Twnhs] -1.957e+04 9991.219 -1.959 0.050 -3.92e+04 33.361\n",
+ "C(BldgType)[T.TwnhsE] -1.428e+04 9019.408 -1.583 0.114 -3.2e+04 3418.204\n",
+ "C(HouseStyle)[T.1.5Unf] 1.279e+04 8094.930 1.579 0.115 -3096.539 2.87e+04\n",
+ "C(HouseStyle)[T.1Story] 6220.4270 4537.597 1.371 0.171 -2682.178 1.51e+04\n",
+ "C(HouseStyle)[T.2.5Fin] -1.525e+04 1.26e+04 -1.215 0.225 -3.99e+04 9378.694\n",
+ "C(HouseStyle)[T.2.5Unf] -6876.6639 9358.825 -0.735 0.463 -2.52e+04 1.15e+04\n",
+ "C(HouseStyle)[T.2Story] -5046.3751 3612.626 -1.397 0.163 -1.21e+04 2041.470\n",
+ "C(HouseStyle)[T.SFoyer] 3522.7167 6630.304 0.531 0.595 -9485.705 1.65e+04\n",
+ "C(HouseStyle)[T.SLvl] 3883.3669 5674.855 0.684 0.494 -7250.498 1.5e+04\n",
+ "C(RoofStyle)[T.Gable] 1.198e+04 1.87e+04 0.639 0.523 -2.48e+04 4.88e+04\n",
+ "C(RoofStyle)[T.Gambrel] 1.364e+04 2.05e+04 0.666 0.505 -2.65e+04 5.38e+04\n",
+ "C(RoofStyle)[T.Hip] 1.261e+04 1.88e+04 0.671 0.503 -2.43e+04 4.95e+04\n",
+ "C(RoofStyle)[T.Mansard] 2.453e+04 2.18e+04 1.125 0.261 -1.82e+04 6.73e+04\n",
+ "C(RoofStyle)[T.Shed] 6.269e+04 3.4e+04 1.844 0.065 -4010.079 1.29e+05\n",
+ "C(RoofMatl)[T.CompShg] 5.66e+05 4.7e+04 12.045 0.000 4.74e+05 6.58e+05\n",
+ "C(RoofMatl)[T.Membran] 6.275e+05 5.77e+04 10.878 0.000 5.14e+05 7.41e+05\n",
+ "C(RoofMatl)[T.Metal] 5.943e+05 5.65e+04 10.513 0.000 4.83e+05 7.05e+05\n",
+ "C(RoofMatl)[T.Roll] 5.577e+05 5.37e+04 10.386 0.000 4.52e+05 6.63e+05\n",
+ "C(RoofMatl)[T.Tar&Grv] 5.637e+05 5.11e+04 11.033 0.000 4.63e+05 6.64e+05\n",
+ "C(RoofMatl)[T.WdShake] 5.568e+05 4.98e+04 11.183 0.000 4.59e+05 6.54e+05\n",
+ "C(RoofMatl)[T.WdShngl] 6.233e+05 4.79e+04 13.005 0.000 5.29e+05 7.17e+05\n",
+ "C(Exterior1st)[T.BrkComm] -3.659e+04 3.37e+04 -1.085 0.278 -1.03e+05 2.96e+04\n",
+ "C(Exterior1st)[T.BrkFace] 5662.9980 1.36e+04 0.417 0.677 -2.1e+04 3.23e+04\n",
+ "C(Exterior1st)[T.CBlock] -5610.5903 1.38e+04 -0.407 0.684 -3.27e+04 2.14e+04\n",
+ "C(Exterior1st)[T.CemntBd] -8681.0563 1.95e+04 -0.444 0.657 -4.7e+04 2.97e+04\n",
+ "C(Exterior1st)[T.HdBoard] -1.615e+04 1.37e+04 -1.176 0.240 -4.31e+04 1.08e+04\n",
+ "C(Exterior1st)[T.ImStucc] -3.599e+04 2.86e+04 -1.257 0.209 -9.21e+04 2.02e+04\n",
+ "C(Exterior1st)[T.MetalSd] -9428.2118 1.53e+04 -0.616 0.538 -3.94e+04 2.06e+04\n",
+ "C(Exterior1st)[T.Plywood] -1.742e+04 1.36e+04 -1.281 0.200 -4.41e+04 9254.197\n",
+ "C(Exterior1st)[T.Stone] -762.7865 2.69e+04 -0.028 0.977 -5.35e+04 5.2e+04\n",
+ "C(Exterior1st)[T.Stucco] -1.409e+04 1.5e+04 -0.942 0.347 -4.34e+04 1.53e+04\n",
+ "C(Exterior1st)[T.VinylSd] -1.647e+04 1.39e+04 -1.187 0.236 -4.37e+04 1.08e+04\n",
+ "C(Exterior1st)[T.Wd Sdng] -1.645e+04 1.32e+04 -1.248 0.212 -4.23e+04 9419.619\n",
+ "C(Exterior1st)[T.WdShing] -1.445e+04 1.42e+04 -1.020 0.308 -4.23e+04 1.34e+04\n",
+ "C(Exterior2nd)[T.AsphShn] 1.431e+04 2.28e+04 0.629 0.530 -3.03e+04 5.9e+04\n",
+ "C(Exterior2nd)[T.Brk Cmn] 7890.5080 2.08e+04 0.379 0.705 -3.29e+04 4.87e+04\n",
+ "C(Exterior2nd)[T.BrkFace] 6170.9966 1.41e+04 0.438 0.662 -2.15e+04 3.38e+04\n",
+ "C(Exterior2nd)[T.CBlock] -5610.5903 1.38e+04 -0.407 0.684 -3.27e+04 2.14e+04\n",
+ "C(Exterior2nd)[T.CmentBd] 9177.9617 1.93e+04 0.476 0.634 -2.87e+04 4.7e+04\n",
+ "C(Exterior2nd)[T.HdBoard] 1.073e+04 1.33e+04 0.809 0.419 -1.53e+04 3.68e+04\n",
+ "C(Exterior2nd)[T.ImStucc] 2.219e+04 1.51e+04 1.470 0.142 -7436.990 5.18e+04\n",
+ "C(Exterior2nd)[T.MetalSd] 8170.2654 1.49e+04 0.547 0.585 -2.11e+04 3.75e+04\n",
+ "C(Exterior2nd)[T.Other] -1.516e+04 2.77e+04 -0.547 0.585 -6.96e+04 3.92e+04\n",
+ "C(Exterior2nd)[T.Plywood] 8968.0944 1.29e+04 0.693 0.488 -1.64e+04 3.43e+04\n",
+ "C(Exterior2nd)[T.Stone] -1.226e+04 2.44e+04 -0.503 0.615 -6.01e+04 3.55e+04\n",
+ "C(Exterior2nd)[T.Stucco] 1.119e+04 1.44e+04 0.780 0.436 -1.7e+04 3.94e+04\n",
+ "C(Exterior2nd)[T.VinylSd] 1.455e+04 1.34e+04 1.088 0.277 -1.17e+04 4.08e+04\n",
+ "C(Exterior2nd)[T.Wd Sdng] 1.384e+04 1.28e+04 1.084 0.279 -1.12e+04 3.89e+04\n",
+ "C(Exterior2nd)[T.Wd Shng] 8768.5281 1.33e+04 0.662 0.508 -1.72e+04 3.48e+04\n",
+ "C(MasVnrType)[T.BrkFace] 6986.6411 6911.529 1.011 0.312 -6573.534 2.05e+04\n",
+ "C(MasVnrType)[T.None] 1.03e+04 6974.031 1.477 0.140 -3384.859 2.4e+04\n",
+ "C(MasVnrType)[T.Stone] 1.17e+04 7297.153 1.603 0.109 -2617.682 2.6e+04\n",
+ "C(ExterQual)[T.2] -5701.6040 9876.582 -0.577 0.564 -2.51e+04 1.37e+04\n",
+ "C(ExterQual)[T.3] -5251.7123 1.01e+04 -0.518 0.605 -2.52e+04 1.47e+04\n",
+ "C(ExterQual)[T.4] 1.422e+04 1.12e+04 1.266 0.206 -7807.374 3.62e+04\n",
+ "C(ExterCond)[T.1] -2532.1160 2.7e+04 -0.094 0.925 -5.54e+04 5.04e+04\n",
+ "C(ExterCond)[T.2] -7816.7501 2.65e+04 -0.294 0.768 -5.99e+04 4.43e+04\n",
+ "C(ExterCond)[T.3] -1.11e+04 2.67e+04 -0.416 0.677 -6.34e+04 4.12e+04\n",
+ "C(ExterCond)[T.4] -3062.6573 3.19e+04 -0.096 0.924 -6.57e+04 5.96e+04\n",
+ "C(Foundation)[T.CBlock] 4529.3984 3239.513 1.398 0.162 -1826.411 1.09e+04\n",
+ "C(Foundation)[T.PConc] 4435.5194 3501.266 1.267 0.205 -2433.841 1.13e+04\n",
+ "C(Foundation)[T.Stone] 1.073e+04 1.12e+04 0.958 0.338 -1.12e+04 3.27e+04\n",
+ "C(Foundation)[T.Wood] -1.831e+04 1.46e+04 -1.250 0.211 -4.7e+04 1.04e+04\n",
+ "C(BsmtQual)[T.3] -4066.0106 4993.560 -0.814 0.416 -1.39e+04 5731.176\n",
+ "C(BsmtQual)[T.4] -6270.0218 5523.768 -1.135 0.257 -1.71e+04 4567.415\n",
+ "C(BsmtQual)[T.5] 1.271e+04 6446.058 1.971 0.049 58.790 2.54e+04\n",
+ "C(BsmtCond)[T.2] -2.337e+04 2.9e+04 -0.807 0.420 -8.02e+04 3.35e+04\n",
+ "C(BsmtCond)[T.3] -2.16e+04 2.92e+04 -0.739 0.460 -7.89e+04 3.57e+04\n",
+ "C(BsmtCond)[T.4] -2.454e+04 2.94e+04 -0.834 0.404 -8.22e+04 3.32e+04\n",
+ "C(BsmtExposure)[T.1] 1595.2945 2.33e+04 0.068 0.945 -4.41e+04 4.73e+04\n",
+ "C(BsmtExposure)[T.2] 3813.1041 2.34e+04 0.163 0.871 -4.21e+04 4.98e+04\n",
+ "C(BsmtExposure)[T.3] 6946.5587 2.33e+04 0.298 0.766 -3.88e+04 5.27e+04\n",
+ "C(BsmtExposure)[T.4] 2.226e+04 2.34e+04 0.950 0.342 -2.37e+04 6.82e+04\n",
+ "C(BsmtFinType1)[T.BLQ] 2460.8109 2825.411 0.871 0.384 -3082.545 8004.167\n",
+ "C(BsmtFinType1)[T.GLQ] 4384.5898 2542.374 1.725 0.085 -603.458 9372.637\n",
+ "C(BsmtFinType1)[T.LwQ] -4945.9715 3795.368 -1.303 0.193 -1.24e+04 2500.405\n",
+ "C(BsmtFinType1)[T.Rec] -745.1046 3027.960 -0.246 0.806 -6685.855 5195.646\n",
+ "C(BsmtFinType1)[T.Unf] 1459.2519 2953.112 0.494 0.621 -4334.649 7253.153\n",
+ "C(BsmtFinType2)[T.BLQ] -1.24e+04 7662.603 -1.619 0.106 -2.74e+04 2630.013\n",
+ "C(BsmtFinType2)[T.GLQ] -4619.3928 9485.148 -0.487 0.626 -2.32e+04 1.4e+04\n",
+ "C(BsmtFinType2)[T.LwQ] -1.148e+04 7478.853 -1.535 0.125 -2.62e+04 3191.867\n",
+ "C(BsmtFinType2)[T.Rec] -9178.5479 7195.119 -1.276 0.202 -2.33e+04 4938.021\n",
+ "C(BsmtFinType2)[T.Unf] -6810.5418 7647.899 -0.891 0.373 -2.18e+04 8194.365\n",
+ "C(Heating)[T.GasW] -3114.4210 7180.743 -0.434 0.665 -1.72e+04 1.1e+04\n",
+ "C(Heating)[T.Grav] -2131.7181 1.2e+04 -0.178 0.859 -2.57e+04 2.14e+04\n",
+ "C(Heating)[T.OthW] -2.35e+04 1.88e+04 -1.248 0.212 -6.04e+04 1.35e+04\n",
+ "C(HeatingQC)[T.1] 4418.3780 2.74e+04 0.161 0.872 -4.94e+04 5.82e+04\n",
+ "C(HeatingQC)[T.2] 318.8335 2.71e+04 0.012 0.991 -5.28e+04 5.34e+04\n",
+ "C(HeatingQC)[T.3] -427.6546 2.71e+04 -0.016 0.987 -5.36e+04 5.27e+04\n",
+ "C(HeatingQC)[T.4] 3776.9433 2.71e+04 0.140 0.889 -4.93e+04 5.69e+04\n",
+ "C(CentralAir)[T.1] 1311.6619 4056.183 0.323 0.746 -6646.426 9269.750\n",
+ "C(Electrical)[T.FuseF] -2823.0719 6400.397 -0.441 0.659 -1.54e+04 9734.281\n",
+ "C(Electrical)[T.FuseP] -4673.2589 2.25e+04 -0.207 0.836 -4.89e+04 3.95e+04\n",
+ "C(Electrical)[T.Mix] -75.8922 4.45e+04 -0.002 0.999 -8.74e+04 8.72e+04\n",
+ "C(Electrical)[T.SBrkr] -1938.8937 3044.597 -0.637 0.524 -7912.284 4034.497\n",
+ "C(KitchenQual)[T.2] -1124.4568 4988.154 -0.225 0.822 -1.09e+04 8662.124\n",
+ "C(KitchenQual)[T.3] -2318.7271 5418.511 -0.428 0.669 -1.29e+04 8312.199\n",
+ "C(KitchenQual)[T.4] 2.108e+04 6320.944 3.335 0.001 8678.299 3.35e+04\n",
+ "C(Functional)[T.Maj2] -3356.9864 1.47e+04 -0.228 0.820 -3.23e+04 2.56e+04\n",
+ "C(Functional)[T.Min1] 6278.4208 8990.992 0.698 0.485 -1.14e+04 2.39e+04\n",
+ "C(Functional)[T.Min2] 7127.2254 8975.127 0.794 0.427 -1.05e+04 2.47e+04\n",
+ "C(Functional)[T.Mod] -3465.5594 1.16e+04 -0.299 0.765 -2.62e+04 1.92e+04\n",
+ "C(Functional)[T.Sev] -3.185e+04 3.06e+04 -1.042 0.298 -9.18e+04 2.81e+04\n",
+ "C(Functional)[T.Typ] 1.781e+04 7776.153 2.290 0.022 2550.092 3.31e+04\n",
+ "C(FireplaceQu)[T.1] 3342.8752 6567.474 0.509 0.611 -9542.276 1.62e+04\n",
+ "C(FireplaceQu)[T.2] -9022.9753 5395.127 -1.672 0.095 -1.96e+04 1562.073\n",
+ "C(FireplaceQu)[T.3] -4348.6171 3648.302 -1.192 0.234 -1.15e+04 2809.222\n",
+ "C(FireplaceQu)[T.4] -5603.0024 3490.552 -1.605 0.109 -1.25e+04 1245.337\n",
+ "C(FireplaceQu)[T.5] -7032.3220 6444.743 -1.091 0.275 -1.97e+04 5612.036\n",
+ "C(GarageType)[T.Attchd] 1.993e+04 1.09e+04 1.830 0.068 -1442.598 4.13e+04\n",
+ "C(GarageType)[T.Basment] 2.036e+04 1.26e+04 1.610 0.108 -4449.219 4.52e+04\n",
+ "C(GarageType)[T.BuiltIn] 2.045e+04 1.14e+04 1.797 0.073 -1880.630 4.28e+04\n",
+ "C(GarageType)[T.CarPort] 2.853e+04 1.6e+04 1.781 0.075 -2903.167 6e+04\n",
+ "C(GarageType)[T.Detchd] 2.266e+04 1.09e+04 2.074 0.038 1229.079 4.41e+04\n",
+ "C(GarageType)[T.NA] -3.952e+05 5.96e+04 -6.627 0.000 -5.12e+05 -2.78e+05\n",
+ "C(GarageFinish)[T.1] -1.95e+05 2.69e+04 -7.247 0.000 -2.48e+05 -1.42e+05\n",
+ "C(GarageFinish)[T.2] -1.966e+05 2.7e+04 -7.289 0.000 -2.5e+05 -1.44e+05\n",
+ "C(GarageFinish)[T.3] -1.946e+05 2.72e+04 -7.167 0.000 -2.48e+05 -1.41e+05\n",
+ "C(GarageQual)[T.1] -1.573e+05 2.66e+04 -5.916 0.000 -2.09e+05 -1.05e+05\n",
+ "C(GarageQual)[T.2] -1.384e+05 1.78e+04 -7.777 0.000 -1.73e+05 -1.03e+05\n",
+ "C(GarageQual)[T.3] -1.34e+05 1.81e+04 -7.415 0.000 -1.69e+05 -9.86e+04\n",
+ "C(GarageQual)[T.4] -1.307e+05 1.92e+04 -6.800 0.000 -1.68e+05 -9.3e+04\n",
+ "C(GarageQual)[T.5] -2.579e+04 2.97e+04 -0.869 0.385 -8.4e+04 3.24e+04\n",
+ "C(GarageCond)[T.1] -9.528e+04 2.1e+04 -4.538 0.000 -1.36e+05 -5.41e+04\n",
+ "C(GarageCond)[T.2] -9.724e+04 1.83e+04 -5.318 0.000 -1.33e+05 -6.14e+04\n",
+ "C(GarageCond)[T.3] -9.472e+04 1.8e+04 -5.267 0.000 -1.3e+05 -5.94e+04\n",
+ "C(GarageCond)[T.4] -1.021e+05 1.94e+04 -5.263 0.000 -1.4e+05 -6.41e+04\n",
+ "C(GarageCond)[T.5] -1.968e+05 3.25e+04 -6.063 0.000 -2.61e+05 -1.33e+05\n",
+ "C(PavedDrive)[T.P] -1461.1196 5737.942 -0.255 0.799 -1.27e+04 9796.518\n",
+ "C(PavedDrive)[T.Y] 675.0555 3664.489 0.184 0.854 -6514.541 7864.653\n",
+ "C(PoolQC)[T.1] -3.431e+05 1.05e+05 -3.261 0.001 -5.5e+05 -1.37e+05\n",
+ "C(PoolQC)[T.3] -3.259e+05 1.15e+05 -2.830 0.005 -5.52e+05 -1e+05\n",
+ "C(PoolQC)[T.4] -2.078e+05 9.68e+04 -2.148 0.032 -3.98e+05 -1.8e+04\n",
+ "C(SaleType)[T.CWD] 1.602e+04 1.31e+04 1.227 0.220 -9603.820 4.17e+04\n",
+ "C(SaleType)[T.Con] 2.5e+04 1.79e+04 1.400 0.162 -1e+04 6e+04\n",
+ "C(SaleType)[T.ConLD] 1.725e+04 1.04e+04 1.655 0.098 -3198.700 3.77e+04\n",
+ "C(SaleType)[T.ConLI] 2104.8763 1.18e+04 0.179 0.858 -2.1e+04 2.52e+04\n",
+ "C(SaleType)[T.ConLw] 2489.1970 1.24e+04 0.201 0.840 -2.18e+04 2.67e+04\n",
+ "C(SaleType)[T.New] 3.42e+04 1.55e+04 2.209 0.027 3818.106 6.46e+04\n",
+ "C(SaleType)[T.Oth] 9624.0524 1.46e+04 0.657 0.511 -1.91e+04 3.83e+04\n",
+ "C(SaleType)[T.WD] -248.7934 4306.908 -0.058 0.954 -8698.794 8201.207\n",
+ "C(SaleCondition)[T.AdjLand] 1.683e+04 1.6e+04 1.049 0.294 -1.46e+04 4.83e+04\n",
+ "C(SaleCondition)[T.Alloca] 5815.5561 1.05e+04 0.556 0.578 -1.47e+04 2.63e+04\n",
+ "C(SaleCondition)[T.Family] -932.0291 6181.968 -0.151 0.880 -1.31e+04 1.12e+04\n",
+ "C(SaleCondition)[T.Normal] 7039.6176 2946.502 2.389 0.017 1258.684 1.28e+04\n",
+ "C(SaleCondition)[T.Partial] -1.297e+04 1.49e+04 -0.871 0.384 -4.22e+04 1.63e+04\n",
+ "MSSubClass -67.6730 82.910 -0.816 0.415 -230.340 94.994\n",
+ "LotArea 0.4440 0.088 5.026 0.000 0.271 0.617\n",
+ "OverallQual 6478.6538 1037.638 6.244 0.000 4442.846 8514.462\n",
+ "OverallCond 5686.2059 894.487 6.357 0.000 3931.255 7441.157\n",
+ "YearBuilt 335.1348 82.245 4.075 0.000 173.773 496.497\n",
+ "YearRemodAdd 93.4559 57.144 1.635 0.102 -18.659 205.571\n",
+ "MasVnrArea 21.9689 5.858 3.750 0.000 10.475 33.463\n",
+ "BsmtFinSF1 6.7297 8.069 0.834 0.404 -9.101 22.561\n",
+ "BsmtUnfSF -10.0959 8.190 -1.233 0.218 -26.164 5.972\n",
+ "TotalBsmtSF 31.6512 9.287 3.408 0.001 13.430 49.872\n",
+ "_1stFlrSF 41.6425 19.662 2.118 0.034 3.067 80.218\n",
+ "_2ndFlrSF 59.0321 18.334 3.220 0.001 23.062 95.003\n",
+ "GrLivArea 3.9596 19.184 0.206 0.837 -33.679 41.598\n",
+ "BsmtFullBath 1569.8423 1903.132 0.825 0.410 -2164.036 5303.720\n",
+ "FullBath 4239.9643 2264.774 1.872 0.061 -203.442 8683.371\n",
+ "HalfBath 1228.6119 2144.829 0.573 0.567 -2979.466 5436.690\n",
+ "BedroomAbvGr -3451.0551 1410.704 -2.446 0.015 -6218.806 -683.304\n",
+ "KitchenAbvGr -1.218e+04 5835.297 -2.087 0.037 -2.36e+04 -729.912\n",
+ "TotRmsAbvGrd 2041.6346 978.559 2.086 0.037 121.736 3961.533\n",
+ "Fireplaces 5537.4548 2613.273 2.119 0.034 410.305 1.07e+04\n",
+ "GarageYrBlt -42.0536 60.677 -0.693 0.488 -161.100 76.993\n",
+ "GarageCars 4250.3124 2325.997 1.827 0.068 -313.211 8813.836\n",
+ "GarageArea 18.1000 8.021 2.256 0.024 2.362 33.838\n",
+ "WoodDeckSF 11.8701 5.959 1.992 0.047 0.178 23.562\n",
+ "OpenPorchSF -1.8788 11.691 -0.161 0.872 -24.816 21.058\n",
+ "EnclosedPorch 4.2685 12.829 0.333 0.739 -20.902 29.440\n",
+ "ScreenPorch 32.8250 12.523 2.621 0.009 8.256 57.394\n",
+ "PoolArea 588.1664 177.050 3.322 0.001 240.801 935.531\n",
+ "==============================================================================\n",
+ "Omnibus: 371.064 Durbin-Watson: 1.921\n",
+ "Prob(Omnibus): 0.000 Jarque-Bera (JB): 12823.924\n",
+ "Skew: 0.513 Prob(JB): 0.00\n",
+ "Kurtosis: 17.723 Cond. No. 5.34e+18\n",
+ "==============================================================================\n",
+ "\n",
+ "Notes:\n",
+ "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
+ "[2] The smallest eigenvalue is 1.11e-26. This might indicate that there are\n",
+ "strong multicollinearity problems or that the design matrix is singular.\n"
+ ]
+ }
+ ],
+ "source": [
+ "model = ols(string[:-2], data = df_trains).fit()\n",
+ "print(model.summary())"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "There are at least 65 columns that seem to be relevant\\\n",
+ "\n",
+ "Creating correlation matrix with orinary values"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 430,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "OverallQual 0.786705\n",
+ "YearBuilt 0.518637\n",
+ "YearRemodAdd 0.500217\n",
+ "TotalBsmtSF 0.611020\n",
+ "_1stFlrSF 0.614622\n",
+ "GrLivArea 0.711060\n",
+ "FullBath 0.575542\n",
+ "TotRmsAbvGrd 0.545546\n",
+ "GarageYrBlt 0.502116\n",
+ "GarageCars 0.641828\n",
+ "GarageArea 0.621091\n",
+ "SalePrice 1.000000\n",
+ "Name: SalePrice, dtype: float64"
+ ]
+ },
+ "execution_count": 430,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_trains_corr = df_trains.corr()[df_trains.corr()['SalePrice'].abs() > 0.50]['SalePrice']#en la correlacion se necesita que sea mayor a 50\n",
+ "df_trains_corr"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# PCA Analysis\n",
+ "\n",
+ "Matching variables with good correlation of SalePrice againts the ones that seem to be relevant"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 431,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearn.preprocessing import StandardScaler\n",
+ "\n",
+ "scaler = StandardScaler() #Se crea una instancia de la StandardScaler\n",
+ "df_dummies = pd.get_dummies(data=df_trains[df_categ]) # generamos dummies de las variables categoricas"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Getting ordinary columns with good correlation and add them to the dummies one\\\n",
+ "Checking which variables have good correlation"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 432,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " SalePrice | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | ExterQual_2 | \n",
+ " -0.587583 | \n",
+ "
\n",
+ " \n",
+ " | BsmtQual_5 | \n",
+ " 0.551399 | \n",
+ "
\n",
+ " \n",
+ " | KitchenQual_2 | \n",
+ " -0.520576 | \n",
+ "
\n",
+ " \n",
+ " | KitchenQual_4 | \n",
+ " 0.501267 | \n",
+ "
\n",
+ " \n",
+ " | OverallQual | \n",
+ " 0.786705 | \n",
+ "
\n",
+ " \n",
+ " | YearBuilt | \n",
+ " 0.518637 | \n",
+ "
\n",
+ " \n",
+ " | YearRemodAdd | \n",
+ " 0.500217 | \n",
+ "
\n",
+ " \n",
+ " | TotalBsmtSF | \n",
+ " 0.611020 | \n",
+ "
\n",
+ " \n",
+ " | _1stFlrSF | \n",
+ " 0.614622 | \n",
+ "
\n",
+ " \n",
+ " | GrLivArea | \n",
+ " 0.711060 | \n",
+ "
\n",
+ " \n",
+ " | FullBath | \n",
+ " 0.575542 | \n",
+ "
\n",
+ " \n",
+ " | TotRmsAbvGrd | \n",
+ " 0.545546 | \n",
+ "
\n",
+ " \n",
+ " | GarageYrBlt | \n",
+ " 0.502116 | \n",
+ "
\n",
+ " \n",
+ " | GarageCars | \n",
+ " 0.641828 | \n",
+ "
\n",
+ " \n",
+ " | GarageArea | \n",
+ " 0.621091 | \n",
+ "
\n",
+ " \n",
+ " | SalePrice | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " SalePrice\n",
+ "ExterQual_2 -0.587583\n",
+ "BsmtQual_5 0.551399\n",
+ "KitchenQual_2 -0.520576\n",
+ "KitchenQual_4 0.501267\n",
+ "OverallQual 0.786705\n",
+ "YearBuilt 0.518637\n",
+ "YearRemodAdd 0.500217\n",
+ "TotalBsmtSF 0.611020\n",
+ "_1stFlrSF 0.614622\n",
+ "GrLivArea 0.711060\n",
+ "FullBath 0.575542\n",
+ "TotRmsAbvGrd 0.545546\n",
+ "GarageYrBlt 0.502116\n",
+ "GarageCars 0.641828\n",
+ "GarageArea 0.621091\n",
+ "SalePrice 1.000000"
+ ]
+ },
+ "execution_count": 432,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#agregamos las variables ordinarias que tienen una correlación mayor al 50%\n",
+ "df_trains_corr_cols = pd.DataFrame(df_trains_corr)\n",
+ "df_trains_corr_cols.reset_index(inplace=True)\n",
+ "df_trains[df_trains_corr_cols['index']]\n",
+ "df_dummies = pd.concat([df_dummies,df_trains[df_trains_corr_cols['index']]], axis=1, join='inner') # concatenamos las variables ordinarias con los dummies de las variables categoricas con buena correlación\n",
+ "df_dummies_corr = df_dummies.corr()[df_dummies.corr()['SalePrice'].abs() > 0.50 ][['SalePrice']] # vemos que hay una alta correlación entre las variables de precio con algunos dummies\n",
+ "df_dummies_corr"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Taking off SalePrice column and leave all columns that have been gotten above"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 433,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " ExterQual_2 | \n",
+ " BsmtQual_5 | \n",
+ " KitchenQual_2 | \n",
+ " KitchenQual_4 | \n",
+ " OverallQual | \n",
+ " YearBuilt | \n",
+ " YearRemodAdd | \n",
+ " TotalBsmtSF | \n",
+ " _1stFlrSF | \n",
+ " GrLivArea | \n",
+ " FullBath | \n",
+ " TotRmsAbvGrd | \n",
+ " GarageYrBlt | \n",
+ " GarageCars | \n",
+ " GarageArea | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 7 | \n",
+ " 2003 | \n",
+ " 2003 | \n",
+ " 856 | \n",
+ " 856 | \n",
+ " 1710 | \n",
+ " 2 | \n",
+ " 8 | \n",
+ " 2003.0 | \n",
+ " 2 | \n",
+ " 548 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 6 | \n",
+ " 1976 | \n",
+ " 1976 | \n",
+ " 1262 | \n",
+ " 1262 | \n",
+ " 1262 | \n",
+ " 2 | \n",
+ " 6 | \n",
+ " 1976.0 | \n",
+ " 2 | \n",
+ " 460 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 7 | \n",
+ " 2001 | \n",
+ " 2002 | \n",
+ " 920 | \n",
+ " 920 | \n",
+ " 1786 | \n",
+ " 2 | \n",
+ " 6 | \n",
+ " 2001.0 | \n",
+ " 2 | \n",
+ " 608 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 7 | \n",
+ " 1915 | \n",
+ " 1970 | \n",
+ " 756 | \n",
+ " 961 | \n",
+ " 1717 | \n",
+ " 1 | \n",
+ " 7 | \n",
+ " 1998.0 | \n",
+ " 3 | \n",
+ " 642 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 8 | \n",
+ " 2000 | \n",
+ " 2000 | \n",
+ " 1145 | \n",
+ " 1145 | \n",
+ " 2198 | \n",
+ " 2 | \n",
+ " 9 | \n",
+ " 2000.0 | \n",
+ " 3 | \n",
+ " 836 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 1455 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 6 | \n",
+ " 1999 | \n",
+ " 2000 | \n",
+ " 953 | \n",
+ " 953 | \n",
+ " 1647 | \n",
+ " 2 | \n",
+ " 7 | \n",
+ " 1999.0 | \n",
+ " 2 | \n",
+ " 460 | \n",
+ "
\n",
+ " \n",
+ " | 1456 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 6 | \n",
+ " 1978 | \n",
+ " 1988 | \n",
+ " 1542 | \n",
+ " 2073 | \n",
+ " 2073 | \n",
+ " 2 | \n",
+ " 7 | \n",
+ " 1978.0 | \n",
+ " 2 | \n",
+ " 500 | \n",
+ "
\n",
+ " \n",
+ " | 1457 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 7 | \n",
+ " 1941 | \n",
+ " 2006 | \n",
+ " 1152 | \n",
+ " 1188 | \n",
+ " 2340 | \n",
+ " 2 | \n",
+ " 9 | \n",
+ " 1941.0 | \n",
+ " 1 | \n",
+ " 252 | \n",
+ "
\n",
+ " \n",
+ " | 1458 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 5 | \n",
+ " 1950 | \n",
+ " 1996 | \n",
+ " 1078 | \n",
+ " 1078 | \n",
+ " 1078 | \n",
+ " 1 | \n",
+ " 5 | \n",
+ " 1950.0 | \n",
+ " 1 | \n",
+ " 240 | \n",
+ "
\n",
+ " \n",
+ " | 1459 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 5 | \n",
+ " 1965 | \n",
+ " 1965 | \n",
+ " 1256 | \n",
+ " 1256 | \n",
+ " 1256 | \n",
+ " 1 | \n",
+ " 6 | \n",
+ " 1965.0 | \n",
+ " 1 | \n",
+ " 276 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
1413 rows × 15 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " ExterQual_2 BsmtQual_5 KitchenQual_2 KitchenQual_4 OverallQual \\\n",
+ "0 0 0 0 0 7 \n",
+ "1 1 0 1 0 6 \n",
+ "2 0 0 0 0 7 \n",
+ "3 1 0 0 0 7 \n",
+ "4 0 0 0 0 8 \n",
+ "... ... ... ... ... ... \n",
+ "1455 1 0 1 0 6 \n",
+ "1456 1 0 1 0 6 \n",
+ "1457 0 0 0 0 7 \n",
+ "1458 1 0 0 0 5 \n",
+ "1459 0 0 1 0 5 \n",
+ "\n",
+ " YearBuilt YearRemodAdd TotalBsmtSF _1stFlrSF GrLivArea FullBath \\\n",
+ "0 2003 2003 856 856 1710 2 \n",
+ "1 1976 1976 1262 1262 1262 2 \n",
+ "2 2001 2002 920 920 1786 2 \n",
+ "3 1915 1970 756 961 1717 1 \n",
+ "4 2000 2000 1145 1145 2198 2 \n",
+ "... ... ... ... ... ... ... \n",
+ "1455 1999 2000 953 953 1647 2 \n",
+ "1456 1978 1988 1542 2073 2073 2 \n",
+ "1457 1941 2006 1152 1188 2340 2 \n",
+ "1458 1950 1996 1078 1078 1078 1 \n",
+ "1459 1965 1965 1256 1256 1256 1 \n",
+ "\n",
+ " TotRmsAbvGrd GarageYrBlt GarageCars GarageArea \n",
+ "0 8 2003.0 2 548 \n",
+ "1 6 1976.0 2 460 \n",
+ "2 6 2001.0 2 608 \n",
+ "3 7 1998.0 3 642 \n",
+ "4 9 2000.0 3 836 \n",
+ "... ... ... ... ... \n",
+ "1455 7 1999.0 2 460 \n",
+ "1456 7 1978.0 2 500 \n",
+ "1457 9 1941.0 1 252 \n",
+ "1458 5 1950.0 1 240 \n",
+ "1459 6 1965.0 1 276 \n",
+ "\n",
+ "[1413 rows x 15 columns]"
+ ]
+ },
+ "execution_count": 433,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#tomamos solo las columnas que esta correlacionadas con el precio y excluimos el precio de nuestra matriz\n",
+ "df_dummies_corr.reset_index(inplace=True)\n",
+ "df_dummies = df_dummies[df_dummies_corr['index']]\n",
+ "df_dummies.drop(columns='SalePrice', inplace=True)\n",
+ "df_dummies"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Starting Fit_transform method"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 434,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " ExterQual_2 | \n",
+ " BsmtQual_5 | \n",
+ " KitchenQual_2 | \n",
+ " KitchenQual_4 | \n",
+ " OverallQual | \n",
+ " YearBuilt | \n",
+ " YearRemodAdd | \n",
+ " TotalBsmtSF | \n",
+ " _1stFlrSF | \n",
+ " GrLivArea | \n",
+ " FullBath | \n",
+ " TotRmsAbvGrd | \n",
+ " GarageYrBlt | \n",
+ " GarageCars | \n",
+ " GarageArea | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " -1.267679 | \n",
+ " -0.304643 | \n",
+ " -0.997879 | \n",
+ " -0.274486 | \n",
+ " 0.634231 | \n",
+ " 1.039155 | \n",
+ " 0.870141 | \n",
+ " -0.559825 | \n",
+ " -0.796475 | \n",
+ " 0.360898 | \n",
+ " 0.798472 | \n",
+ " 0.915403 | \n",
+ " 0.997693 | \n",
+ " 0.304124 | \n",
+ " 0.341502 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 0.788843 | \n",
+ " -0.304643 | \n",
+ " 1.002125 | \n",
+ " -0.274486 | \n",
+ " -0.102750 | \n",
+ " 0.149859 | \n",
+ " -0.449967 | \n",
+ " 0.443885 | \n",
+ " 0.257906 | \n",
+ " -0.489976 | \n",
+ " 0.798472 | \n",
+ " -0.321770 | \n",
+ " -0.027407 | \n",
+ " 0.304124 | \n",
+ " -0.071128 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " -1.267679 | \n",
+ " -0.304643 | \n",
+ " -0.997879 | \n",
+ " -0.274486 | \n",
+ " 0.634231 | \n",
+ " 0.973281 | \n",
+ " 0.821248 | \n",
+ " -0.401604 | \n",
+ " -0.630267 | \n",
+ " 0.505243 | \n",
+ " 0.798472 | \n",
+ " -0.321770 | \n",
+ " 0.921759 | \n",
+ " 0.304124 | \n",
+ " 0.622840 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 0.788843 | \n",
+ " -0.304643 | \n",
+ " -0.997879 | \n",
+ " -0.274486 | \n",
+ " 0.634231 | \n",
+ " -1.859290 | \n",
+ " -0.743324 | \n",
+ " -0.807044 | \n",
+ " -0.523790 | \n",
+ " 0.374193 | \n",
+ " -1.018341 | \n",
+ " 0.296817 | \n",
+ " 0.807859 | \n",
+ " 1.647023 | \n",
+ " 0.782265 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " -1.267679 | \n",
+ " -0.304643 | \n",
+ " -0.997879 | \n",
+ " -0.274486 | \n",
+ " 1.371212 | \n",
+ " 0.940344 | \n",
+ " 0.723462 | \n",
+ " 0.154639 | \n",
+ " -0.045943 | \n",
+ " 1.287743 | \n",
+ " 0.798472 | \n",
+ " 1.533990 | \n",
+ " 0.883793 | \n",
+ " 1.647023 | \n",
+ " 1.691926 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 1455 | \n",
+ " 0.788843 | \n",
+ " -0.304643 | \n",
+ " 1.002125 | \n",
+ " -0.274486 | \n",
+ " -0.102750 | \n",
+ " 0.907407 | \n",
+ " 0.723462 | \n",
+ " -0.320022 | \n",
+ " -0.544566 | \n",
+ " 0.241244 | \n",
+ " 0.798472 | \n",
+ " 0.296817 | \n",
+ " 0.845826 | \n",
+ " 0.304124 | \n",
+ " -0.071128 | \n",
+ "
\n",
+ " \n",
+ " | 1456 | \n",
+ " 0.788843 | \n",
+ " -0.304643 | \n",
+ " 1.002125 | \n",
+ " -0.274486 | \n",
+ " -0.102750 | \n",
+ " 0.215733 | \n",
+ " 0.136748 | \n",
+ " 1.136099 | \n",
+ " 2.364069 | \n",
+ " 1.050334 | \n",
+ " 0.798472 | \n",
+ " 0.296817 | \n",
+ " 0.048526 | \n",
+ " 0.304124 | \n",
+ " 0.116431 | \n",
+ "
\n",
+ " \n",
+ " | 1457 | \n",
+ " -1.267679 | \n",
+ " -0.304643 | \n",
+ " -0.997879 | \n",
+ " -0.274486 | \n",
+ " 0.634231 | \n",
+ " -1.002931 | \n",
+ " 1.016819 | \n",
+ " 0.171944 | \n",
+ " 0.065728 | \n",
+ " 1.557440 | \n",
+ " 0.798472 | \n",
+ " 1.533990 | \n",
+ " -1.356240 | \n",
+ " -1.038774 | \n",
+ " -1.046434 | \n",
+ "
\n",
+ " \n",
+ " | 1458 | \n",
+ " 0.788843 | \n",
+ " -0.304643 | \n",
+ " -0.997879 | \n",
+ " -0.274486 | \n",
+ " -0.839731 | \n",
+ " -0.706499 | \n",
+ " 0.527891 | \n",
+ " -0.010998 | \n",
+ " -0.219941 | \n",
+ " -0.839442 | \n",
+ " -1.018341 | \n",
+ " -0.940357 | \n",
+ " -1.014540 | \n",
+ " -1.038774 | \n",
+ " -1.102702 | \n",
+ "
\n",
+ " \n",
+ " | 1459 | \n",
+ " -1.267679 | \n",
+ " -0.304643 | \n",
+ " 1.002125 | \n",
+ " -0.274486 | \n",
+ " -0.839731 | \n",
+ " -0.212446 | \n",
+ " -0.987788 | \n",
+ " 0.429052 | \n",
+ " 0.242324 | \n",
+ " -0.501372 | \n",
+ " -1.018341 | \n",
+ " -0.321770 | \n",
+ " -0.445040 | \n",
+ " -1.038774 | \n",
+ " -0.933899 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
1413 rows × 15 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " ExterQual_2 BsmtQual_5 KitchenQual_2 KitchenQual_4 OverallQual \\\n",
+ "0 -1.267679 -0.304643 -0.997879 -0.274486 0.634231 \n",
+ "1 0.788843 -0.304643 1.002125 -0.274486 -0.102750 \n",
+ "2 -1.267679 -0.304643 -0.997879 -0.274486 0.634231 \n",
+ "3 0.788843 -0.304643 -0.997879 -0.274486 0.634231 \n",
+ "4 -1.267679 -0.304643 -0.997879 -0.274486 1.371212 \n",
+ "... ... ... ... ... ... \n",
+ "1455 0.788843 -0.304643 1.002125 -0.274486 -0.102750 \n",
+ "1456 0.788843 -0.304643 1.002125 -0.274486 -0.102750 \n",
+ "1457 -1.267679 -0.304643 -0.997879 -0.274486 0.634231 \n",
+ "1458 0.788843 -0.304643 -0.997879 -0.274486 -0.839731 \n",
+ "1459 -1.267679 -0.304643 1.002125 -0.274486 -0.839731 \n",
+ "\n",
+ " YearBuilt YearRemodAdd TotalBsmtSF _1stFlrSF GrLivArea FullBath \\\n",
+ "0 1.039155 0.870141 -0.559825 -0.796475 0.360898 0.798472 \n",
+ "1 0.149859 -0.449967 0.443885 0.257906 -0.489976 0.798472 \n",
+ "2 0.973281 0.821248 -0.401604 -0.630267 0.505243 0.798472 \n",
+ "3 -1.859290 -0.743324 -0.807044 -0.523790 0.374193 -1.018341 \n",
+ "4 0.940344 0.723462 0.154639 -0.045943 1.287743 0.798472 \n",
+ "... ... ... ... ... ... ... \n",
+ "1455 0.907407 0.723462 -0.320022 -0.544566 0.241244 0.798472 \n",
+ "1456 0.215733 0.136748 1.136099 2.364069 1.050334 0.798472 \n",
+ "1457 -1.002931 1.016819 0.171944 0.065728 1.557440 0.798472 \n",
+ "1458 -0.706499 0.527891 -0.010998 -0.219941 -0.839442 -1.018341 \n",
+ "1459 -0.212446 -0.987788 0.429052 0.242324 -0.501372 -1.018341 \n",
+ "\n",
+ " TotRmsAbvGrd GarageYrBlt GarageCars GarageArea \n",
+ "0 0.915403 0.997693 0.304124 0.341502 \n",
+ "1 -0.321770 -0.027407 0.304124 -0.071128 \n",
+ "2 -0.321770 0.921759 0.304124 0.622840 \n",
+ "3 0.296817 0.807859 1.647023 0.782265 \n",
+ "4 1.533990 0.883793 1.647023 1.691926 \n",
+ "... ... ... ... ... \n",
+ "1455 0.296817 0.845826 0.304124 -0.071128 \n",
+ "1456 0.296817 0.048526 0.304124 0.116431 \n",
+ "1457 1.533990 -1.356240 -1.038774 -1.046434 \n",
+ "1458 -0.940357 -1.014540 -1.038774 -1.102702 \n",
+ "1459 -0.321770 -0.445040 -1.038774 -0.933899 \n",
+ "\n",
+ "[1413 rows x 15 columns]"
+ ]
+ },
+ "execution_count": 434,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "\n",
+ "#Generamos los dummies de las variables categoricas\n",
+ "stand_data = scaler.fit_transform(df_dummies)\n",
+ "#los centramos en 0 \n",
+ "df_stand_data = pd.DataFrame(stand_data, index = df_dummies.index, columns = df_dummies.columns)\n",
+ "df_stand_data\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### PCA process\n",
+ "Getting ratio where cumsum is at least 80%"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 435,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "4"
+ ]
+ },
+ "execution_count": 435,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pca = PCA() # creamos una instancia de #PCA\n",
+ "pca.fit(df_stand_data) #prepara los datos y genera la matriz pca\n",
+ "ratio_80 = 0 # variable para guardar cuantos valores se necesitan para llegar al 80%\n",
+ "for i, num in enumerate(pca.explained_variance_ratio_.cumsum()):\n",
+ " if num > 0.8:\n",
+ " ratio_80 = i\n",
+ " break\n",
+ "ratio_80"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "fit and transform process for columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 436,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " comp1 | \n",
+ " comp2 | \n",
+ " comp3 | \n",
+ " comp4 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1.791821 | \n",
+ " -1.410191 | \n",
+ " -1.687428 | \n",
+ " 0.385872 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " -0.426395 | \n",
+ " 0.282228 | \n",
+ " 0.235221 | \n",
+ " -1.106798 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 1.690532 | \n",
+ " -1.712462 | \n",
+ " -1.098508 | \n",
+ " 0.155258 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " -0.085184 | \n",
+ " 0.276780 | \n",
+ " -0.467497 | \n",
+ " -0.804128 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 3.450850 | \n",
+ " -0.302722 | \n",
+ " -1.637284 | \n",
+ " -0.804499 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 1455 | \n",
+ " 0.224075 | \n",
+ " -0.588041 | \n",
+ " -0.804471 | \n",
+ " -0.780430 | \n",
+ "
\n",
+ " \n",
+ " | 1456 | \n",
+ " 1.038271 | \n",
+ " 1.797159 | \n",
+ " 0.250557 | \n",
+ " -1.478160 | \n",
+ "
\n",
+ " \n",
+ " | 1457 | \n",
+ " 0.686651 | \n",
+ " 1.446927 | \n",
+ " -2.222381 | \n",
+ " 2.059979 | \n",
+ "
\n",
+ " \n",
+ " | 1458 | \n",
+ " -2.046225 | \n",
+ " -0.430441 | \n",
+ " 0.468704 | \n",
+ " 1.065916 | \n",
+ "
\n",
+ " \n",
+ " | 1459 | \n",
+ " -1.566947 | \n",
+ " 0.240105 | \n",
+ " 0.719748 | \n",
+ " 0.170087 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
1413 rows × 4 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " comp1 comp2 comp3 comp4\n",
+ "0 1.791821 -1.410191 -1.687428 0.385872\n",
+ "1 -0.426395 0.282228 0.235221 -1.106798\n",
+ "2 1.690532 -1.712462 -1.098508 0.155258\n",
+ "3 -0.085184 0.276780 -0.467497 -0.804128\n",
+ "4 3.450850 -0.302722 -1.637284 -0.804499\n",
+ "... ... ... ... ...\n",
+ "1455 0.224075 -0.588041 -0.804471 -0.780430\n",
+ "1456 1.038271 1.797159 0.250557 -1.478160\n",
+ "1457 0.686651 1.446927 -2.222381 2.059979\n",
+ "1458 -2.046225 -0.430441 0.468704 1.065916\n",
+ "1459 -1.566947 0.240105 0.719748 0.170087\n",
+ "\n",
+ "[1413 rows x 4 columns]"
+ ]
+ },
+ "execution_count": 436,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pca_ratio_80 = PCA(n_components=ratio_80) # creas una instancia de PCA con el numero de componentes definido en ratio_80\n",
+ "pca_transform_ratio_80 = pca_ratio_80.fit_transform(df_stand_data) #normalizas y estandarizas los datos para el numero de componentes definido y se centran en 0\n",
+ "df_pca_transform = pd.DataFrame(pca_transform_ratio_80, index=df_stand_data.index,columns = ['comp'+str(x+1) for x in range(ratio_80)]) #creas el dataframe\n",
+ "df_pca_transform"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Creating component matrix"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 437,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " ExterQual_2 | \n",
+ " BsmtQual_5 | \n",
+ " KitchenQual_2 | \n",
+ " KitchenQual_4 | \n",
+ " OverallQual | \n",
+ " YearBuilt | \n",
+ " YearRemodAdd | \n",
+ " TotalBsmtSF | \n",
+ " _1stFlrSF | \n",
+ " GrLivArea | \n",
+ " FullBath | \n",
+ " TotRmsAbvGrd | \n",
+ " GarageYrBlt | \n",
+ " GarageCars | \n",
+ " GarageArea | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | comp1 | \n",
+ " -0.279057 | \n",
+ " 0.208703 | \n",
+ " -0.247717 | \n",
+ " 0.180846 | \n",
+ " 0.319348 | \n",
+ " 0.266440 | \n",
+ " 0.248097 | \n",
+ " 0.256911 | \n",
+ " 0.249127 | \n",
+ " 0.259179 | \n",
+ " 0.265496 | \n",
+ " 0.202702 | \n",
+ " 0.273435 | \n",
+ " 0.294260 | \n",
+ " 0.286011 | \n",
+ "
\n",
+ " \n",
+ " | comp2 | \n",
+ " 0.214856 | \n",
+ " 0.098782 | \n",
+ " 0.195798 | \n",
+ " 0.161085 | \n",
+ " -0.003311 | \n",
+ " -0.374439 | \n",
+ " -0.302714 | \n",
+ " 0.245832 | \n",
+ " 0.336302 | \n",
+ " 0.393705 | \n",
+ " 0.059613 | \n",
+ " 0.421164 | \n",
+ " -0.368912 | \n",
+ " -0.048746 | \n",
+ " -0.001127 | \n",
+ "
\n",
+ " \n",
+ " | comp3 | \n",
+ " 0.085736 | \n",
+ " 0.400647 | \n",
+ " 0.136580 | \n",
+ " 0.387254 | \n",
+ " -0.036808 | \n",
+ " 0.060848 | \n",
+ " -0.114176 | \n",
+ " 0.354639 | \n",
+ " 0.268414 | \n",
+ " -0.325169 | \n",
+ " -0.402604 | \n",
+ " -0.409479 | \n",
+ " 0.029273 | \n",
+ " 0.022070 | \n",
+ " 0.103731 | \n",
+ "
\n",
+ " \n",
+ " | comp4 | \n",
+ " -0.221739 | \n",
+ " 0.304470 | \n",
+ " -0.335963 | \n",
+ " 0.465468 | \n",
+ " 0.143107 | \n",
+ " -0.115802 | \n",
+ " 0.217171 | \n",
+ " -0.167599 | \n",
+ " -0.176888 | \n",
+ " 0.037730 | \n",
+ " -0.004345 | \n",
+ " 0.057220 | \n",
+ " -0.198921 | \n",
+ " -0.396832 | \n",
+ " -0.431747 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " ExterQual_2 BsmtQual_5 KitchenQual_2 KitchenQual_4 OverallQual \\\n",
+ "comp1 -0.279057 0.208703 -0.247717 0.180846 0.319348 \n",
+ "comp2 0.214856 0.098782 0.195798 0.161085 -0.003311 \n",
+ "comp3 0.085736 0.400647 0.136580 0.387254 -0.036808 \n",
+ "comp4 -0.221739 0.304470 -0.335963 0.465468 0.143107 \n",
+ "\n",
+ " YearBuilt YearRemodAdd TotalBsmtSF _1stFlrSF GrLivArea FullBath \\\n",
+ "comp1 0.266440 0.248097 0.256911 0.249127 0.259179 0.265496 \n",
+ "comp2 -0.374439 -0.302714 0.245832 0.336302 0.393705 0.059613 \n",
+ "comp3 0.060848 -0.114176 0.354639 0.268414 -0.325169 -0.402604 \n",
+ "comp4 -0.115802 0.217171 -0.167599 -0.176888 0.037730 -0.004345 \n",
+ "\n",
+ " TotRmsAbvGrd GarageYrBlt GarageCars GarageArea \n",
+ "comp1 0.202702 0.273435 0.294260 0.286011 \n",
+ "comp2 0.421164 -0.368912 -0.048746 -0.001127 \n",
+ "comp3 -0.409479 0.029273 0.022070 0.103731 \n",
+ "comp4 0.057220 -0.198921 -0.396832 -0.431747 "
+ ]
+ },
+ "execution_count": 437,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_matrix_componentes = pd.DataFrame(pca_ratio_80.components_,\n",
+ " columns = df_stand_data.columns,\n",
+ " index = df_pca_transform.columns) # se crea la matriz de componentes\n",
+ "df_matrix_componentes"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 438,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " OLS Regression Results \n",
+ "==============================================================================\n",
+ "Dep. Variable: SalePrice R-squared: 0.776\n",
+ "Model: OLS Adj. R-squared: 0.775\n",
+ "Method: Least Squares F-statistic: 1220.\n",
+ "Date: Wed, 09 Feb 2022 Prob (F-statistic): 0.00\n",
+ "Time: 22:07:51 Log-Likelihood: -16886.\n",
+ "No. Observations: 1413 AIC: 3.378e+04\n",
+ "Df Residuals: 1408 BIC: 3.381e+04\n",
+ "Df Model: 4 \n",
+ "Covariance Type: nonrobust \n",
+ "==============================================================================\n",
+ " coef std err t P>|t| [0.025 0.975]\n",
+ "------------------------------------------------------------------------------\n",
+ "const 1.825e+05 998.878 182.719 0.000 1.81e+05 1.84e+05\n",
+ "comp1 2.567e+04 377.124 68.078 0.000 2.49e+04 2.64e+04\n",
+ "comp2 1.013e+04 725.331 13.966 0.000 8707.014 1.16e+04\n",
+ "comp3 1716.6880 883.844 1.942 0.052 -17.105 3450.481\n",
+ "comp4 6721.9297 974.073 6.901 0.000 4811.140 8632.720\n",
+ "==============================================================================\n",
+ "Omnibus: 481.794 Durbin-Watson: 1.974\n",
+ "Prob(Omnibus): 0.000 Jarque-Bera (JB): 47849.223\n",
+ "Skew: -0.576 Prob(JB): 0.00\n",
+ "Kurtosis: 31.485 Cond. No. 2.65\n",
+ "==============================================================================\n",
+ "\n",
+ "Notes:\n",
+ "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n"
+ ]
+ }
+ ],
+ "source": [
+ "df_matrix = df_pca_transform\n",
+ "X = sm.add_constant(df_matrix)\n",
+ "linreg_statsm = sm.OLS(df_trains['SalePrice'], X).fit()\n",
+ "print(linreg_statsm.summary())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 439,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "LinearRegression()"
+ ]
+ },
+ "execution_count": 439,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from sklearn.model_selection import train_test_split\n",
+ "from sklearn.linear_model import LinearRegression\n",
+ "\n",
+ "X_train, X_test, y_train, y_test = train_test_split(X, df_trains['SalePrice'], random_state=22, test_size=0.2)\n",
+ "linreg = LinearRegression()\n",
+ "linreg.fit(X_train, y_train)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As we can see after train 80% of our data, r2 value are almost the same\\\n",
+ "that make us believe that PCA process has been trained in a good way, ans there is no either overfitting or underfitting"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 440,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "0.7666848219460827\n",
+ "0.8177669809392756\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(linreg.score(X_train, y_train) )\n",
+ "print(linreg.score(X_test, y_test) )"
+ ]
+ }
+ ],
+ "metadata": {
+ "interpreter": {
+ "hash": "a58b191ad2a25b19fea43b3067475f20bdb1c629ecc23182cd320980facf1bfc"
+ },
+ "kernelspec": {
+ "display_name": "Python 3.9.7 64-bit",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.7"
+ },
+ "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}