From 195d2d44c2e26c95250e09412f6b84165edea98b Mon Sep 17 00:00:00 2001 From: David Venegas Date: Wed, 9 Feb 2022 22:10:22 -0600 Subject: [PATCH] Project completed David V --- Analysis project.ipynb | 3001 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 3001 insertions(+) create mode 100644 Analysis project.ipynb diff --git a/Analysis project.ipynb b/Analysis project.ipynb new file mode 100644 index 0000000..714066e --- /dev/null +++ b/Analysis project.ipynb @@ -0,0 +1,3001 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Importing libraries and overview of data" + ] + }, + { + "cell_type": "code", + "execution_count": 415, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import seaborn as sns\n", + "import matplotlib.pyplot as plt\n", + "from statsmodels.formula.api import ols" + ] + }, + { + "cell_type": "code", + "execution_count": 416, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IdMSSubClassMSZoningLotFrontageLotAreaStreetAlleyLotShapeLandContourUtilities...PoolAreaPoolQCFenceMiscFeatureMiscValMoSoldYrSoldSaleTypeSaleConditionSalePrice
0160RL65.08450PaveNaNRegLvlAllPub...0NaNNaNNaN022008WDNormal208500
1220RL80.09600PaveNaNRegLvlAllPub...0NaNNaNNaN052007WDNormal181500
2360RL68.011250PaveNaNIR1LvlAllPub...0NaNNaNNaN092008WDNormal223500
3470RL60.09550PaveNaNIR1LvlAllPub...0NaNNaNNaN022006WDAbnorml140000
4560RL84.014260PaveNaNIR1LvlAllPub...0NaNNaNNaN0122008WDNormal250000
..................................................................
1455145660RL62.07917PaveNaNRegLvlAllPub...0NaNNaNNaN082007WDNormal175000
1456145720RL85.013175PaveNaNRegLvlAllPub...0NaNMnPrvNaN022010WDNormal210000
1457145870RL66.09042PaveNaNRegLvlAllPub...0NaNGdPrvShed250052010WDNormal266500
1458145920RL68.09717PaveNaNRegLvlAllPub...0NaNNaNNaN042010WDNormal142125
1459146020RL75.09937PaveNaNRegLvlAllPub...0NaNNaNNaN062008WDNormal147500
\n", + "

1460 rows × 81 columns

\n", + "
" + ], + "text/plain": [ + " Id MSSubClass MSZoning LotFrontage LotArea Street Alley LotShape \\\n", + "0 1 60 RL 65.0 8450 Pave NaN Reg \n", + "1 2 20 RL 80.0 9600 Pave NaN Reg \n", + "2 3 60 RL 68.0 11250 Pave NaN IR1 \n", + "3 4 70 RL 60.0 9550 Pave NaN IR1 \n", + "4 5 60 RL 84.0 14260 Pave NaN IR1 \n", + "... ... ... ... ... ... ... ... ... \n", + "1455 1456 60 RL 62.0 7917 Pave NaN Reg \n", + "1456 1457 20 RL 85.0 13175 Pave NaN Reg \n", + "1457 1458 70 RL 66.0 9042 Pave NaN Reg \n", + "1458 1459 20 RL 68.0 9717 Pave NaN Reg \n", + "1459 1460 20 RL 75.0 9937 Pave NaN Reg \n", + "\n", + " LandContour Utilities ... PoolArea PoolQC Fence MiscFeature MiscVal \\\n", + "0 Lvl AllPub ... 0 NaN NaN NaN 0 \n", + "1 Lvl AllPub ... 0 NaN NaN NaN 0 \n", + "2 Lvl AllPub ... 0 NaN NaN NaN 0 \n", + "3 Lvl AllPub ... 0 NaN NaN NaN 0 \n", + "4 Lvl AllPub ... 0 NaN NaN NaN 0 \n", + "... ... ... ... ... ... ... ... ... \n", + "1455 Lvl AllPub ... 0 NaN NaN NaN 0 \n", + "1456 Lvl AllPub ... 0 NaN MnPrv NaN 0 \n", + "1457 Lvl AllPub ... 0 NaN GdPrv Shed 2500 \n", + "1458 Lvl AllPub ... 0 NaN NaN NaN 0 \n", + "1459 Lvl AllPub ... 0 NaN NaN NaN 0 \n", + "\n", + " MoSold YrSold SaleType SaleCondition SalePrice \n", + "0 2 2008 WD Normal 208500 \n", + "1 5 2007 WD Normal 181500 \n", + "2 9 2008 WD Normal 223500 \n", + "3 2 2006 WD Abnorml 140000 \n", + "4 12 2008 WD Normal 250000 \n", + "... ... ... ... ... ... 
\n", + "1455 8 2007 WD Normal 175000 \n", + "1456 2 2010 WD Normal 210000 \n", + "1457 5 2010 WD Normal 266500 \n", + "1458 4 2010 WD Normal 142125 \n", + "1459 6 2008 WD Normal 147500 \n", + "\n", + "[1460 rows x 81 columns]" + ] + }, + "execution_count": 416, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_trains = pd.read_csv('house-prices-advanced-regression-techniques/train.csv')\n", + "df_trains" + ] + }, + { + "cell_type": "code", + "execution_count": 417, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " count mean std min 25% \\\n", + "Id 1460.0 730.500000 421.610009 1.0 365.75 \n", + "MSSubClass 1460.0 56.897260 42.300571 20.0 20.00 \n", + "LotFrontage 1201.0 70.049958 24.284752 21.0 59.00 \n", + "LotArea 1460.0 10516.828082 9981.264932 1300.0 7553.50 \n", + "OverallQual 1460.0 6.099315 1.382997 1.0 5.00 \n", + "OverallCond 1460.0 5.575342 1.112799 1.0 5.00 \n", + "YearBuilt 1460.0 1971.267808 30.202904 1872.0 1954.00 \n", + "YearRemodAdd 1460.0 1984.865753 20.645407 1950.0 1967.00 \n", + "MasVnrArea 1452.0 103.685262 181.066207 0.0 0.00 \n", + "BsmtFinSF1 1460.0 443.639726 456.098091 0.0 0.00 \n", + "BsmtFinSF2 1460.0 46.549315 161.319273 0.0 0.00 \n", + "BsmtUnfSF 1460.0 567.240411 441.866955 0.0 223.00 \n", + "TotalBsmtSF 1460.0 1057.429452 438.705324 0.0 795.75 \n", + "1stFlrSF 1460.0 1162.626712 386.587738 334.0 882.00 \n", + "2ndFlrSF 1460.0 346.992466 436.528436 0.0 0.00 \n", + "LowQualFinSF 1460.0 5.844521 48.623081 0.0 0.00 \n", + "GrLivArea 1460.0 1515.463699 525.480383 334.0 1129.50 \n", + "BsmtFullBath 1460.0 0.425342 0.518911 0.0 0.00 \n", + "BsmtHalfBath 1460.0 0.057534 0.238753 0.0 0.00 \n", + "FullBath 1460.0 1.565068 0.550916 0.0 1.00 \n", + "HalfBath 1460.0 0.382877 0.502885 0.0 0.00 \n", + "BedroomAbvGr 1460.0 2.866438 0.815778 0.0 2.00 \n", + "KitchenAbvGr 1460.0 1.046575 0.220338 0.0 1.00 \n", + "TotRmsAbvGrd 1460.0 6.517808 1.625393 2.0 5.00 \n", + 
"Fireplaces 1460.0 0.613014 0.644666 0.0 0.00 \n", + "GarageYrBlt 1379.0 1978.506164 24.689725 1900.0 1961.00 \n", + "GarageCars 1460.0 1.767123 0.747315 0.0 1.00 \n", + "GarageArea 1460.0 472.980137 213.804841 0.0 334.50 \n", + "WoodDeckSF 1460.0 94.244521 125.338794 0.0 0.00 \n", + "OpenPorchSF 1460.0 46.660274 66.256028 0.0 0.00 \n", + "EnclosedPorch 1460.0 21.954110 61.119149 0.0 0.00 \n", + "3SsnPorch 1460.0 3.409589 29.317331 0.0 0.00 \n", + "ScreenPorch 1460.0 15.060959 55.757415 0.0 0.00 \n", + "PoolArea 1460.0 2.758904 40.177307 0.0 0.00 \n", + "MiscVal 1460.0 43.489041 496.123024 0.0 0.00 \n", + "MoSold 1460.0 6.321918 2.703626 1.0 5.00 \n", + "YrSold 1460.0 2007.815753 1.328095 2006.0 2007.00 \n", + "SalePrice 1460.0 180921.195890 79442.502883 34900.0 129975.00 \n", + "\n", + " 50% 75% max \n", + "Id 730.5 1095.25 1460.0 \n", + "MSSubClass 50.0 70.00 190.0 \n", + "LotFrontage 69.0 80.00 313.0 \n", + "LotArea 9478.5 11601.50 215245.0 \n", + "OverallQual 6.0 7.00 10.0 \n", + "OverallCond 5.0 6.00 9.0 \n", + "YearBuilt 1973.0 2000.00 2010.0 \n", + "YearRemodAdd 1994.0 2004.00 2010.0 \n", + "MasVnrArea 0.0 166.00 1600.0 \n", + "BsmtFinSF1 383.5 712.25 5644.0 \n", + "BsmtFinSF2 0.0 0.00 1474.0 \n", + "BsmtUnfSF 477.5 808.00 2336.0 \n", + "TotalBsmtSF 991.5 1298.25 6110.0 \n", + "1stFlrSF 1087.0 1391.25 4692.0 \n", + "2ndFlrSF 0.0 728.00 2065.0 \n", + "LowQualFinSF 0.0 0.00 572.0 \n", + "GrLivArea 1464.0 1776.75 5642.0 \n", + "BsmtFullBath 0.0 1.00 3.0 \n", + "BsmtHalfBath 0.0 0.00 2.0 \n", + "FullBath 2.0 2.00 3.0 \n", + "HalfBath 0.0 1.00 2.0 \n", + "BedroomAbvGr 3.0 3.00 8.0 \n", + "KitchenAbvGr 1.0 1.00 3.0 \n", + "TotRmsAbvGrd 6.0 7.00 14.0 \n", + "Fireplaces 1.0 1.00 3.0 \n", + "GarageYrBlt 1980.0 2002.00 2010.0 \n", + "GarageCars 2.0 2.00 4.0 \n", + "GarageArea 480.0 576.00 1418.0 \n", + "WoodDeckSF 0.0 168.00 857.0 \n", + "OpenPorchSF 25.0 68.00 547.0 \n", + "EnclosedPorch 0.0 0.00 552.0 \n", + "3SsnPorch 0.0 0.00 508.0 \n", + "ScreenPorch 0.0 0.00 
480.0 \n", + "PoolArea 0.0 0.00 738.0 \n", + "MiscVal 0.0 0.00 15500.0 \n", + "MoSold 6.0 8.00 12.0 \n", + "YrSold 2008.0 2009.00 2010.0 \n", + "SalePrice 163000.0 214000.00 755000.0 \n", + "\n", + "RangeIndex: 1460 entries, 0 to 1459\n", + "Data columns (total 81 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 Id 1460 non-null int64 \n", + " 1 MSSubClass 1460 non-null int64 \n", + " 2 MSZoning 1460 non-null object \n", + " 3 LotFrontage 1201 non-null float64\n", + " 4 LotArea 1460 non-null int64 \n", + " 5 Street 1460 non-null object \n", + " 6 Alley 91 non-null object \n", + " 7 LotShape 1460 non-null object \n", + " 8 LandContour 1460 non-null object \n", + " 9 Utilities 1460 non-null object \n", + " 10 LotConfig 1460 non-null object \n", + " 11 LandSlope 1460 non-null object \n", + " 12 Neighborhood 1460 non-null object \n", + " 13 Condition1 1460 non-null object \n", + " 14 Condition2 1460 non-null object \n", + " 15 BldgType 1460 non-null object \n", + " 16 HouseStyle 1460 non-null object \n", + " 17 OverallQual 1460 non-null int64 \n", + " 18 OverallCond 1460 non-null int64 \n", + " 19 YearBuilt 1460 non-null int64 \n", + " 20 YearRemodAdd 1460 non-null int64 \n", + " 21 RoofStyle 1460 non-null object \n", + " 22 RoofMatl 1460 non-null object \n", + " 23 Exterior1st 1460 non-null object \n", + " 24 Exterior2nd 1460 non-null object \n", + " 25 MasVnrType 1452 non-null object \n", + " 26 MasVnrArea 1452 non-null float64\n", + " 27 ExterQual 1460 non-null object \n", + " 28 ExterCond 1460 non-null object \n", + " 29 Foundation 1460 non-null object \n", + " 30 BsmtQual 1423 non-null object \n", + " 31 BsmtCond 1423 non-null object \n", + " 32 BsmtExposure 1422 non-null object \n", + " 33 BsmtFinType1 1423 non-null object \n", + " 34 BsmtFinSF1 1460 non-null int64 \n", + " 35 BsmtFinType2 1422 non-null object \n", + " 36 BsmtFinSF2 1460 non-null int64 \n", + " 37 BsmtUnfSF 1460 non-null int64 \n", + " 38 
TotalBsmtSF 1460 non-null int64 \n", + " 39 Heating 1460 non-null object \n", + " 40 HeatingQC 1460 non-null object \n", + " 41 CentralAir 1460 non-null object \n", + " 42 Electrical 1459 non-null object \n", + " 43 1stFlrSF 1460 non-null int64 \n", + " 44 2ndFlrSF 1460 non-null int64 \n", + " 45 LowQualFinSF 1460 non-null int64 \n", + " 46 GrLivArea 1460 non-null int64 \n", + " 47 BsmtFullBath 1460 non-null int64 \n", + " 48 BsmtHalfBath 1460 non-null int64 \n", + " 49 FullBath 1460 non-null int64 \n", + " 50 HalfBath 1460 non-null int64 \n", + " 51 BedroomAbvGr 1460 non-null int64 \n", + " 52 KitchenAbvGr 1460 non-null int64 \n", + " 53 KitchenQual 1460 non-null object \n", + " 54 TotRmsAbvGrd 1460 non-null int64 \n", + " 55 Functional 1460 non-null object \n", + " 56 Fireplaces 1460 non-null int64 \n", + " 57 FireplaceQu 770 non-null object \n", + " 58 GarageType 1379 non-null object \n", + " 59 GarageYrBlt 1379 non-null float64\n", + " 60 GarageFinish 1379 non-null object \n", + " 61 GarageCars 1460 non-null int64 \n", + " 62 GarageArea 1460 non-null int64 \n", + " 63 GarageQual 1379 non-null object \n", + " 64 GarageCond 1379 non-null object \n", + " 65 PavedDrive 1460 non-null object \n", + " 66 WoodDeckSF 1460 non-null int64 \n", + " 67 OpenPorchSF 1460 non-null int64 \n", + " 68 EnclosedPorch 1460 non-null int64 \n", + " 69 3SsnPorch 1460 non-null int64 \n", + " 70 ScreenPorch 1460 non-null int64 \n", + " 71 PoolArea 1460 non-null int64 \n", + " 72 PoolQC 7 non-null object \n", + " 73 Fence 281 non-null object \n", + " 74 MiscFeature 54 non-null object \n", + " 75 MiscVal 1460 non-null int64 \n", + " 76 MoSold 1460 non-null int64 \n", + " 77 YrSold 1460 non-null int64 \n", + " 78 SaleType 1460 non-null object \n", + " 79 SaleCondition 1460 non-null object \n", + " 80 SalePrice 1460 non-null int64 \n", + "dtypes: float64(3), int64(35), object(43)\n", + "memory usage: 924.0+ KB\n", + "None\n" + ] + } + ], + "source": [ + "# checking data measures and info\n", 
+ "print(df_trains.describe().T)\n", + "print(df_trains.info() ) " + ] + }, + { + "cell_type": "code", + "execution_count": 418, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MSSubClassMSZoningLotFrontageLotAreaStreetAlleyLotShapeLandContourUtilitiesLotConfig...PoolAreaPoolQCFenceMiscFeatureMiscValMoSoldYrSoldSaleTypeSaleConditionSalePrice
060RL65.08450PaveNaNRegLvlAllPubInside...0NaNNaNNaN022008WDNormal208500
120RL80.09600PaveNaNRegLvlAllPubFR2...0NaNNaNNaN052007WDNormal181500
260RL68.011250PaveNaNIR1LvlAllPubInside...0NaNNaNNaN092008WDNormal223500
370RL60.09550PaveNaNIR1LvlAllPubCorner...0NaNNaNNaN022006WDAbnorml140000
460RL84.014260PaveNaNIR1LvlAllPubFR2...0NaNNaNNaN0122008WDNormal250000
..................................................................
145560RL62.07917PaveNaNRegLvlAllPubInside...0NaNNaNNaN082007WDNormal175000
145620RL85.013175PaveNaNRegLvlAllPubInside...0NaNMnPrvNaN022010WDNormal210000
145770RL66.09042PaveNaNRegLvlAllPubInside...0NaNGdPrvShed250052010WDNormal266500
145820RL68.09717PaveNaNRegLvlAllPubInside...0NaNNaNNaN042010WDNormal142125
145920RL75.09937PaveNaNRegLvlAllPubInside...0NaNNaNNaN062008WDNormal147500
\n", + "

1460 rows × 80 columns

\n", + "
" + ], + "text/plain": [ + " MSSubClass MSZoning LotFrontage LotArea Street Alley LotShape \\\n", + "0 60 RL 65.0 8450 Pave NaN Reg \n", + "1 20 RL 80.0 9600 Pave NaN Reg \n", + "2 60 RL 68.0 11250 Pave NaN IR1 \n", + "3 70 RL 60.0 9550 Pave NaN IR1 \n", + "4 60 RL 84.0 14260 Pave NaN IR1 \n", + "... ... ... ... ... ... ... ... \n", + "1455 60 RL 62.0 7917 Pave NaN Reg \n", + "1456 20 RL 85.0 13175 Pave NaN Reg \n", + "1457 70 RL 66.0 9042 Pave NaN Reg \n", + "1458 20 RL 68.0 9717 Pave NaN Reg \n", + "1459 20 RL 75.0 9937 Pave NaN Reg \n", + "\n", + " LandContour Utilities LotConfig ... PoolArea PoolQC Fence MiscFeature \\\n", + "0 Lvl AllPub Inside ... 0 NaN NaN NaN \n", + "1 Lvl AllPub FR2 ... 0 NaN NaN NaN \n", + "2 Lvl AllPub Inside ... 0 NaN NaN NaN \n", + "3 Lvl AllPub Corner ... 0 NaN NaN NaN \n", + "4 Lvl AllPub FR2 ... 0 NaN NaN NaN \n", + "... ... ... ... ... ... ... ... ... \n", + "1455 Lvl AllPub Inside ... 0 NaN NaN NaN \n", + "1456 Lvl AllPub Inside ... 0 NaN MnPrv NaN \n", + "1457 Lvl AllPub Inside ... 0 NaN GdPrv Shed \n", + "1458 Lvl AllPub Inside ... 0 NaN NaN NaN \n", + "1459 Lvl AllPub Inside ... 0 NaN NaN NaN \n", + "\n", + " MiscVal MoSold YrSold SaleType SaleCondition SalePrice \n", + "0 0 2 2008 WD Normal 208500 \n", + "1 0 5 2007 WD Normal 181500 \n", + "2 0 9 2008 WD Normal 223500 \n", + "3 0 2 2006 WD Abnorml 140000 \n", + "4 0 12 2008 WD Normal 250000 \n", + "... ... ... ... ... ... ... 
\n", + "1455 0 8 2007 WD Normal 175000 \n", + "1456 0 2 2010 WD Normal 210000 \n", + "1457 2500 5 2010 WD Normal 266500 \n", + "1458 0 4 2010 WD Normal 142125 \n", + "1459 0 6 2008 WD Normal 147500 \n", + "\n", + "[1460 rows x 80 columns]" + ] + }, + "execution_count": 418, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Removing the Id column since it carries no relevant information\n", + "df_trains.drop('Id', axis=1, inplace=True)\n", + "df_trains" + ] + }, + { + "cell_type": "code", + "execution_count": 419, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "LotFrontage - 259\n", + "Alley - 1369\n", + "MasVnrType - 8\n", + "MasVnrArea - 8\n", + "BsmtQual - 37\n", + "BsmtCond - 37\n", + "BsmtExposure - 38\n", + "BsmtFinType1 - 37\n", + "BsmtFinType2 - 38\n", + "Electrical - 1\n", + "FireplaceQu - 690\n", + "GarageType - 81\n", + "GarageYrBlt - 81\n", + "GarageFinish - 81\n", + "GarageQual - 81\n", + "GarageCond - 81\n", + "PoolQC - 1453\n", + "Fence - 1179\n", + "MiscFeature - 1406\n" + ] + } + ], + "source": [ + "# since not all information had been displayed, print the number of null values for each column that has any\n", + "null_counts = df_trains.isnull().sum()\n", + "for column, n_missing in null_counts[null_counts > 0].items():\n", + "    print(column, ' - ', n_missing)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Creating dictionaries and setting numeric values" + ] + }, + { + "cell_type": "code", + "execution_count": 420, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 0\n", + "1 0\n", + "2 0\n", + "3 0\n", + "4 0\n", + " ..\n", + "1455 0\n", + "1456 0\n", + "1457 0\n", + "1458 0\n", + "1459 0\n", + "Name: PoolQC, Length: 1460, dtype: int32" + ] + }, + "execution_count": 420, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# LotShape: labels -> integer codes (dict_lotShape is the code -> label legend); astype() returns a new Series, so its result is assigned back\n", + "dict_lotShape = {3:'Reg', 2:'IR1', 1:'IR2', 0:'IR3'}\n", + "df_trains.LotShape = 
df_trains.LotShape.str.replace('Reg','3').str.replace('IR1','2').str.replace('IR2','1').str.replace('IR3','0')\n", + "df_trains.LotShape = df_trains.LotShape.astype('int')\n", + "# ExterQual: quality labels -> 0 (Po) .. 4 (Ex)\n", + "dict_ExterQual = {4:'Ex', 3:'Gd', 2:'TA', 1:'Fa', 0:'Po'}\n", + "df_trains.ExterQual = df_trains.ExterQual.str.replace('Ex','4').str.replace('Gd','3').str.replace('TA','2').str.replace('Fa','1').str.replace('Po','0')\n", + "df_trains.ExterQual = df_trains.ExterQual.astype('int')\n", + "# ExterCond: same 0 (Po) .. 4 (Ex) scale as ExterQual\n", + "dict_ExterCond = {4:'Ex', 3:'Gd', 2:'TA', 1:'Fa', 0:'Po'}\n", + "df_trains.ExterCond = df_trains.ExterCond.str.replace('Ex','4').str.replace('Gd','3').str.replace('TA','2').str.replace('Fa','1').str.replace('Po','0')\n", + "df_trains.ExterCond = df_trains.ExterCond.astype('int')\n", + "# BsmtQual: 1 (Po) .. 5 (Ex); missing values become code 0\n", + "dict_BsmtQual = {5:'Ex', 4:'Gd', 3:'TA', 2:'Fa', 1:'Po', 0:'NA'}\n", + "df_trains.BsmtQual = df_trains.BsmtQual.str.replace('Ex','5').str.replace('Gd','4').str.replace('TA','3').str.replace('Fa','2').str.replace('Po','1').str.replace('NA','0')\n", + "df_trains.BsmtQual = df_trains.BsmtQual.fillna('0')\n", + "df_trains.BsmtQual = df_trains.BsmtQual.astype('int')\n", + "# BsmtCond: 1 (Po) .. 5 (Ex); missing values become code 0\n", + "dict_BsmtCond = {5:'Ex', 4:'Gd', 3:'TA', 2:'Fa', 1:'Po', 0:'NA'}\n", + "df_trains.BsmtCond = df_trains.BsmtCond.str.replace('Ex','5').str.replace('Gd','4').str.replace('TA','3').str.replace('Fa','2').str.replace('Po','1').str.replace('NA','0')\n", + "df_trains.BsmtCond = df_trains.BsmtCond.fillna('0')\n", + "df_trains.BsmtCond = df_trains.BsmtCond.astype('int')\n", + "# BsmtExposure: 1 (No) .. 4 (Gd); missing values become code 0\n", + "dict_BsmtExposure = {4:'Gd', 3:'Av', 2:'Mn', 1:'No', 0:'NA'}\n", + "df_trains.BsmtExposure = df_trains.BsmtExposure.str.replace('Gd','4').str.replace('Av','3').str.replace('Mn','2').str.replace('No','1').str.replace('NA','0')\n", + "df_trains.BsmtExposure = df_trains.BsmtExposure.fillna('0')\n", + "df_trains.BsmtExposure = df_trains.BsmtExposure.astype('int')\n", + "# HeatingQC: quality labels -> 0 (Po) .. 4 (Ex)\n", + "dict_HeatingQC = {4:'Ex', 3:'Gd', 2:'TA', 1:'Fa', 0:'Po'}\n", + "df_trains.HeatingQC = 
df_trains.HeatingQC.str.replace('Ex','4').str.replace('Gd','3').str.replace('TA','2').str.replace('Fa','1').str.replace('Po','0')\n", + "df_trains.HeatingQC = df_trains.HeatingQC.astype('int')\n", + "# CentralAir: Y -> 1, N -> 0\n", + "dict_CentralAir = {1:'Y', 0:'N'}\n", + "df_trains.CentralAir = df_trains.CentralAir.str.replace('Y','1').str.replace('N','0')\n", + "df_trains.CentralAir = df_trains.CentralAir.astype('int')\n", + "# KitchenQual: quality labels -> 0 (Po) .. 4 (Ex)\n", + "dict_KitchenQual = {4:'Ex', 3:'Gd', 2:'TA', 1:'Fa', 0:'Po'}\n", + "df_trains.KitchenQual = df_trains.KitchenQual.str.replace('Ex','4').str.replace('Gd','3').str.replace('TA','2').str.replace('Fa','1').str.replace('Po','0')\n", + "df_trains.KitchenQual = df_trains.KitchenQual.astype('int')\n", + "# FireplaceQu: 1 (Po) .. 5 (Ex); missing values become code 0\n", + "dict_FireplaceQu = {5:'Ex', 4:'Gd', 3:'TA', 2:'Fa', 1:'Po', 0:'NA'}\n", + "df_trains.FireplaceQu = df_trains.FireplaceQu.str.replace('Ex','5').str.replace('Gd','4').str.replace('TA','3').str.replace('Fa','2').str.replace('Po','1').str.replace('NA','0')\n", + "df_trains.FireplaceQu = df_trains.FireplaceQu.fillna('0')\n", + "df_trains.FireplaceQu = df_trains.FireplaceQu.astype('int')\n", + "# GarageFinish: 1 (Unf) .. 3 (Fin); missing values become code 0\n", + "dict_GarageFinish = {3:'Fin', 2:'RFn', 1:'Unf', 0:'NA'}\n", + "df_trains.GarageFinish = df_trains.GarageFinish.str.replace('Fin','3').str.replace('RFn','2').str.replace('Unf','1').str.replace('NA','0')\n", + "df_trains.GarageFinish = df_trains.GarageFinish.fillna('0')\n", + "df_trains.GarageFinish = df_trains.GarageFinish.astype('int')\n", + "# GarageQual: 1 (Po) .. 5 (Ex); missing values become code 0\n", + "dict_GarageQual = {5:'Ex', 4:'Gd', 3:'TA', 2:'Fa', 1:'Po', 0:'NA'}\n", + "df_trains.GarageQual = df_trains.GarageQual.str.replace('Ex','5').str.replace('Gd','4').str.replace('TA','3').str.replace('Fa','2').str.replace('Po','1').str.replace('NA','0')\n", + "df_trains.GarageQual = df_trains.GarageQual.fillna('0')\n", + "df_trains.GarageQual = df_trains.GarageQual.astype('int')\n", + "# GarageCond: 1 (Po) .. 5 (Ex); missing values become code 0\n", + "dict_GarageCond = {5:'Ex', 4:'Gd', 3:'TA', 2:'Fa', 1:'Po', 0:'NA'}\n", + "df_trains.GarageCond = 
df_trains.GarageCond.str.replace('Ex','5').str.replace('Gd','4').str.replace('TA','3').str.replace('Fa','2').str.replace('Po','1').str.replace('NA','0')\n", + "df_trains.GarageCond = df_trains.GarageCond.fillna('0')\n", + "df_trains.GarageCond = df_trains.GarageCond.astype('int')\n", + "# PoolQC: 1 (Fa) .. 4 (Ex); missing values become code 0; the converted column is displayed last as a spot check\n", + "dict_PoolQC = {4:'Ex', 3:'Gd', 2:'TA', 1:'Fa', 0:'NA'}\n", + "df_trains.PoolQC = df_trains.PoolQC.str.replace('Ex','4').str.replace('Gd','3').str.replace('TA','2').str.replace('Fa','1').str.replace('NA','0')\n", + "df_trains.PoolQC = df_trains.PoolQC.fillna('0').astype('int')\n", + "df_trains.PoolQC" + ] + }, + { + "cell_type": "code", + "execution_count": 421, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "LotFrontage - 259\n", + "Alley - 1369\n", + "MasVnrType - 8\n", + "MasVnrArea - 8\n", + "BsmtFinType1 - 37\n", + "BsmtFinType2 - 38\n", + "Electrical - 1\n", + "GarageType - 81\n", + "GarageYrBlt - 81\n", + "Fence - 1179\n", + "MiscFeature - 1406\n" + ] + } + ], + "source": [ + "null_counts = df_trains.isnull().sum()\n", + "for column, n_missing in null_counts[null_counts > 0].items():\n", + "    print(column, ' - ', n_missing)\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Checking this information we can make the following assumptions:\n", + "Alley:\\\n", + " It is not relevant since there are more records with no alley access than records that have it, so this information could be converted into outliers \\\n", + " Grvl\tGravel \\\n", + " Pave\tPaved \\\n", + " NA \tNo alley access\n", + "\n", + "MasVnrType and MasVnrArea have 8 nulls each, so those records can be deleted\n", + "\n", + "Electrical: \\\n", + " it is not relevant, there is just 1 null record, so this record can be deleted\n", + "\n", + "GarageType: \\\n", + " NA values could be set as NA instead of nan\n", + "\n", + "GarageYrBlt: \\\n", + " since there are 81 null values, the YearBuilt values will be used to fill the missing ones.\n", + "\n", + 
"Fence: \\\n", + "numeric values cannot be assigned here since the values are not as logical as thought, so this column should be deleted\n", + " \n", + "MiscFeature: \\\n", + "since there are several records with null value, it has been decided to remove this column because it can generate outliers. " + ] + }, + { + "cell_type": "code", + "execution_count": 422, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\David\\AppData\\Local\\Temp/ipykernel_7488/2391990203.py:17: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " df_trains['GarageYrBlt'][i]= df_trains['YearBuilt'] [i]\n" + ] + } + ], + "source": [ + "# removing columns with many missing values (Alley 1369, MiscFeature 1406, Fence 1179, LotFrontage 259)\n", + "df_trains.drop(['Alley','MiscFeature','Fence','LotFrontage'], axis=1, inplace=True)\n", + "\n", + "#Removing rows\n", + "\n", + "# Electrical: drop the single row where it is null\n", + "df_trains.drop(df_trains[df_trains['Electrical'].isnull()].index, inplace=True)\n", + "# There is a relation between MasVnrType and MasVnrArea, so if one of them is removed the other one too\n", + "df_trains.drop(df_trains[df_trains['MasVnrType'].isnull()].index, inplace=True)\n", + "# there is a relation between BsmtFinType1 and BsmtFinType2; BsmtFinType2 has just one more null, so the rows where BsmtFinType2 is null are removed\n", + "df_trains.drop(df_trains[df_trains['BsmtFinType2'].isnull()].index, inplace=True)\n", + "#adding values in GarageYrBlt and GarageType\n", + "df_trains['GarageType'] = df_trains['GarageType'].fillna('NA')\n", + "# fill a missing GarageYrBlt with the same row's YearBuilt\n", + "# a boolean mask with .loc writes into df_trains itself (no chained indexing, so no SettingWithCopyWarning)\n", + "garage_year_missing = df_trains['GarageYrBlt'].isnull()\n", + "df_trains.loc[garage_year_missing, 'GarageYrBlt'] = df_trains.loc[garage_year_missing, 'YearBuilt']\n" + ] + }, + { 
+ "cell_type": "code", + "execution_count": 423, + "metadata": {}, + "outputs": [], + "source": [ + "null_counts = df_trains.isnull().sum()\n", + "for column, n_missing in null_counts[null_counts > 0].items():\n", + "    print(column, ' - ', n_missing)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "There are some column names that start with a number, so a character will be added at the beginning in order to complete every analysis needed" + ] + }, + { + "cell_type": "code", + "execution_count": 424, + "metadata": {}, + "outputs": [], + "source": [ + "# prefix every column whose name starts with a digit with '_' (presumably so the names work in statsmodels formulas - TODO confirm)\n", + "digit_led_columns = {name: '_' + name for name in df_trains.columns if name[0].isdigit()}\n", + "df_trains.rename(columns=digit_led_columns, inplace=True)\n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Checked whether any columns with nulls remain; there were none.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now, we will check which distribution would fit it" + ] + }, + { + "cell_type": "code", + "execution_count": 425, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 425, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAA3sAAAFzCAYAAACHARCnAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8/fFQqAAAACXBIWXMAAAsTAAALEwEAmpwYAABJY0lEQVR4nO3dd5hcZd3/8c93Zna2l2xJdpMtqaSQQAIhoSugoYoNFESxoOij/ixYHtHH9qiP2FCxoFixodIRQULvpAAhCUk2vewmW5NsbzNz//7Y2bAJm2ST7OyZ8n5d17nmzH3OzHx2zmay37nvc25zzgkAAAAAkFx8XgcAAAAAAIw8ij0AAAAASEIUewAAAACQhCj2AAAAACAJUewBAAAAQBKi2AMAAACAJBTwOsCxKC4udhMnTvQ6BgAAAAB44sUXX2xyzpUMtS2hi72JEydq+fLlXscAAAAAAE+Y2baDbWMYJwAAAAAkIYo9AAAAAEhCFHsAAAAAkIQo9gAAAAAgCVHsAQAAAEASotgDAAAAgCREsQcAAAAASYhiDwAAAACSEMUeAAAAACQhij0AAAAASEIxK/bM7Pdm1mBmqwe1/cPMVkSXrWa2Ito+0cy6Bm37VaxyAQAAAEAqCMTwuf8o6eeS/jTQ4Jx798C6mf1IUsug/Tc55+bGMA8AAAAApIyYFXvOuafMbOJQ28zMJL1L0rmxen0AAAAASGVenbN3lqR659yGQW2TzOxlM3vSzM462APN7FozW25myxsbG2OfFAmjorJKZnZUS0VlldfxAQAAgBEVy2Gch3KlpNsG3d8lqdI512xmJ0u6x8yOd861HvhA59wtkm6RpPnz57tRSYuEULNju25cXH1Uj71u0fQRTgMAAAB4a9R79swsIOkdkv4x0Oac63HONUfXX5S0SdJxo50NAAAAAJKFF8M43yRpnXOuZqDBzErMzB9dnyxpmqTNHmQDAAAAgKQQy6kXbpP0vKTpZlZjZtdEN12h/YdwStLZklZGp2K4Q9LHnHO7Y5UNAAAAAJJdLK/GeeVB2j8wRNudku6MVRYAAAAASDVeXY0TAAAAABBDFHsAAAAAkIQo9gAAAAAgCVHsAQAAAEASotgDAAAAgCREsQcAAAAASYhiDwAAAACSEMUeIEnmk5kd9VJRWeX1TwAAAADsJ2aTqgMJxUV04+Lqo374dYumj2AYAAAA4NjRswcAAAAASYhiDwAAAACSEMUeAAAAACQhij0AAAAASEIUewAAAACQhCj2AAAAACAJUewBAAAAQBKi2AMAAACAJESxBwAAAABJiGIPAAAAAJIQxR4AAAAAJCGKPQAAAABIQhR7AAAAAJCEKPYAAAAAIAlR7AEAAABAEqLYAwAAAIAkRLEHAAAAAEmIYg8AAAAAkhDFHgAAAAAkIYo9AAAAAEhCFHsAAAAAkIQo9gAAAAAgCVHsAQAAAEASotgDAAAAgCREsQcAAAAASYhiDwAAAACSUMyKPTP7vZk1mNnqQW3fMLNaM1sRXS4atO16M9toZtVmdn6scgEAAABAKohlz94fJV0wRPuPnXNzo8sDkmRmsyRdIen46GN+aWb+GGYDAAAAgKQWiNUTO+eeMrOJw9z9rZL+7pzrkbTFzDZKWiDp+VjlAw4Ujjg1tfeouy+snlBEPX0R5WQENLEoS2bmdTwAAADgiMSs2DuET5rZ1ZKWS/qcc26PpAmSXhi0T020DRgVaUUV+sfyHWps63ndtqLsoE6ZWKhp43Lko+gDAABAghjtYu9mSd+S5KK3P5L0oSN5AjO7VtK1klRZWTnS+ZBinHN6paZFpe//idq7Q3rTzLEqyAoqI+BTMOBT7d4uLduyR/95tU5LtqTp7ONKNLEo2+vYAAAAwGGN6tU4nXP1zrmwcy4i6TfqH6opSbWSKgbtWh5tG+o5bnHOzXfOzS8pKYltYCS17r6w7lmxU0+ub1T3tld01cJKHT8+XxMKMlWUk67cjDTNKM3Te0+t1EV
zSmVmuu+VnVq7q9Xr6AAAAMBhjWqxZ2Zlg+6+XdLAlTrvk3SFmaWb2SRJ0yQtHc1sSC3OOT2ytl41ezp1zvQSNd75v8pOH7qj28w0bWyu3j2/QuUFmVq8pl4rduwd3cAAAADAEYrZME4zu03SGyUVm1mNpK9LeqOZzVX/MM6tkj4qSc65V83sn5LWSApJ+oRzLhyrbMCq2hZtauzQWdOKdUJ5wbAeEwz4dOnc8frP6jo9ub5RPX1hLZhUyMVbAAAAEJdieTXOK4do/t0h9v+OpO/EKg8woKm9R09taFJVYZbmVRQc0WMDPp8uml2mR9bV64Utu2VmWjCpMDZBAQAAgGPgxdU4Ac+EwhH9Z3Wdgn6f3jxr3FH1yvl8pjfPHCfnpOc3N6skNz0GSQEAAIBjM6rn7AFee3pjk5o7enX+8eMOeo7ecJiZzp0xViU56Xro1ToFCsoO/yAAAABgFFHsIWXUtXRrZU2L5lUWqGoEpk9I8/t08QllMkklb/+yOntDxx4SAAAAGCEUe0gZS7fuVkbAp1MnFY3Yc+ZnpumC2aVKK6nSl+5cJefciD03AAAAcCwo9pASGtt6tKWpQ/MqxygYGNlf+6qibO196s+675Wd+vuyHSP63AAAAMDRothDSli2dbeCfp9OLM+PyfO3vnCHTp9SpO/8e61q93bF5DUAAACAI0Gxh6S3u6NXGxradWJFvtLT/DF6FafvvfMERZzTl+5cyXBOAAAAeI5iD0lv2dbdCvhMc49wTr0jVVGYpesvmqmnNzQd0XDOisoqmdlRLxWVVTH8qQAAAJComGcPSW1vZ6+q69s0t6JAWcHY/7pftaBSD67ape/8e63OPq5EEwoyD/uYmh3bdePi6qN+zesWTT/qxwIAACB50bOHpPbitj3ymemkyjGj8no+nzGcEwAAAHGBYg9Jq7svrLW72jSrLE85xzCB+pEaPJzznhW1o/a6AAAAwGAUe0ha6+vbFHZOsyfkjfprX7WgUidWFOg7/16nlq6+UX99AAAAgGIPSWvtrjYV5QRVkpM+6q/t85m+/dbZau7o0Y8fXj/qrw8AAABQ7CEp7enoVV1rt2aW5snMPMkwpzxf711YpT89v1Wra1s8yQAAAIDURbGHpLS2rlUmaUZprqc5Pr9ousZkBfXVe1crEuFiLQAAABg9FHtIOs45rd3VpsqiLGWP4oVZhpKflabrL5qpl7fv1e0vDn/uPQAAAOBYUewh6dTs6VJ7T0gzS0f/wixDeedJE3TKxDG64UEu1gIAAIDRQ7GHpLO2rlVBv09TSrK9jiJJMjN949LjtberTz97dIPXcQAAAJAiKPaQVPrCEW1saNe0cTkK+OPn1/v48fl69/wK/fG5rdrc2O51HAAAAKSA+PlrGBgBGxva1Rd2cTOEc7DPLZqujDS//u+BdV5HAQAAQAqg2ENSWVfXpryMgMYXZHgd5XVKctP1iXOm6pG19XpmQ5PXcQAAAJDkKPaQNCw9WzV7OjVtXK5nc+sdzgfPmKjyMZn61v1rFApHvI4DAACAJEaxh6SROflkRZzi5sIsQ8lI8+vLF81UdX2b/r6MqRgAAAAQOxR7SBpZUxcqM82vcXnxN4RzsAtnl2rBxEL9+OH1au8JeR0HAAAASYpiD0mhNxRR5uSTNak4W744HcI5wMx0/UUz1NzRq1ue2ux1HAAAACQpij0khWVbd8uXkRPXQzgHm1c5RhfPKdNvn94sX3aB13EAAACQhCj2kBQeXlOvSF+PKgqzvI4ybF84f7p6QxEVnPEer6MAAAAgCVHsIeE55/Twmnp1b12htDiaSP1wJhZn6z0LK5Vz4vna09HrdRwAAAAkmcT5yxg4iHV1bard26WujS94HeWIfeq8aXJ9PXp2E/PuAQAAYGRR7CHhPbymXmZS58ZlXkc5YsU56Wpdepc2NXZoV0uX13EAAACQRCj2kPAeWVuvuRUFinTu9TrKUWlddo+ygn49u7FZzjmv4wAAACB
JUOwhodW1dGtlTYveNHOc11GOmuvr1oKJhard26Xtuzu9jgMAAIAkQbGHhPbounpJ0qJZHhd75pOZHdUiScdPyFNuRkDPb6Z3DwAAACMj4HUA4Fg8vq5BFYWZmjo2x9sgLqIbF1cf1UOvWzRdAZ9PCycV6pG1Ddrc1KEpJR7/PAAAAEh49OwhYfWFI3ph826dPa1kXw9ZIptZmqeCzDR69wAAADAiYlbsmdnvzazBzFYPavuBma0zs5VmdreZFUTbJ5pZl5mtiC6/ilUuJI8VO/aqvSeks6YVex1lRPh8plMnF6m5vVfr69u9jgMAAIAEF8uevT9KuuCAtoclzXbOnSBpvaTrB23b5JybG10+FsNcSBJPb2iSz6TTpiRHsSdJx43LUVFOUC9sblYkQu8eAAAAjl7Mij3n3FOSdh/Qttg5F4refUFSeaxeH8nvmQ2NOqG8QPmZaV5HGTFmptMmF2lvV5/W1rV6HQcAAAAJzMtz9j4k6cFB9yeZ2ctm9qSZneVVKCSG1u4+vVLTkjRDOAebXJytsbnpWrZ1D717AAAAOGqeFHtm9hVJIUl/jTbtklTpnJsn6TpJfzOzvIM89lozW25myxsbG0cnMOLO85uaFY44nTk1+Yo9M9PCSYVq6epTdX2b13EAAACQoEa92DOzD0i6RNJVLnrJQedcj3OuObr+oqRNko4b6vHOuVucc/Odc/NLSkpGKTXizTMbmpQV9Gte5Rivo8TEpOJsFecEtXTLbnr3AAAAcFRGtdgzswskfVHSpc65zkHtJWbmj65PljRN0ubRzIbE8szGJi2cVKhgIDlnD+nv3es/d299A717AAAAOHKxnHrhNknPS5puZjVmdo2kn0vKlfTwAVMsnC1ppZmtkHSHpI8553YP9bxAzZ5ObWnq0JnTkrtnd0pJtooGeveYdw8AAABHKBCrJ3bOXTlE8+8Osu+dku6MVRYkl2c2NElSUl6cZTAz08KJhXpgdZ02NrTruHG5XkcCAABAAknOMXBIak9vbNK4vHRNG5vjdZSYmzo2R4XZ/b17jt49AAAAHAGKPSSUSMTpuY1NOmNqsczM6zgxZ2ZaMLFQzR292tTY4XUcAAAAJBCKPSSUV3e2ak9nX9IP4Rxs2tgc5Wemafk2evcAAAAwfBR7SCjPbuo/X++MJJxf72B8PtNJlQWqb+1R7d4ur+MAAAAgQVDsIaEs3bJbU0qyNTY3w+soo2pWWZ6ygn4t37rH6ygAAABIEBR7SBjhiNOyrbu1YFKh11FGXcDv09yKAm3b3anGth6v4wAAACABUOwhYVTXtamtO5SSxZ4knTAhX0G/T8u3MQUlAAAADo9iD3GjorJKZnbQ5dS3XCVJuvwNJw25Pdmlp/k1Z0K+NtS3q6Wrz+s4AAAAiHMxm1QdOFI1O7brxsXVB93+71W7VN/arR/c+cyQ269bND1W0eLG3MoCrdixVy9u26NzZ4z1Og4AAADiGD17SAjOOdXu6dKEgkyvo3gqJz2gGWW5WrOrVZ29Ia/jAAAAII5R7CEh7O3sU1dfOOWLPUk6qXKMwhGnVTUtXkcBAABAHKPYQ0IYmF+OYk8qzA6qqihLK2tbFIpEvI4DAACAOEWxh4RQu7dLmWl+FWSleR0lLsyrKFBnb1jr69q9jgIAAIA4RbGHhFC7t0sTxmSmxFU3h6OyMEtF2UG9vINJ1gEAADA0ij3EvdauPrV1hxjCOYiZaV5lgZrae5VReYLXcQAAABCHKPYQ93Zyvt6Qpo/LVWaaX7mnvM3rKAAAAIhDFHuIe7V7uxQM+FSUE/Q6SlwJ+H06oTxfWVMXaFMj5+4BAABgfxR7iHu1e7s0Pj9DPs7Xe505E/LlQn36w7NbvI4CAACAOEOxh7jW2RvSns4+TRjDEM6hZKcH1LHmCd35Yq1auvq8jgMAAIA4QrGHuMb8eofX9tL96uoL666XaryOAgAAgDhCsYe4tnNPtwI+09jcDK+jxK3e+k2aW1GgP7+
wTc45r+MAAAAgTlDsIa7V7u1SaX6G/D7O1zuUq0+r0ubGDj23qdnrKAAAAIgTFHuIWz19YTW29zCEcxgumlOmMVlp+vPz27yOAgAAgDhBsYe4tbOlWxLn6w1HRppf7zqlQg+vrdeuli6v4wAAACAOUOwhbtXu7ZLPpNJ8ztcbjvcurFLEOd22dIfXUQAAABAHKPYQt3bu7dK4vAyl+fk1HY6KwiydM32sblu6Xb2hiNdxAAAA4DH+ikZc6gtHVN/azRDOI/S+U6vU2NajxWvqvI4CAAAAj1HsIS7VtXQr4qTxFHtH5OzjSlRRmMmFWgAAAECxh/g0MJn6+ALO1zsSfp/pvQurtGTLblXXtXkdBwAAAB6i2ENcqt3bpZLcdKUH/F5HSTiXz69QMODTX16gdw8AACCVUewh7oQjTnUtnK93tAqzg7rkhDLd9VKN2ntCXscBAACARyj2EHca2roVijiKvWNw9WkT1dEb1t0v1XgdBQAAAB6h2EPcqd3D+XrH6sTyfM2ZkK8/v7BNzjmv4wAAAMADFHuIO7V7uzQmK01ZwYDXURKWmel9p1ZpfX27lm7Z7XUcAAAAeIBiD3El4px2cr7eiHjLieOVn5mmP3GhFgAAgJREsYe40tzeq95QRBPGUOwdq8ygX5efXK6HVtepobXb6zgAAAAYZTEt9szs92bWYGarB7UVmtnDZrYhejsm2m5mdpOZbTSzlWZ2UiyzIT4NzK9Hz97IuOrUKoUiTn9ftsPrKAAAABhlse7Z+6OkCw5o+5KkR51z0yQ9Gr0vSRdKmhZdrpV0c4yzIQ7V7ulSXkZAuRlpXkdJCpOKs3XWtGLdtnS7QuGI13EAAAAwimJa7DnnnpJ04NUh3irp1uj6rZLeNqj9T67fC5IKzKwslvkQf2r3dmk8vXpHxnwys4Mud37vs9rV0q28Gae/bltFZZXX6QEAABAjXlzucJxzbld0vU7SuOj6BEmDx5rVRNt2DWqTmV2r/p4/VVZWxjYpRlWgcIK6+sIM4TxSLqIbF1cfdHMk4vSH57aq6iPf1dvmTdhv23WLpsc6HQAAADzi6QVaXP8EYEc0CZhz7hbn3Hzn3PySkpIYJYMXMipmSxIXZxlhPp/p+PF52ra7Uy1dfV7HAQAAwCgZVrFnZmcMp22Y6geGZ0ZvG6LttZIqBu1XHm1DikgvP15ZQb8KMjlfb6TNHp8vM2lVbYvXUQAAADBKhtuz97Nhtg3HfZLeH11/v6R7B7VfHb0q56mSWgYN90QKyKiYrQkFmTIzr6MknZyMgCYXZ2vNzlaFIlyoBQAAIBUc8pw9MztN0umSSszsukGb8iT5D/fkZnabpDdKKjazGklfl3SDpH+a2TWStkl6V3T3ByRdJGmjpE5JHzyinwQJrWZPpwL5Y7k4SwzNmZCvTY0d2tjQrhmleV7HAQAAQIwd7gItQUk50f1yB7W3SrrscE/unLvyIJvOG2JfJ+kTh3tOJKelW/ov2srFWWKnsjBL+ZlpWlXTQrEHAACQAg5Z7DnnnpT0pJn90Tm3bZQyIQUt27pb4e52FeUEvY6StMxMcybk65mNTWpq71FxTrrXkQAAABBDwz1nL93MbjGzxWb22MAS02RIKUu27FZPzRr5OF8vpmaNz5PfZ1pVw4VaAAAAkt1w59m7XdKvJP1WUjh2cZCKGtt6tLmxQz07VnsdJellpvk1bWyO1tW16YypxV7HAQAAQAwNt9gLOedujmkSpKxlW/vP1+um2BsVJ5Tna11dm6rr27yOAgAAgBga7jDOf5nZx82szMwKB5aYJkPKWLpltzLT/Oqt3+R1lJRQmpeh4pwgQzkBAACS3HB79gbmxfvCoDYnafLIxkEqWrJlt06qKtC6CCOER8PAhVoer25UcPx0r+MAAAAgRobVs+ecmzTEQqGHY9bS1ad1da1aMLHI6ygpZUZpntL8pty5F3odBQAAADEyrJ49M7t6qHbn3J9GNg5SzYvbdss5acEkRgW
PpmDApxmleXplxlna29mrgiymvAAAAEg2wz1n75RBy1mSviHp0hhlQgpZsmW30vymeZUFXkdJOXMm5MuXlq47XqzxOgoAAABiYFg9e865/zf4vpkVSPp7LAIhtSzdslsnlBcoI83vdZSUU5Kbru6aNfrrkmx96IxJ8vmY4xAAACCZDLdn70AdkiaNZBCkns7ekFbVtDCE00PtLz+gLU0demZjk9dRAAAAMMKGVeyZ2b/M7L7o8m9J1ZLujm00JLsV2/cqFHEUex7qqH5GxTlB3frcVq+jAAAAYIQNd+qFHw5aD0na5pzjRB8ckyVbdstn0slVY7yOkrrCIb1nYZV+9tgGbW3q0MTibK8TAQAAYIQMd+qFJyWtk5QraYyk3liGQmpYumW3Zo3PU15GmtdRUtp7F1bKb6Zbn9/qdRQAAACMoOEO43yXpKWSLpf0LklLzOyyWAZDcusNRfTS9j3MrxcHxuZl6OITynT78hq194S8jgMAAIARMtwLtHxF0inOufc7566WtEDSV2MXC8luVe1e9YQiWjCJIZzx4INnTFJ7T0h3Mg0DAABA0hhusedzzjUMut98BI8FXmfJlt2SpFMmcnGWeDC3okBzKwp063NbFYk4r+MAAABgBAy3YPuPmT1kZh8wsw9I+rekB2IXC8lu2Zbdmjo2R0U56V5HQdQHz5iozU0dempDo9dRAAAAMAIOWeyZ2VQzO8M59wVJv5Z0QnR5XtIto5APSSgccVq+dQ9TLsSZC2eXqSQ3XX94dqvXUQAAADACDtez9xNJrZLknLvLOXedc+469c+x95PYRkOyWrurVW09IS2k2IsrwYBP7zu1Sk+ub9SG+jav4wAAAOAYHa7YG+ecW3VgY7RtYkwSIekt5Xy9uPXeU6uUkebTLU9t9joKAAAAjtHhir2CQ2zLHMEcSCFLt+xW+ZhMjS/gVyjeFGYH9e75FbpnRa3qWrq9jgMAAIBjcLhib7mZfeTARjP7sKQXYxMJySwScVqypVkLJzG/Xrz68FmTFY44/eG5LV5HAQAAwDEIHGb7ZyTdbWZX6bXibr6koKS3xzAXktT6hjbt6ezTaVMo9uJVRWGWLppTpr+9sF2fOGeq8jLSvI4EAACAo3DInj3nXL1z7nRJ35S0Nbp80zl3mnOuLvbxkGye39QsSVycJc599OwpausJ6bYl272OAgAAgKN0uJ49SZJz7nFJj8c4C1LAC5ubVT4mUxWFWV5HwSHMKc/X6VOK9Ptnt+iDZ0xSMDDcKTkBAAAQL/gLDqOm/3y93TptMkM4E8FH3zBF9a09undFrddRAAAAcBQo9jBq1tW1aW9nn06l2EsIZ08r1ozSXP3qyU0KR5zXcQAAAHCEKPYwal7Y3H++3qlcnCUhmJk+ee5UbWrs0P0rd3odBwAAAEeIYg+j5oXNzaoszNIE5tdLGBfNLtNx43J006Mb6N0DAABIMBR7GBUD5+udOpmrcCYSn8/06fOOo3cPAAAgAVHsYVSsrWtVSxfz6yWiC2eXavq4XP2U3j0AAICEQrGHUfHa/HoUe4nG5zN9+k3TtLmxQ/96hd49AACAREGxh1HxwubdqirK0njO10tIFxxfqhmlubrp0Q0KhSNexwEAAMAwUOwh5sIRp6VbmplfL4H5fKbPvGmaNjd16D569wAAABLCqBd7ZjbdzFYMWlrN7DNm9g0zqx3UftFoZ0NsrN3VqtbuEPPrJbhFs0o1syxPNz68Xj2hsNdxAAAAcBijXuw556qdc3Odc3MlnSypU9Ld0c0/HtjmnHtgtLMhNvbNr0exl9B8PtOXL5qhmj1duvW5rV7HAQAAwGF4PYzzPEmbnHPbPM6BGHp2Y5MmFWerND/D6yg4RmdNK9Ebp5foZ49t1O6OXq/jAAAA4BC8LvaukHTboPufNLOVZvZ7MxvjVSiMnN5QREu27NaZU4u9joIR8uWLZqqjJ6SbHt3gdRQAAAAcgmfFnpkFJV0q6fZo082SpkiaK2m
XpB8d5HHXmtlyM1ve2Ng4GlFxDFbs2KvO3rDOoNhLGseNy9UVCyr1lxe2aXNju9dxAAAAcBBe9uxdKOkl51y9JDnn6p1zYedcRNJvJC0Y6kHOuVucc/Odc/NLSkpGMS6OxjMbm+QzMZl6kvnsm45TesCnGx5c53UUAAAAHISXxd6VGjSE08zKBm17u6TVo54II+6ZDY06obxA+ZlpXkfBCCrJTdfHz5mqxWvq912ABwAAAPHFk2LPzLIlvVnSXYOav29mq8xspaRzJH3Wi2wYOa3dfXqlpoXz9ZLUNWdO0oSCTH3t3tXqDTHROgAAQLzxpNhzznU454qccy2D2t7nnJvjnDvBOXepc26XF9kwcpZs3q1wxHG+XpLKSPPrf996vNbXt+s3T2/2Og4AAAAO4PXVOJHEnt3YpIw0n06qKvA6CmLkvJnjdOHsUt306AZta+7wOg4AAAAGodhDzDyzsUkLJhUpPeD3Ogpi6OtvOV5pfp/+557Vcs55HQcAAABRFHuIibqWbm1saNeZU7kKZ7Irzc/QF86frqc3NOm+V3Z6HQcAAABRFHuIiWc2NkkS5+uliPeeWqUTyvP1rfvXqKWzz+s4AAAAEMUeYuTZjU0qyg5qZmme11EwCvw+0/+9fY72dPbpa/cxawoAAEA8oNjDiHPO6ZmNTTp9arF8PvM6DkbJ7An5+tS503Tvip0M5wQAAIgDFHsYcRsa2tXY1sP5einoE+dM0bzKAv3P3au0q6XL6zgAAAApjWIPI+6p9Y2SOF8vFQX8Pv34XXPVF3b6/O2vKBLh6pwAAABeodjDiHtyfaOmlGSrfEyW11HggYnF2frqJbP07MZm/fG5rV7HAQAASFkUexhRnb0hLdm8W+dMH+t1FHjoygUVOm/GWN3wn3Vas7N1yH0qKqtkZke9VFRWjfJPBQAAkFgCXgdAcnluY7N6wxG9kWIvpZmZvnfZCbrkpmf0X399Ufd98kzlZ6btt0/Nju26cXH1Ub/GdYumH2tMAACApEbPHkbUE+sblBX065RJY7yOguEwX8x614pz0vWLq+apdk+XvnD7K3KO8/cAAABGEz17GDHOOT1R3ajTpxQpPeD3Og6Gw0Vi2rt2clWhrr9opr51/xrd8tRmffQNU476tQAAAHBk6NnDiNnU2KGaPV16A0M4MciHzpioi+eU6fsPVWvJ5mav4wAAAKQMij2MmCeqGyRJbzyuxOMkGDXDGAbq8/l084fOVHfjDl1244MK5JXIzLxODgAAkPQYxokR8+T6Rk0dm6OKQqZcSBlHMAy0ub1H/1xeo3n/fZsuP7lc/33hzBiHAwAASG307GFEDEy5QK8eDqYoJ10XzC5VU1uPHnq1ThK9ewAAALFEsYcR8fwmplzA4U0qztaZ04q1qbFD+Wdd5XUcAACApEaxhxHxRHUjUy5gWOZVFGhWWZ4KTr9C6+qGnnAdAAAAx45iD8fMOafHqxuYcgHDYmY6d8ZYdW9fpUfWNKhmT6fXkQAAAJISxR6OGVMu4Ej5fabGu7+j/Mw0/WvlLjW193gdCQAAIOlQ7OGYPbK2XpJ03gyKPQxfpLtdb503Xml+070rdqqtu8/rSAAAAEmFYg/HbPGrdZo9IU/jCzK9joIEk5eRpreeOEG9oYjuWbFT3X1hryMBAAAkDYo9HJPGth69vGOv3jyz1OsoSFAluem65IQy7e3s1b9W7lQoHPE6EgAAQFKg2MMxeXRtvZyTFh0/zusoSGAVhVlaNKtUO/d266FX6xVxzutIAAAACY9iD8dk8Zp6lY/J1IzSXK+jIMFNL83VWdOKtbGxXU+tb5Sj4AMAADgmFHs4ah09IT2zsUlvnjVOZuZ1HCSBkyrHaF5lgV6padGL2/Z4HQcAACChUezhqD29oVG9oYgWzeo/X6+iskpmdtQLIElnTS3WceNy9OymZq3ZyaTrAAAARyvgdQDEl4rKKtXs2D6sfYsu+qwyp56i06eNlVz/RTVuXFx91K9
93aLpR/1YJA8z05tnjVNXX1iPrK1XMODT1LE5XscCAABIOBR72E/Nju3DKtgiEaffPL1ZE4uzdd1DayVRrGHkBHw+XTJnvO5+uVb/WV2nS+eOV2VhltexAAAAEgrDOHFUdrZ0qTsU0eSSbK+jIEkFAz69de54FWSn6f6VO7WrpcvrSAAAAAmFYg9HZVNjh/w+U1UhxR5iJyPNr7fPnaCsYED3rtippvYeryMBAAAkDIo9HDHnnDY3tqtiTKaCAX6FEFvZ6QG9Y94Epfl9uvvlWu3t7PU6EgAAQELgL3Ucsca2HrV2hzSlhItmYHTkZabp7fMmKOKc7nq5Vu3dIa8jAQAAxD2KPRyx9Q3t8pk0hSskYhQVZgf1trkT1NMX0d0v18qXmed1JAAAgLhGsYcj4pzThvo2VRRmKTPN73UcpJhxeRl6y4llaunu09jLv6m27j6vIwEAAMQtz4o9M9tqZqvMbIWZLY+2FZrZw2a2IXo7xqt8GFpDdAjnNHr14JHyMVm6eE6ZgmMn6cO3Lld3X9jrSAAAAHHJ6569c5xzc51z86P3vyTpUefcNEmPRu8jjmyojw7h5Hw9eGhScbaa/n2jlm7drY//9SX1hSNeRwIAAIg7Xhd7B3qrpFuj67dKept3UXAg55zWN7SpsjBLGQzhhMc61z6lb79tth5b16DP3/6KIhHndSQAAIC44mWx5yQtNrMXzezaaNs459yu6HqdpHEHPsjMrjWz5Wa2vLGxcbSyQlJ9a4/aukM6blyu11EASdJVC6v03xfM0L0rduqr966WcxR8AAAAAwIevvaZzrlaMxsr6WEzWzd4o3POmdnr/nJzzt0i6RZJmj9/Pn/ZjaL1DW3ym2lyMROpI3781xunqLW7Tzc/sUnBgE9fu2SWzMzrWAAAAJ7zrNhzztVGbxvM7G5JCyTVm1mZc26XmZVJavAqH/bXfxXOdlUWZSmdIZyIM188f7p6+iL6/bNbFPT79KULZ1DwAQCAlOfJME4zyzaz3IF1SYskrZZ0n6T3R3d7v6R7vciH19vV0q32npCO4yqciENmpq9eMlPvO7VKv35qs258eL3XkQAAADznVc/eOEl3R795D0j6m3PuP2a2TNI/zewaSdskvcujfDjAhoZ2+X2mSSUM4UR8MjN989LjFYpE9LPHNirN79OnzpvmdSwAAADPeFLsOec2SzpxiPZmSeeNfiIcSiQ6kfrEoiylBxjCifjl85m+87Y56gs73fjwegX8po+/carXsQAAADzh5QVakCB27O5UR29Y00u5Cifin89n+t47T1BfOKLv/6daQb9PHz5rstexAAAARh3FHg5r7a42pQd8msRVOJEg/D7Tjy4/UaGw07f/vVZpfp/ef/pEr2MBAACMKoo9HFJPKKxNje2aWZangM/LaRmBIxPw+/STK+aqLxzR1+97VT6f6X2nVnkdCwAAYNTw1zsOaWNDu0IRp5llDOFE4knz+/Tz95ykN80cq6/es1q/e2aL15EAAABGDcUeDmntrjYVZKapNC/D6yjAUQkGfPrlVSfrwtml+tb9a3TzE5u8jgQAADAqKPZwUK1dfard26WZZXlMUI2EFgz49LMr5+nSE8fre/9Zp588sl7OOa9jAQAAxBTn7OGg1tW1SZJmcBVOJIGA36cfv3uuggGffvLIBvWGIvrC+dP5IgMAACQtij0MyTmntbtaVV6QqbzMNK/jACPC7zN9/50nKM3v0y+f2KSeUET/c/FMCj4AAJCUKPYwpLrWbu3t6tP8iWO8jgKMKJ/P9H9vn630gE+/e2aLekJh/e+ls+XzUfABAIDkQrGHIa3Z1aqAzzRtLEM4kXzMTF9/yyylB3z69VOb1RuK6LvvOEF+Cj4AAJBEKPbwOr2hiNbXtWva2BwFA1zDB8nJzPSlC2coPeDTTY9tVEdPWDe++0SlB/xeRwMAABgRFHt4nfX1beoNRzR7Qr7XUYCYMjNdt2i6cjPS9J0H1qq1u0+/eu/
Jyk7noxEAACQ+um3wOqtqW1SUHVRZPnPrITlVVFbJzPYt175hipoe+Imeqq7X1I/8VP7MvP22H7hUVFZ5/SMAAAAcFl9fYz/B0qlqaOvRG48r4QqFSFo1O7brxsXVr2vf1NiuB1cHdOL1d+pt88YrN2PoK9Fet2h6rCMCAAAcM3r2sJ+cuRcq4DPNKOPCLEg9U0py9La549XeE9LtL9ZoT2ev15EAAACOGsUe9mnr7lP2zLN13LhcLlKB+Ge+Qw61PNRyKOVjsvTOkyYoFHa6fXmNGlq7R+kHAgAAGFkM48Q+96zYKV8wU3O4MAsSgYsMORRzOA43DHNsXoYun1+uu1+u1Z0v1eotJ5apfEzWUb0WAACAV+jZgyTJOae/Ldmu3vpNGpeX7nUcwHNjsoK6/ORy5aQHdM/LO7Whvs3rSAAAAEeEYg+SpJd37NXaXa1qW/EfLswCROVmpOmy+eUam5euB1bXacWOvV5HAgAAGDaKPUiS/vz8NmUH/epY84TXUYC4kpnm1zvmTdDk4mw9ub5Rz2xs8joSAADAsFDsQXUt3frXKzv1rlMq5Hq7vI4DxJ2A36eLTyjTnAn5enHbHhVdfJ16QxGvYwEAABwSxR70p+e3KuycPnj6JK+jAHHLZ6ZzppfotMlFypl9rq65dZnae0JexwIAADgoir0U19kb0l+XbNf5s0pVWcTVBoFDMTMtmFSopgd+quc2NeuKW55XY1uP17EAAACGRLGX4u58qVYtXX265ix69YDh6lj1sH5z9cna1NChd9z8rDY3tnsdCQAA4HUo9lJYJOL0+2e26MTyfM2vGuN1HCChnDtjnG679lR19IT1jpuf05LNzV5HAgAA2A/FXgp7vLpBW5o6dM1Zk5luATgKcysKdPfHT1dhdlDv/d0S3f1yjdeRAAAA9qHYS2G/fXqLyvIzdOHsUq+jAInFfDIzmZkmFufoya9corbNK/TZf7yigjOv2rdtqKWissrr9AAAIEUEvA4Ab7y6s0XPb27W9RfOUJqfmh84Ii6iGxdX79cUjjg9uq5ea898j0697KM6b+ZYBXyv/7d13aLpo5USAACkOIq9FPWLxzcqNz2gKxZUeh0FSAp+n+nNM8epICuo5zc1q7W7T5ecMF6ZaX6vowEAgBRFl04K2lDfpgdX1+n9p09Ufmaa13GApGFmWjCxUBccX6r61h79c9kO7ens9ToWAABIURR7Kejnj29UZppfHzqT6RaAWJhemqt3zJugnlBE/1y2Q9t3d3odCQAApCCKvRSzubFd/3plp953WpUKs4NexwGS1viCTL1rfrmy0wO65+VavbR9j5xzXscCAAAphHP2UswvHt+kYMCnj5w12esoQNIryArqXfMrtHhNnZ7e0KSGth5ZgC9ZAADA6KBnL4Vsb+7UPStq9Z4FVSrOSfc6DpASggGfLp5TptMmF6m6rk3jrvq+tjZ1eB0LAACkAIq9FPLLJzbK7zN99A306gGjycy0YFKhLj1xvAL543TJz57Rfa/s9DoWAABIcqNe7JlZhZk9bmZrzOxVM/t0tP0bZlZrZiuiy0WjnS2Z7djdqTtfqtEVp1RoXF6G13GAlDSpOFu7/vApTS/N1adue1nX37VSXb3hUXv9isqqQ074friFCeEBAEgsXpyzF5L0OefcS2aWK+lFM3s4uu3HzrkfepAp6f1wcbX8PtN/vXGK11GAlBZua9Tfrz1VP354vX75xCYt27pHP7z8RM2tKIj5a9fs2P66yeCPBBPCAwCQWEa9Z885t8s591J0vU3SWkkTRjtHKlld26J7V+zUh86YpLL8TK/jACkvze/TFy+YoT9fs0AdPSG945fP6oYH16m7b/R6+QAAQPLz9Jw9M5soaZ6kJdGmT5rZSjP7vZmNOchjrjWz5Wa2vLGxcbSiJiznnP7vgbUak5Wmj9GrB8SVs6aV6KHPnq13n1KhXz25SRff9LSWb93tdSwAAJAkPCv2zCxH0p2SPuOca5V
0s6QpkuZK2iXpR0M9zjl3i3NuvnNufklJyWjFTVhPrm/Uc5ua9anzpikvI83rOAAOkJeRpu++4wT96UML1N0X0WW/el6fuu1l7dzb5XU0AACQ4Dwp9swsTf2F3l+dc3dJknOu3jkXds5FJP1G0gIvsiWTcMTphgfXqbIwS1ct5MIKQDw7+7gSPXzd2frUedP00Kt1OvdHT+jHD69XZ2/I62gAACBBeXE1TpP0O0lrnXM3DmovG7Tb2yWtHu1syebul2u1rq5NXzh/uoIBZtkA4l1WMKDr3nycHv3cG/SmmeP000c36IwbHtMvHt+otu4+r+MBAIAE40UFcIak90k694BpFr5vZqvMbKWkcyR91oNsSaOzN6QfLa7WieX5unhO2eEfACBulI/J0s/fc5Lu+vjpmltRoB88VK0zbnhMNy6uVkNrt9fxAABAghj1qRecc89IsiE2PTDaWZLZTx/doF0t3frZlfPk8w31dgOIdydVjtEfPrhAq2pa9LPHNuimxzbql09s0ptnjdN7FlbqjCnF/PsGAAAH5cU8e4ix6ro2/e7pLXrX/HLNn1jodRwAx2hOeb5uuXq+Nje267al23XHizV6cHWdKguz9JYTy3TxnPGaWZar/lHyAAAA/Sj2koxzTl+9Z7VyMgL60oUzvY4DYARNLsnRVy6epc8tmq6HXq3THS/W6FdPbtYvHt+kySXZunB2qc6bOU4nlhfIT48fAAApj2IvydzxYo2Wbt2t771zjgqzg17HARADGWl+vXXuBL117gQ1t/fooVfrdf/KnfsKv8LsoN44vUTnzRins44rZtoVAABSFMVeEtnT0avvPrhOJ1eN0eUnV3gdB8AoKMpJ13sWVuo9Cyu1t7NXT65v1OPrGvTYugbd9VKtAj7TKRMLdd7MsQqMGe91XAAAMIoo9pLIDQ+uU0tXn779ttlctAFIQQVZwX09fqFwRC/v2KvH1jXosbUN+va/12rCtbfoj89t1aTibE0qztaEgkyGewIAkMQo9pLEY+vq9Y/lO/TRN0zWzLI8r+MA8FjA79MpEwt1ysRC/fcFM1Szp1Mzz7tcVVd+XqtqW7Rix14F/T5VFmZpYnGWJhZlKzud/xIAAEgm/M+eBJrbe/TFO1ZpRmmurnvzcV7HARCHysdkqf3lB/S27/1YfeGIduzu1JamDm1p7tDGxnZJUkVhpmaW5mlKSY6CAS+mYQUAACOJYi/BOed0/V2r1NrVpz9fs0DpAb/XkQAcivmOaYqE8opK7di+7ZgipPl9mlySo8klOXLOqam9Vxsb2lVd36bFa+oV8DVoytgczRmfr/EFGUzpAABAgqLYS3C3L6/R4jX1+vJFMxi+CSQCF9GNi6uP+uHXLZo+gmEkM1NJbrpKctN16uRC7Wrp1tq6Vq2vb1d1XZuKsoOaMyFfM8pyR/R1AQBA7FHsJbDtzZ365r9e1amTC/XhMyd7HQfAaDjGnsFDPrWZxhdkanxBps6eVqLq+jatqmnRE+sb9eymJo055xrt3Nul8QWZMXl9AAAwsij2ElR3X1ifvO0l+cz0w8tP5OqbQKo4hp7BI+kVTPP7NHt8vmaPz1dda7dWbN+r3vmX6uzvP65LTxyva98wWTNKGU0AAEA84wz8BOSc01fuXq2VNS360btOVPmYLK8jAUhipXkZumB2qWp//RG977QqPbi6Thf85Gl94A9L9dymJjnnvI4IAACGQLGXgG59bqvufKlGnz5vmhYdX+p1HAApItzaoK+/5Xg9f/25+vyi47S6tkXv+c0SvfUXz+rfK3cpHKHoAwAgnlDsJZjnNzXrW/9eqzfNHKdPnzfN6zgAUlBBVlCfPHeanvnvc/V/b5+jtu6QPvG3l/SmG5/U35duV08o7HVEAAAgir2EUrOnU5/420uaWJSlH7+b8/QAeCsjza/3LKzUI9e9Qb+86iTlpAf0pbtW6azvPa5bntqk9p6Q1xEBAEhpFHsJoqm9R1f/bqn6whHdcvV
85WakeR0JACRJfp/pojlluu+TZ+gv1yzUtHE5+r8H1un07z6qHy2uVnN7j9cRAQBISVyNMwG0dPXp6t8t1c6WLv3lmoWaUpLjdSQAqegIpn0Ilk5Tx6mX6aauXv30oVelbcv1n5u+pFnjuYInAACjhWIvznX1hvXhW5dpQ0ObfnP1fM2fWHjI/Ssqq1SzY/sopQOQUo5i2ofdHb16efserdTJuuimp7VgUqGuPq1Kb541TukBf4yCAgAAiWIvrvWEwvrYX17U8m179LMr5+mN08ce9jE1O7Yf9Rxc0pHNwwUAh1OYHdR5M8fpwS9eqF8tXqlbn9+qT/7tZeVnpumtc8frspPLNWdCfswmigcAIJVR7MWp9p6QPvrn5Xp2Y7O++445uuSE8V5HAoCjFunp0EfOnqwPnTlJz25s0h0v1ugfy3boT89v0+SSbJ1/fKkWzRqnE8sLYnLxqWMZ9VBeUakd27eNcCIAAGKPYi8ONbX36AN/WKq1u9r0w8tP1GUnl3sdCQBGhN9nOvu4Ep19XIlauvr075W79MCqXfrNU5t18xObNC4vXWdPK9HCyUVaOKlQFYVZI/K6xzLqgREPAIBERbEXZ7Y3d+qsr/1DLiNPTffcoMu/u9zrSAAQE/mZaXrPwkq9Z2GlWjr79Fh1vRa/Wq+H19br9hdrJEkTCjI1Z0K+ZpblaUZZrmaW5ml8QYYCfi4mDQDA4VDsxZElm5v1ib+9rEggQ1eeNlVlF/z1iJ+Db6ABJKL8rDS9fV653j6vXJGIU3V9m5ZsbtayrXu0ZlerHlpTJ+f69/X7TGX5GSofk6nx+ZnKz0pTQWZQ+ZmBfet5mWnKz0xTXmZAOen8VwcASE38DxgHnHP69VOb9YOHqlVZmKVX/vpFlV3yqNexAMATPp9pZlmeZpbl6QNnTJIkdfSEVF3fpvV1barZ06WaPZ2q2dOlJVt2q6Wr77ATuFd+4V7d/OQmBf0+BQO+/W7TAqZ0v1/BgE9ZQX90CSg73c+cpgCAhEax57GWzj597vZX9Mjael08p0w3vHOO8r5Q43UsAIgr2ekBnVQ5RidVjhlye184otauPrUcsLR2h9TRE9L1X/2G3vCuj6knHFZfyKk3HFFvKKL2npB6QxH1hiPqC0XkDnheM2n8R3+r9/52iSoKs1RVlKWqwixVFmVpUnG2soL8NwoAiF/8L+Whx6sb9D93r1Z9a7e+/pZZ+sDpE7n8OAAMYSTmEH3DV796yO0R59TdF1ZHT1idvSG194TU0tWnJ159Qm09M/Sf1bu0p7Nv3/5m0sSibM0ozdWM0tfOKSwfkxmTK4oCAHCkKPY80Nzeo2/dv0b3rNipKSXZuv1jp2neQb6tBgCMzhyiPjNlBQPR3rr0fe133H+j7vvXDyVJFsxSoKBUaWPKlFZUofaxk7S+ZKIeGFMms/6LxkR6OtXbtE19DVvUW79JBdapDUsfV0Yak8gDAEYXxd4oCoUjuvOlGt3w4Dq194T06fOm6ePnTFF6gD8AACBuuchhC82+cETN7b1qau/pX8YVqbHqePWGIpKkWV/7j6aU5GjW+DwdPz5Ps8ryNWt8ngqzg6PxEwAAUhTF3ihwzmnxmnr94KFqbWxo18lVY/Tdd8zRceNyvY4GAKPDfEk9TD3N71NpfoZK8zP2tTnn1Nod0vc//xF9++d/0JpdrVq6ZbfuXbFz3z5l+RmaFZ1WYtrYXE0dm6MpJTnKDPIlIADg2FHsxVAk4vTkhkb99JENWrFjryaXZOtX7z1J5x9fmtR/9ADA6wyjd+xQEnFaGTNTfmaaujY8v1/+3R29WrurVa/ubNGana16dWernljfqHDERR/XP7/g1LE5mjY2R1PH5mhiUbYqi7I0LjeD8wEBAMNGsRcD3X1h3fVSrX7/7BZtbGhXWX6GvvfOOXrnSeVMBAwAqWY4vZq+QP95gMWV/ecCFpVrc1GlHi8qlwVeG+oZDPhUPiaz/4qghVmqiC7j8zNVVpC
houwgXyYCAPah2BthD67apa/cs1q7O3p1/Pg8/fjdJ+riOeMVDFDkAUBKOoZezevOn6mtja3avrtT25o7tWN3p7ZHl+Vb96jtgPkFgwGfyvIzVJafsa8ALMvP1PiB2/xM5WUGKAgBIEVQ7I2w8jFZal63TPVP36ZtO1brAa8DAQAS2sTinINu82XkKlAwTv7cEgXyiuXPLdaevBJV5xb3388pkvn2P/8v0tulcFuTQq2N/bdtTQq3NSvc1qxQe/9tpKtV5RWV2rF9W6x/PABADFHsjbA55fna/revpNy5KQCAGDjWcx3Pn6lv3rtS7T0htXWH1N4dUltPSG3dxWrvmaq27pA6e8Ove5zfTD17dumym5/TuPwMleb1L4PXx+alM50EAMQ5ij0AAJKViyg3I025GWkqyx96l3DE7ZtEvr0npI6esNp7Qnpy9WMKnDRLa3a26rG1Derqe31RmJcRUFFOugqzgyrMDqooeluYHVRRTlCF2en7tVEcAsDoirtiz8wukPRTSX5Jv3XO3eBxJAAAkpbfZ/sKwsHuuv9H+nt0MvmBaSTqW7tV19KtutZu1bd0q6m9R80dvdrd0asduzu1Ysde7enoVSh6ZdEDuVCfIr2divR0KtLTIdfbpUhPhyI9nXL72juVn5Wun/zwBmWm+ZUZ9Cszza+sYECZQZ8ygwFlRdvTA8k9pceRqqisUs2O7Uf1WIbtIlWk2r+TuCr2zMwv6ReS3iypRtIyM7vPObfG22QAAKSugWkk8jPTDjtHrHNOrV0hFZdP1Gd+/YC6+sLq6g2rqy+snlBEvaGIesPR21BEPaHwa+vhiFy0Trzun68MI5eihaBfGdHb1xeI/tftk5HWXyimp/mUHoiuB/zR+4Pa0nxK8/vkN5PPZ/L7LLou+cz2tR/sfQhHnEKR124j+92PKBR26gv3vx99A+vR96cvNERbdOm/37+tL/Ta4ztnvUUXf/7dwzyo/cN1fdGf4bHbbtZPHlmvNL9PAZ8p4PcpzW8K+HwK+G3f+v5tA/tGn8f636OBW//A++R77XUG3j9/tN3MNFCvD7yTAwX8a/cHttt+93WY7Yd9Pr4oSEk1O7YPa3i8c05h52Tq/x01SZ87f0bsA46wuCr2JC2QtNE5t1mSzOzvkt4qiWIPAIDRNJwpIw5jfEHmEe3vXH8x9OXLz5QFgrK0dFlahnxp6QesZ8gC6bJghloC0fvRfXxpGfIFM6RA+muPC7z22FgYKALNJOekcLTQizUX6pML98lFQlIkrMypC7W5qeN1+x14FJ36c0aciy5S3qmX6yePbIh55njlXEQKh+XCva+9rwO34T65UK8UPqA91KesjKA+9P73KRjo/6Ig6PcpGOhfAn6fgtGiOC1aPL+2vv/9YCBaTAf6C2gz7SugfdZfyJpP++77bKAAOeD+KBSwLvqNjBv0K+4O3Kb+3699X3iEo194uMH3IwpH+guqUPjAL0eiX3aEIvt9IRIa+MJj0JcdfeGI+iKD93X7vhgJhd1+X5T0hZxK3/dD3bZ0+77Xjuy77R/WHokWeW6If8JV/32/uvvCCTUkPd6KvQmSdgy6XyNpoUdZAABIXcd6cZijuNiYWX8PUri9+Zhfe6jHO+fUF37tj8kDe97CEadff+Va3XH3veoJhdXTF1FPKLLvj9b+22iRdOAfik6KRJzMTIFoL2DAZ/L7B+779m+PLsHAwB/7Pl10/iJ94vu39veEDer9GugRG9xr5hviD/vrFk3X5452mo9F0xUOR9QX7XEMhd2+9b5wRKHIwB/ar/0hHhpojzhdcOFF+vC3fy3nFF363xMnt6+wHCgyndu/7d5ffVc33nhj9BhFj5XcAfd10O1f/vKXddGHrtt/R+2/3wE3g7ZLi//ycy266pP7ivRwZP/fkf0W99rvSjjitLepUXe9VKPecP/vylAFwmgaKBJN/bf7Kv0D3jdpqPd20LYD9ol3wYHiOfrvKc03aP2AojvS06ns9IB
8pkG99YN6pAf1PpvPpEG/xw/95ecK+C70+sc9Iubi6Cia2WWSLnDOfTh6/32SFjrnPjlon2slXRu9O11Ss6Sm0c6KI1YsjlMi4DjFP45RYuA4JQaOU/zjGCUGjpO3qpxzJUNtiLeevVpJFYPul0fb9nHO3SLploH7ZrbcOTd/dOLhaHGcEgPHKf5xjBIDxykxcJziH8coMXCc4pfP6wAHWCZpmplNMrOgpCsk3edxJgAAAABIOHHVs+ecC5nZJyU9pP6pF37vnHvV41gAAAAAkHDiqtiTJOfcA5IeOIKH3HL4XRAHOE6JgeMU/zhGiYHjlBg4TvGPY5QYOE5xKq4u0AIAAAAAGBnxds4eAAAAAGAEJHSxZ2YXmFm1mW00sy95nScZmdnvzazBzFYPais0s4fNbEP0dky03czspujxWGlmJw16zPuj+28ws/cPaj/ZzFZFH3OTRScNOthrYGhmVmFmj5vZGjN71cw+HW3nWMUJM8sws6Vm9kr0GH0z2j7JzJZE39d/RC9OJTNLj97fGN0+cdBzXR9trzaz8we1D/mZeLDXwMGZmd/MXjaz+6P3OU5xxsy2Rj+TVpjZ8mgbn3lxxMwKzOwOM1tnZmvN7DSOUXwxs+nRf0MDS6uZfYbjlET6J7ZMvEX9F3DZJGmypKCkVyTN8jpXsi2SzpZ0kqTVg9q+L+lL0fUvSfpedP0iSQ+qfwrPUyUtibYXStocvR0TXR8T3bY0uq9FH3vhoV6D5aDHqUzSSdH1XEnrJc3iWMXPEn3fcqLraZKWRN/Pf0q6Itr+K0n/FV3/uKRfRdevkPSP6Pqs6OdduqRJ0c9B/6E+Ew/2GiyHPF7XSfqbpPsP9R5ynDw9RlslFR/QxmdeHC2SbpX04eh6UFIBxyh+l+jnU52kKo5T8iyeBzjq4NJpkh4adP96Sdd7nSsZF0kTtX+xVy2pLLpeJqk6uv5rSVceuJ+kKyX9elD7r6NtZZLWDWrft9/BXoNl2MfsXklv5ljF5yIpS9JLkhaqfxLaQLR93+ea+q9KfFp0PRDdzw78rBvY72CfidHHDPkaLAc9PuWSHpV0rqT7D/Uecpw8PU5b9fpij8+8OFkk5Uvaouj1IThG8b9IWiTpWY5Tci2JPIxzgqQdg+7XRNsQe+Occ7ui63WSxkXXD3ZMDtVeM0T7oV4DhxEdRjZP/T1HHKs4Eh0auEJSg6SH1d/Ds9c5F4ruMvh93XcsottbJBXpyI9d0SFeA0P7iaQvSopE7x/qPeQ4ecdJWmxmL5rZtdE2PvPixyRJjZL+YP1Don9rZtniGMWzKyTdFl3nOCWJRC72EAdc/9cxLtFfI1mYWY6kOyV9xjnXOngbx8p7zrmwc26u+nuOFkia4W0iHMjMLpHU4Jx70essOKwznXMnSbpQ0ifM7OzBG/nM81xA/aeB3OycmyepQ/1D9fbhGMWP6DnCl0q6/cBtHKfElsjFXq2kikH3y6NtiL16MyuTpOhtQ7T9YMfkUO3lQ7Qf6jVwEGaWpv5C76/OubuizRyrOOSc2yvpcfUP1Ssws4E5Twe/r/uORXR7vqRmHfmxaz7Ea+D1zpB0qZltlfR39Q/l/Kk4TnHHOVcbvW2QdLf6v0DhMy9+1Eiqcc4tid6/Q/3FH8coPl0o6SXnXH30PscpSSRysbdM0jTrv3pZUP1dz/d5nClV3Cfp/dH196v//LCB9qujV2o6VVJLtHv+IUmLzGxM9EpLi9R/LsouSa1mdmr0ykxXH/BcQ70GhhB9/34naa1z7sZBmzhWccLMSsysILqeqf5zKteqv+i7LLrbgcdo4H29TNJj0W8+75N0hfVfBXKSpGnqP/l9yM/E6GMO9ho4gHPueudcuXNuovrfw8ecc1eJ4xRXzCzbzHIH1tX/WbVafObFDedcnaQdZjY92nSepDXiGMWrK/XaEE6J45Q8vD5p8FgW9V8RaL36z3v5itd5knFR/z/8XZL61P8t3TXqP7fkUUkbJD0iqTC
6r0n6RfR4rJI0f9DzfEjSxujywUHt89X/H/QmST9X9ETug70Gy0GP05nqH/6wUtKK6HIRxyp+FkknSHo5eoxWS/patH2y+ouAjeofPpMebc+I3t8Y3T550HN9JXocqhW9qlm0fcjPxIO9Bsthj9kb9drVODlOcbRE36tXosurA+8jn3nxtUiaK2l59HPvHvVfpZFjFGeLpGz1jy7IH9TGcUqSZeDNBgAAAAAkkUQexgkAAAAAOAiKPQAAAABIQhR7AAAAAJCEKPYAAAAAIAlR7AEAAABAEqLYAwCkDDP7ipm9amYrzWyFmS08xL5/NLPLDrZ90D5bos/1kpmddpD9/tfM3nSs+QEAOBIBrwMAADAaooXYJZJOcs71mFmxpOAIPPUXnHN3mNkiSb9W/5yKg1/X75z72gi8DgAAR4SePQBAqiiT1OSc65Ek51yTc26nmX3NzJaZ2Wozu8XM7MAHmtnJZvakmb1oZg+ZWdkQz/+UpKnR/bea2ffM7CVJlw/uJTSzU8zsOTN7xcyWmlmumfnN7AfRHCvN7KOxexsAAKmCYg8AkCoWS6ows/Vm9ksze0O0/efOuVOcc7MlZaq/928fM0uT9DNJlznnTpb0e0nfGeL53yJp1aD7zc65k5xzfx/0XEFJ/5D0aefciZLeJKlL0jWSWpxzp0g6RdJHzGzSCPzMAIAUxjBOAEBKcM61m9nJks6SdI6kf5jZlyS1mdkXJWVJKpT0qqR/DXrodEmzJT0c7fTzS9o1aPsPzOx/JDWqv2gb8I8hYkyXtMs5tyyaqVWSokNATxh0jmC+pGmSthz9TwwASHUUewCAlOGcC0t6QtITZrZK0kfVf47dfOfcDjP7hqSMAx5mkl51zg158RVFz9kbor3jCKKZpP/nnHvoCB4DAMAhMYwTAJASzGy6mU0b1DRXUnV0vcnMciQNdfXNakklA1faNLM0Mzv+KGNUSyozs1Oiz5VrZgFJD0n6r+iQUZnZcWaWfZSvAQCAJHr2AACpI0fSz8ysQFJI0kZJ10raK2m1pDpJyw58kHOuNzq88iYzy1f//50/Uf9wzyMSfa53R3Nkqv98vTdJ+q2kiZJeil4gplHS2470+QEAGMycc15nAAAAAACMMIZxAgAAAEASotgDAAAAgCREsQcAAAAASYhiDwAAAACSEMUeAAAAACQhij0AAAAASEIUewAAAACQhCj2AAAAACAJ/X+D413KL5f41wAAAABJRU5ErkJggg==", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure(figsize=(15,6))\n", + "sns.histplot(df_trains['SalePrice'], kde=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Aa we can see, it seems to be a poisson distribution.\\\n", + "So next step would be check p-values in order to know if attributes are relevant against SalePrice" + ] + }, + { + "cell_type": "code", + "execution_count": 426, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['MSSubClass', 'MSZoning', 'LotArea', 'Street', 'LotShape',\n", + " 'LandContour', 'Utilities', 'LotConfig', 'LandSlope', 'Neighborhood',\n", + " 'Condition1', 'Condition2', 'BldgType', 'HouseStyle', 'OverallQual',\n", + " 'OverallCond', 'YearBuilt', 'YearRemodAdd', 'RoofStyle', 'RoofMatl',\n", + " 'Exterior1st', 'Exterior2nd', 'MasVnrType', 'MasVnrArea', 'ExterQual',\n", + " 'ExterCond', 'Foundation', 'BsmtQual', 'BsmtCond', 'BsmtExposure',\n", + " 'BsmtFinType1', 'BsmtFinSF1', 'BsmtFinType2', 'BsmtFinSF2', 'BsmtUnfSF',\n", + " 'TotalBsmtSF', 'Heating', 'HeatingQC', 'CentralAir', 'Electrical',\n", + " '_1stFlrSF', '_2ndFlrSF', 'LowQualFinSF', 'GrLivArea', 'BsmtFullBath',\n", + " 'BsmtHalfBath', 'FullBath', 'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr',\n", + " 'KitchenQual', 'TotRmsAbvGrd', 'Functional', 'Fireplaces',\n", + " 'FireplaceQu', 'GarageType', 'GarageYrBlt', 'GarageFinish',\n", + " 'GarageCars', 'GarageArea', 'GarageQual', 'GarageCond', 'PavedDrive',\n", + " 'WoodDeckSF', 'OpenPorchSF', 'EnclosedPorch', '_3SsnPorch',\n", + " 'ScreenPorch', 'PoolArea', 'PoolQC', 'MiscVal', 'MoSold', 'YrSold',\n", + " 'SaleType', 'SaleCondition', 'SalePrice'],\n", + " dtype='object')" + ] + }, + "execution_count": 426, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_trains.columns\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 427, + "metadata": {}, + 
# Univariate screening: regress SalePrice on each candidate column on its own
# and keep the columns whose overall F-test p-value is below the significance
# level. Results are exposed through three names:
#   sig_col_prob -- {column name: F-test p-value} for the significant columns
#   df_categ     -- every object-dtype column (significant or not)
#   string       -- formula text 'SalePrice ~ term + term + ...'; it
#                   deliberately keeps the trailing ' +' the original loop
#                   produced, so its format is unchanged
TARGET = 'SalePrice'
ALPHA = 0.05  # significance level for the per-column F-test

sig_col_prob = {}
df_categ = []
string = f'{TARGET} ~'

for i in df_trains.columns:
    if i == TARGET:
        # The target is not a predictor of itself: skip the degenerate
        # 'SalePrice ~ SalePrice' fit instead of fitting and discarding it.
        continue

    if df_trains[i].dtypes == 'object':
        # Object-dtype columns are wrapped in C(...) so the formula treats
        # them as categorical.
        term = f'C({i})'
        df_categ.append(i)
    else:
        term = i

    model = ols(f'{TARGET} ~ {term}', data=df_trains).fit()
    string_model = f' {term} +'

    # A NaN p-value compares False here, so such columns are dropped too.
    if model.f_pvalue < ALPHA:
        sig_col_prob[i] = model.f_pvalue
        string = string + string_model
2.7034189043674756e-192,\n", + " 'ExterCond': 4.180159538400305e-06,\n", + " 'Foundation': 4.157463082303885e-85,\n", + " 'BsmtQual': 3.2687659316175138e-183,\n", + " 'BsmtCond': 1.692626862971258e-08,\n", + " 'BsmtExposure': 5.724344667880617e-41,\n", + " 'BsmtFinType1': 1.929029875513618e-62,\n", + " 'BsmtFinSF1': 1.0213727645941967e-46,\n", + " 'BsmtFinType2': 0.021048762692804365,\n", + " 'BsmtUnfSF': 7.932120134626303e-13,\n", + " 'TotalBsmtSF': 2.2083254456346077e-145,\n", + " 'Heating': 0.002473827878914888,\n", + " 'HeatingQC': 2.0295271499167507e-62,\n", + " 'CentralAir': 3.3396741377559114e-18,\n", + " 'Electrical': 9.426298309197317e-16,\n", + " '_1stFlrSF': 1.4979097874823512e-147,\n", + " '_2ndFlrSF': 1.5058392150966874e-34,\n", + " 'GrLivArea': 4.4019696964122803e-218,\n", + " 'BsmtFullBath': 2.3232454159882026e-15,\n", + " 'FullBath': 1.9532215024769195e-125,\n", + " 'HalfBath': 4.7812069711283865e-25,\n", + " 'BedroomAbvGr': 1.2055786109175643e-10,\n", + " 'KitchenAbvGr': 6.803353238903545e-05,\n", + " 'KitchenQual': 7.207797634752796e-182,\n", + " 'TotRmsAbvGrd': 2.2183533266083552e-110,\n", + " 'Functional': 0.0012342972262940743,\n", + " 'Fireplaces': 5.785107942885739e-76,\n", + " 'FireplaceQu': 7.90173630716701e-103,\n", + " 'GarageType': 1.0658261450802592e-80,\n", + " 'GarageYrBlt': 4.1061923281445587e-91,\n", + " 'GarageFinish': 9.900255473029922e-108,\n", + " 'GarageCars': 6.995346991827569e-165,\n", + " 'GarageArea': 1.617669668252519e-151,\n", + " 'GarageQual': 1.870218396971908e-23,\n", + " 'GarageCond': 1.1222351417909253e-22,\n", + " 'PavedDrive': 2.8134604962488835e-16,\n", + " 'WoodDeckSF': 1.963043699407181e-34,\n", + " 'OpenPorchSF': 8.470965811353925e-32,\n", + " 'EnclosedPorch': 3.9911095702792166e-07,\n", + " 'ScreenPorch': 4.604602126747757e-05,\n", + " 'PoolArea': 0.0004801124771611024,\n", + " 'PoolQC': 8.554588306049216e-07,\n", + " 'SaleType': 7.253528527759462e-40,\n", + " 'SaleCondition': 2.2710946808736094e-41}" + ] + }, 
+ "execution_count": 428, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sig_col_prob" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Will take all those variables in our matrix\n", + "so next step will be OLS method" + ] + }, + { + "cell_type": "code", + "execution_count": 429, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " OLS Regression Results \n", + "==============================================================================\n", + "Dep. Variable: SalePrice R-squared: 0.929\n", + "Model: OLS Adj. R-squared: 0.916\n", + "Method: Least Squares F-statistic: 69.26\n", + "Date: Wed, 09 Feb 2022 Prob (F-statistic): 0.00\n", + "Time: 22:07:50 Log-Likelihood: -16073.\n", + "No. Observations: 1413 AIC: 3.260e+04\n", + "Df Residuals: 1187 BIC: 3.378e+04\n", + "Df Model: 225 \n", + "Covariance Type: nonrobust \n", + "===============================================================================================\n", + " coef std err t P>|t| [0.025 0.975]\n", + "-----------------------------------------------------------------------------------------------\n", + "Intercept -9.814e+05 1.4e+05 -7.024 0.000 -1.26e+06 -7.07e+05\n", + "C(MSZoning)[T.FV] 3.742e+04 1.21e+04 3.091 0.002 1.37e+04 6.12e+04\n", + "C(MSZoning)[T.RH] 2.819e+04 1.2e+04 2.343 0.019 4579.932 5.18e+04\n", + "C(MSZoning)[T.RL] 3.004e+04 1.03e+04 2.915 0.004 9820.460 5.03e+04\n", + "C(MSZoning)[T.RM] 2.706e+04 9588.138 2.822 0.005 8248.474 4.59e+04\n", + "C(LotShape)[T.1] -2200.1829 9540.418 -0.231 0.818 -2.09e+04 1.65e+04\n", + "C(LotShape)[T.2] -6849.3731 8994.745 -0.761 0.447 -2.45e+04 1.08e+04\n", + "C(LotShape)[T.3] -5321.9323 9049.707 -0.588 0.557 -2.31e+04 1.24e+04\n", + "C(LandContour)[T.HLS] 8875.6057 5224.029 1.699 0.090 -1373.755 1.91e+04\n", + "C(LandContour)[T.Low] -7143.7490 6537.156 -1.093 0.275 -2e+04 5681.919\n", + "C(LandContour)[T.Lvl] 4574.3476 3773.285 1.212 0.226 -2828.703 
1.2e+04\n", + "C(LotConfig)[T.CulDSac] 7984.4736 3255.886 2.452 0.014 1596.541 1.44e+04\n", + "C(LotConfig)[T.FR2] -7881.9487 4079.475 -1.932 0.054 -1.59e+04 121.837\n", + "C(LotConfig)[T.FR3] -1.478e+04 1.27e+04 -1.160 0.246 -3.98e+04 1.02e+04\n", + "C(LotConfig)[T.Inside] -1379.0496 1797.435 -0.767 0.443 -4905.554 2147.455\n", + "C(Neighborhood)[T.Blueste] 987.2761 1.92e+04 0.051 0.959 -3.68e+04 3.87e+04\n", + "C(Neighborhood)[T.BrDale] -373.0447 1.11e+04 -0.034 0.973 -2.22e+04 2.15e+04\n", + "C(Neighborhood)[T.BrkSide] -1831.8944 9586.521 -0.191 0.848 -2.06e+04 1.7e+04\n", + "C(Neighborhood)[T.ClearCr] -1.271e+04 9462.242 -1.344 0.179 -3.13e+04 5850.691\n", + "C(Neighborhood)[T.CollgCr] -8437.4333 7373.875 -1.144 0.253 -2.29e+04 6029.848\n", + "C(Neighborhood)[T.Crawfor] 1.623e+04 8698.840 1.865 0.062 -840.438 3.33e+04\n", + "C(Neighborhood)[T.Edwards] -1.785e+04 8163.026 -2.187 0.029 -3.39e+04 -1839.270\n", + "C(Neighborhood)[T.Gilbert] -8477.6664 7793.449 -1.088 0.277 -2.38e+04 6812.804\n", + "C(Neighborhood)[T.IDOTRR] -7822.0452 1.09e+04 -0.717 0.474 -2.92e+04 1.36e+04\n", + "C(Neighborhood)[T.MeadowV] -7152.3107 1.13e+04 -0.630 0.529 -2.94e+04 1.51e+04\n", + "C(Neighborhood)[T.Mitchel] -1.764e+04 8330.643 -2.118 0.034 -3.4e+04 -1297.379\n", + "C(Neighborhood)[T.NAmes] -1.539e+04 7926.908 -1.942 0.052 -3.09e+04 160.870\n", + "C(Neighborhood)[T.NPkVill] 1.235e+04 1.42e+04 0.867 0.386 -1.56e+04 4.03e+04\n", + "C(Neighborhood)[T.NWAmes] -1.602e+04 8125.454 -1.971 0.049 -3.2e+04 -76.225\n", + "C(Neighborhood)[T.NoRidge] 2.732e+04 8551.259 3.195 0.001 1.05e+04 4.41e+04\n", + "C(Neighborhood)[T.NridgHt] 1.867e+04 7622.734 2.449 0.014 3715.333 3.36e+04\n", + "C(Neighborhood)[T.OldTown] -1.15e+04 9770.899 -1.177 0.239 -3.07e+04 7669.256\n", + "C(Neighborhood)[T.SWISU] -5686.7763 9821.168 -0.579 0.563 -2.5e+04 1.36e+04\n", + "C(Neighborhood)[T.Sawyer] -8397.8147 8243.455 -1.019 0.309 -2.46e+04 7775.551\n", + "C(Neighborhood)[T.SawyerW] -2214.8204 7952.487 -0.279 0.781 
-1.78e+04 1.34e+04\n", + "C(Neighborhood)[T.Somerst] 124.6180 9139.365 0.014 0.989 -1.78e+04 1.81e+04\n", + "C(Neighborhood)[T.StoneBr] 3.894e+04 8420.487 4.624 0.000 2.24e+04 5.55e+04\n", + "C(Neighborhood)[T.Timber] -1.205e+04 8255.331 -1.460 0.145 -2.82e+04 4144.479\n", + "C(Neighborhood)[T.Veenker] 2821.7828 1.06e+04 0.266 0.791 -1.8e+04 2.37e+04\n", + "C(Condition1)[T.Feedr] 6002.4176 5165.818 1.162 0.245 -4132.734 1.61e+04\n", + "C(Condition1)[T.Norm] 1.459e+04 4292.563 3.399 0.001 6166.996 2.3e+04\n", + "C(Condition1)[T.PosA] 9998.1984 1.01e+04 0.992 0.321 -9776.357 2.98e+04\n", + "C(Condition1)[T.PosN] 1.158e+04 7515.926 1.540 0.124 -3168.350 2.63e+04\n", + "C(Condition1)[T.RRAe] -1.358e+04 9602.918 -1.414 0.158 -3.24e+04 5263.447\n", + "C(Condition1)[T.RRAn] 1.253e+04 7022.796 1.784 0.075 -1248.493 2.63e+04\n", + "C(Condition1)[T.RRNe] 27.4000 1.76e+04 0.002 0.999 -3.46e+04 3.46e+04\n", + "C(Condition1)[T.RRNn] 3973.1493 1.29e+04 0.308 0.758 -2.14e+04 2.93e+04\n", + "C(Condition2)[T.Feedr] -4028.4922 2.42e+04 -0.167 0.868 -5.14e+04 4.34e+04\n", + "C(Condition2)[T.Norm] -5459.8097 2.1e+04 -0.260 0.795 -4.67e+04 3.58e+04\n", + "C(Condition2)[T.PosA] 4.422e+04 3.79e+04 1.168 0.243 -3.01e+04 1.19e+05\n", + "C(Condition2)[T.PosN] -2.309e+05 2.83e+04 -8.161 0.000 -2.86e+05 -1.75e+05\n", + "C(Condition2)[T.RRAe] -7.997e+04 4.42e+04 -1.810 0.071 -1.67e+05 6714.071\n", + "C(Condition2)[T.RRAn] -1.404e+04 3.23e+04 -0.435 0.664 -7.74e+04 4.93e+04\n", + "C(Condition2)[T.RRNn] 2049.8201 2.78e+04 0.074 0.941 -5.25e+04 5.66e+04\n", + "C(BldgType)[T.2fmCon] -2378.0050 1.26e+04 -0.189 0.850 -2.7e+04 2.22e+04\n", + "C(BldgType)[T.Duplex] -1.361e+04 7514.510 -1.811 0.070 -2.84e+04 1134.801\n", + "C(BldgType)[T.Twnhs] -1.957e+04 9991.219 -1.959 0.050 -3.92e+04 33.361\n", + "C(BldgType)[T.TwnhsE] -1.428e+04 9019.408 -1.583 0.114 -3.2e+04 3418.204\n", + "C(HouseStyle)[T.1.5Unf] 1.279e+04 8094.930 1.579 0.115 -3096.539 2.87e+04\n", + "C(HouseStyle)[T.1Story] 6220.4270 4537.597 
1.371 0.171 -2682.178 1.51e+04\n", + "C(HouseStyle)[T.2.5Fin] -1.525e+04 1.26e+04 -1.215 0.225 -3.99e+04 9378.694\n", + "C(HouseStyle)[T.2.5Unf] -6876.6639 9358.825 -0.735 0.463 -2.52e+04 1.15e+04\n", + "C(HouseStyle)[T.2Story] -5046.3751 3612.626 -1.397 0.163 -1.21e+04 2041.470\n", + "C(HouseStyle)[T.SFoyer] 3522.7167 6630.304 0.531 0.595 -9485.705 1.65e+04\n", + "C(HouseStyle)[T.SLvl] 3883.3669 5674.855 0.684 0.494 -7250.498 1.5e+04\n", + "C(RoofStyle)[T.Gable] 1.198e+04 1.87e+04 0.639 0.523 -2.48e+04 4.88e+04\n", + "C(RoofStyle)[T.Gambrel] 1.364e+04 2.05e+04 0.666 0.505 -2.65e+04 5.38e+04\n", + "C(RoofStyle)[T.Hip] 1.261e+04 1.88e+04 0.671 0.503 -2.43e+04 4.95e+04\n", + "C(RoofStyle)[T.Mansard] 2.453e+04 2.18e+04 1.125 0.261 -1.82e+04 6.73e+04\n", + "C(RoofStyle)[T.Shed] 6.269e+04 3.4e+04 1.844 0.065 -4010.079 1.29e+05\n", + "C(RoofMatl)[T.CompShg] 5.66e+05 4.7e+04 12.045 0.000 4.74e+05 6.58e+05\n", + "C(RoofMatl)[T.Membran] 6.275e+05 5.77e+04 10.878 0.000 5.14e+05 7.41e+05\n", + "C(RoofMatl)[T.Metal] 5.943e+05 5.65e+04 10.513 0.000 4.83e+05 7.05e+05\n", + "C(RoofMatl)[T.Roll] 5.577e+05 5.37e+04 10.386 0.000 4.52e+05 6.63e+05\n", + "C(RoofMatl)[T.Tar&Grv] 5.637e+05 5.11e+04 11.033 0.000 4.63e+05 6.64e+05\n", + "C(RoofMatl)[T.WdShake] 5.568e+05 4.98e+04 11.183 0.000 4.59e+05 6.54e+05\n", + "C(RoofMatl)[T.WdShngl] 6.233e+05 4.79e+04 13.005 0.000 5.29e+05 7.17e+05\n", + "C(Exterior1st)[T.BrkComm] -3.659e+04 3.37e+04 -1.085 0.278 -1.03e+05 2.96e+04\n", + "C(Exterior1st)[T.BrkFace] 5662.9980 1.36e+04 0.417 0.677 -2.1e+04 3.23e+04\n", + "C(Exterior1st)[T.CBlock] -5610.5903 1.38e+04 -0.407 0.684 -3.27e+04 2.14e+04\n", + "C(Exterior1st)[T.CemntBd] -8681.0563 1.95e+04 -0.444 0.657 -4.7e+04 2.97e+04\n", + "C(Exterior1st)[T.HdBoard] -1.615e+04 1.37e+04 -1.176 0.240 -4.31e+04 1.08e+04\n", + "C(Exterior1st)[T.ImStucc] -3.599e+04 2.86e+04 -1.257 0.209 -9.21e+04 2.02e+04\n", + "C(Exterior1st)[T.MetalSd] -9428.2118 1.53e+04 -0.616 0.538 -3.94e+04 2.06e+04\n", + 
"C(Exterior1st)[T.Plywood] -1.742e+04 1.36e+04 -1.281 0.200 -4.41e+04 9254.197\n", + "C(Exterior1st)[T.Stone] -762.7865 2.69e+04 -0.028 0.977 -5.35e+04 5.2e+04\n", + "C(Exterior1st)[T.Stucco] -1.409e+04 1.5e+04 -0.942 0.347 -4.34e+04 1.53e+04\n", + "C(Exterior1st)[T.VinylSd] -1.647e+04 1.39e+04 -1.187 0.236 -4.37e+04 1.08e+04\n", + "C(Exterior1st)[T.Wd Sdng] -1.645e+04 1.32e+04 -1.248 0.212 -4.23e+04 9419.619\n", + "C(Exterior1st)[T.WdShing] -1.445e+04 1.42e+04 -1.020 0.308 -4.23e+04 1.34e+04\n", + "C(Exterior2nd)[T.AsphShn] 1.431e+04 2.28e+04 0.629 0.530 -3.03e+04 5.9e+04\n", + "C(Exterior2nd)[T.Brk Cmn] 7890.5080 2.08e+04 0.379 0.705 -3.29e+04 4.87e+04\n", + "C(Exterior2nd)[T.BrkFace] 6170.9966 1.41e+04 0.438 0.662 -2.15e+04 3.38e+04\n", + "C(Exterior2nd)[T.CBlock] -5610.5903 1.38e+04 -0.407 0.684 -3.27e+04 2.14e+04\n", + "C(Exterior2nd)[T.CmentBd] 9177.9617 1.93e+04 0.476 0.634 -2.87e+04 4.7e+04\n", + "C(Exterior2nd)[T.HdBoard] 1.073e+04 1.33e+04 0.809 0.419 -1.53e+04 3.68e+04\n", + "C(Exterior2nd)[T.ImStucc] 2.219e+04 1.51e+04 1.470 0.142 -7436.990 5.18e+04\n", + "C(Exterior2nd)[T.MetalSd] 8170.2654 1.49e+04 0.547 0.585 -2.11e+04 3.75e+04\n", + "C(Exterior2nd)[T.Other] -1.516e+04 2.77e+04 -0.547 0.585 -6.96e+04 3.92e+04\n", + "C(Exterior2nd)[T.Plywood] 8968.0944 1.29e+04 0.693 0.488 -1.64e+04 3.43e+04\n", + "C(Exterior2nd)[T.Stone] -1.226e+04 2.44e+04 -0.503 0.615 -6.01e+04 3.55e+04\n", + "C(Exterior2nd)[T.Stucco] 1.119e+04 1.44e+04 0.780 0.436 -1.7e+04 3.94e+04\n", + "C(Exterior2nd)[T.VinylSd] 1.455e+04 1.34e+04 1.088 0.277 -1.17e+04 4.08e+04\n", + "C(Exterior2nd)[T.Wd Sdng] 1.384e+04 1.28e+04 1.084 0.279 -1.12e+04 3.89e+04\n", + "C(Exterior2nd)[T.Wd Shng] 8768.5281 1.33e+04 0.662 0.508 -1.72e+04 3.48e+04\n", + "C(MasVnrType)[T.BrkFace] 6986.6411 6911.529 1.011 0.312 -6573.534 2.05e+04\n", + "C(MasVnrType)[T.None] 1.03e+04 6974.031 1.477 0.140 -3384.859 2.4e+04\n", + "C(MasVnrType)[T.Stone] 1.17e+04 7297.153 1.603 0.109 -2617.682 2.6e+04\n", + 
"C(ExterQual)[T.2] -5701.6040 9876.582 -0.577 0.564 -2.51e+04 1.37e+04\n", + "C(ExterQual)[T.3] -5251.7123 1.01e+04 -0.518 0.605 -2.52e+04 1.47e+04\n", + "C(ExterQual)[T.4] 1.422e+04 1.12e+04 1.266 0.206 -7807.374 3.62e+04\n", + "C(ExterCond)[T.1] -2532.1160 2.7e+04 -0.094 0.925 -5.54e+04 5.04e+04\n", + "C(ExterCond)[T.2] -7816.7501 2.65e+04 -0.294 0.768 -5.99e+04 4.43e+04\n", + "C(ExterCond)[T.3] -1.11e+04 2.67e+04 -0.416 0.677 -6.34e+04 4.12e+04\n", + "C(ExterCond)[T.4] -3062.6573 3.19e+04 -0.096 0.924 -6.57e+04 5.96e+04\n", + "C(Foundation)[T.CBlock] 4529.3984 3239.513 1.398 0.162 -1826.411 1.09e+04\n", + "C(Foundation)[T.PConc] 4435.5194 3501.266 1.267 0.205 -2433.841 1.13e+04\n", + "C(Foundation)[T.Stone] 1.073e+04 1.12e+04 0.958 0.338 -1.12e+04 3.27e+04\n", + "C(Foundation)[T.Wood] -1.831e+04 1.46e+04 -1.250 0.211 -4.7e+04 1.04e+04\n", + "C(BsmtQual)[T.3] -4066.0106 4993.560 -0.814 0.416 -1.39e+04 5731.176\n", + "C(BsmtQual)[T.4] -6270.0218 5523.768 -1.135 0.257 -1.71e+04 4567.415\n", + "C(BsmtQual)[T.5] 1.271e+04 6446.058 1.971 0.049 58.790 2.54e+04\n", + "C(BsmtCond)[T.2] -2.337e+04 2.9e+04 -0.807 0.420 -8.02e+04 3.35e+04\n", + "C(BsmtCond)[T.3] -2.16e+04 2.92e+04 -0.739 0.460 -7.89e+04 3.57e+04\n", + "C(BsmtCond)[T.4] -2.454e+04 2.94e+04 -0.834 0.404 -8.22e+04 3.32e+04\n", + "C(BsmtExposure)[T.1] 1595.2945 2.33e+04 0.068 0.945 -4.41e+04 4.73e+04\n", + "C(BsmtExposure)[T.2] 3813.1041 2.34e+04 0.163 0.871 -4.21e+04 4.98e+04\n", + "C(BsmtExposure)[T.3] 6946.5587 2.33e+04 0.298 0.766 -3.88e+04 5.27e+04\n", + "C(BsmtExposure)[T.4] 2.226e+04 2.34e+04 0.950 0.342 -2.37e+04 6.82e+04\n", + "C(BsmtFinType1)[T.BLQ] 2460.8109 2825.411 0.871 0.384 -3082.545 8004.167\n", + "C(BsmtFinType1)[T.GLQ] 4384.5898 2542.374 1.725 0.085 -603.458 9372.637\n", + "C(BsmtFinType1)[T.LwQ] -4945.9715 3795.368 -1.303 0.193 -1.24e+04 2500.405\n", + "C(BsmtFinType1)[T.Rec] -745.1046 3027.960 -0.246 0.806 -6685.855 5195.646\n", + "C(BsmtFinType1)[T.Unf] 1459.2519 2953.112 0.494 0.621 
-4334.649 7253.153\n", + "C(BsmtFinType2)[T.BLQ] -1.24e+04 7662.603 -1.619 0.106 -2.74e+04 2630.013\n", + "C(BsmtFinType2)[T.GLQ] -4619.3928 9485.148 -0.487 0.626 -2.32e+04 1.4e+04\n", + "C(BsmtFinType2)[T.LwQ] -1.148e+04 7478.853 -1.535 0.125 -2.62e+04 3191.867\n", + "C(BsmtFinType2)[T.Rec] -9178.5479 7195.119 -1.276 0.202 -2.33e+04 4938.021\n", + "C(BsmtFinType2)[T.Unf] -6810.5418 7647.899 -0.891 0.373 -2.18e+04 8194.365\n", + "C(Heating)[T.GasW] -3114.4210 7180.743 -0.434 0.665 -1.72e+04 1.1e+04\n", + "C(Heating)[T.Grav] -2131.7181 1.2e+04 -0.178 0.859 -2.57e+04 2.14e+04\n", + "C(Heating)[T.OthW] -2.35e+04 1.88e+04 -1.248 0.212 -6.04e+04 1.35e+04\n", + "C(HeatingQC)[T.1] 4418.3780 2.74e+04 0.161 0.872 -4.94e+04 5.82e+04\n", + "C(HeatingQC)[T.2] 318.8335 2.71e+04 0.012 0.991 -5.28e+04 5.34e+04\n", + "C(HeatingQC)[T.3] -427.6546 2.71e+04 -0.016 0.987 -5.36e+04 5.27e+04\n", + "C(HeatingQC)[T.4] 3776.9433 2.71e+04 0.140 0.889 -4.93e+04 5.69e+04\n", + "C(CentralAir)[T.1] 1311.6619 4056.183 0.323 0.746 -6646.426 9269.750\n", + "C(Electrical)[T.FuseF] -2823.0719 6400.397 -0.441 0.659 -1.54e+04 9734.281\n", + "C(Electrical)[T.FuseP] -4673.2589 2.25e+04 -0.207 0.836 -4.89e+04 3.95e+04\n", + "C(Electrical)[T.Mix] -75.8922 4.45e+04 -0.002 0.999 -8.74e+04 8.72e+04\n", + "C(Electrical)[T.SBrkr] -1938.8937 3044.597 -0.637 0.524 -7912.284 4034.497\n", + "C(KitchenQual)[T.2] -1124.4568 4988.154 -0.225 0.822 -1.09e+04 8662.124\n", + "C(KitchenQual)[T.3] -2318.7271 5418.511 -0.428 0.669 -1.29e+04 8312.199\n", + "C(KitchenQual)[T.4] 2.108e+04 6320.944 3.335 0.001 8678.299 3.35e+04\n", + "C(Functional)[T.Maj2] -3356.9864 1.47e+04 -0.228 0.820 -3.23e+04 2.56e+04\n", + "C(Functional)[T.Min1] 6278.4208 8990.992 0.698 0.485 -1.14e+04 2.39e+04\n", + "C(Functional)[T.Min2] 7127.2254 8975.127 0.794 0.427 -1.05e+04 2.47e+04\n", + "C(Functional)[T.Mod] -3465.5594 1.16e+04 -0.299 0.765 -2.62e+04 1.92e+04\n", + "C(Functional)[T.Sev] -3.185e+04 3.06e+04 -1.042 0.298 -9.18e+04 2.81e+04\n", + 
"C(Functional)[T.Typ] 1.781e+04 7776.153 2.290 0.022 2550.092 3.31e+04\n", + "C(FireplaceQu)[T.1] 3342.8752 6567.474 0.509 0.611 -9542.276 1.62e+04\n", + "C(FireplaceQu)[T.2] -9022.9753 5395.127 -1.672 0.095 -1.96e+04 1562.073\n", + "C(FireplaceQu)[T.3] -4348.6171 3648.302 -1.192 0.234 -1.15e+04 2809.222\n", + "C(FireplaceQu)[T.4] -5603.0024 3490.552 -1.605 0.109 -1.25e+04 1245.337\n", + "C(FireplaceQu)[T.5] -7032.3220 6444.743 -1.091 0.275 -1.97e+04 5612.036\n", + "C(GarageType)[T.Attchd] 1.993e+04 1.09e+04 1.830 0.068 -1442.598 4.13e+04\n", + "C(GarageType)[T.Basment] 2.036e+04 1.26e+04 1.610 0.108 -4449.219 4.52e+04\n", + "C(GarageType)[T.BuiltIn] 2.045e+04 1.14e+04 1.797 0.073 -1880.630 4.28e+04\n", + "C(GarageType)[T.CarPort] 2.853e+04 1.6e+04 1.781 0.075 -2903.167 6e+04\n", + "C(GarageType)[T.Detchd] 2.266e+04 1.09e+04 2.074 0.038 1229.079 4.41e+04\n", + "C(GarageType)[T.NA] -3.952e+05 5.96e+04 -6.627 0.000 -5.12e+05 -2.78e+05\n", + "C(GarageFinish)[T.1] -1.95e+05 2.69e+04 -7.247 0.000 -2.48e+05 -1.42e+05\n", + "C(GarageFinish)[T.2] -1.966e+05 2.7e+04 -7.289 0.000 -2.5e+05 -1.44e+05\n", + "C(GarageFinish)[T.3] -1.946e+05 2.72e+04 -7.167 0.000 -2.48e+05 -1.41e+05\n", + "C(GarageQual)[T.1] -1.573e+05 2.66e+04 -5.916 0.000 -2.09e+05 -1.05e+05\n", + "C(GarageQual)[T.2] -1.384e+05 1.78e+04 -7.777 0.000 -1.73e+05 -1.03e+05\n", + "C(GarageQual)[T.3] -1.34e+05 1.81e+04 -7.415 0.000 -1.69e+05 -9.86e+04\n", + "C(GarageQual)[T.4] -1.307e+05 1.92e+04 -6.800 0.000 -1.68e+05 -9.3e+04\n", + "C(GarageQual)[T.5] -2.579e+04 2.97e+04 -0.869 0.385 -8.4e+04 3.24e+04\n", + "C(GarageCond)[T.1] -9.528e+04 2.1e+04 -4.538 0.000 -1.36e+05 -5.41e+04\n", + "C(GarageCond)[T.2] -9.724e+04 1.83e+04 -5.318 0.000 -1.33e+05 -6.14e+04\n", + "C(GarageCond)[T.3] -9.472e+04 1.8e+04 -5.267 0.000 -1.3e+05 -5.94e+04\n", + "C(GarageCond)[T.4] -1.021e+05 1.94e+04 -5.263 0.000 -1.4e+05 -6.41e+04\n", + "C(GarageCond)[T.5] -1.968e+05 3.25e+04 -6.063 0.000 -2.61e+05 -1.33e+05\n", + "C(PavedDrive)[T.P] 
-1461.1196 5737.942 -0.255 0.799 -1.27e+04 9796.518\n", + "C(PavedDrive)[T.Y] 675.0555 3664.489 0.184 0.854 -6514.541 7864.653\n", + "C(PoolQC)[T.1] -3.431e+05 1.05e+05 -3.261 0.001 -5.5e+05 -1.37e+05\n", + "C(PoolQC)[T.3] -3.259e+05 1.15e+05 -2.830 0.005 -5.52e+05 -1e+05\n", + "C(PoolQC)[T.4] -2.078e+05 9.68e+04 -2.148 0.032 -3.98e+05 -1.8e+04\n", + "C(SaleType)[T.CWD] 1.602e+04 1.31e+04 1.227 0.220 -9603.820 4.17e+04\n", + "C(SaleType)[T.Con] 2.5e+04 1.79e+04 1.400 0.162 -1e+04 6e+04\n", + "C(SaleType)[T.ConLD] 1.725e+04 1.04e+04 1.655 0.098 -3198.700 3.77e+04\n", + "C(SaleType)[T.ConLI] 2104.8763 1.18e+04 0.179 0.858 -2.1e+04 2.52e+04\n", + "C(SaleType)[T.ConLw] 2489.1970 1.24e+04 0.201 0.840 -2.18e+04 2.67e+04\n", + "C(SaleType)[T.New] 3.42e+04 1.55e+04 2.209 0.027 3818.106 6.46e+04\n", + "C(SaleType)[T.Oth] 9624.0524 1.46e+04 0.657 0.511 -1.91e+04 3.83e+04\n", + "C(SaleType)[T.WD] -248.7934 4306.908 -0.058 0.954 -8698.794 8201.207\n", + "C(SaleCondition)[T.AdjLand] 1.683e+04 1.6e+04 1.049 0.294 -1.46e+04 4.83e+04\n", + "C(SaleCondition)[T.Alloca] 5815.5561 1.05e+04 0.556 0.578 -1.47e+04 2.63e+04\n", + "C(SaleCondition)[T.Family] -932.0291 6181.968 -0.151 0.880 -1.31e+04 1.12e+04\n", + "C(SaleCondition)[T.Normal] 7039.6176 2946.502 2.389 0.017 1258.684 1.28e+04\n", + "C(SaleCondition)[T.Partial] -1.297e+04 1.49e+04 -0.871 0.384 -4.22e+04 1.63e+04\n", + "MSSubClass -67.6730 82.910 -0.816 0.415 -230.340 94.994\n", + "LotArea 0.4440 0.088 5.026 0.000 0.271 0.617\n", + "OverallQual 6478.6538 1037.638 6.244 0.000 4442.846 8514.462\n", + "OverallCond 5686.2059 894.487 6.357 0.000 3931.255 7441.157\n", + "YearBuilt 335.1348 82.245 4.075 0.000 173.773 496.497\n", + "YearRemodAdd 93.4559 57.144 1.635 0.102 -18.659 205.571\n", + "MasVnrArea 21.9689 5.858 3.750 0.000 10.475 33.463\n", + "BsmtFinSF1 6.7297 8.069 0.834 0.404 -9.101 22.561\n", + "BsmtUnfSF -10.0959 8.190 -1.233 0.218 -26.164 5.972\n", + "TotalBsmtSF 31.6512 9.287 3.408 0.001 13.430 49.872\n", + "_1stFlrSF 
41.6425 19.662 2.118 0.034 3.067 80.218\n", + "_2ndFlrSF 59.0321 18.334 3.220 0.001 23.062 95.003\n", + "GrLivArea 3.9596 19.184 0.206 0.837 -33.679 41.598\n", + "BsmtFullBath 1569.8423 1903.132 0.825 0.410 -2164.036 5303.720\n", + "FullBath 4239.9643 2264.774 1.872 0.061 -203.442 8683.371\n", + "HalfBath 1228.6119 2144.829 0.573 0.567 -2979.466 5436.690\n", + "BedroomAbvGr -3451.0551 1410.704 -2.446 0.015 -6218.806 -683.304\n", + "KitchenAbvGr -1.218e+04 5835.297 -2.087 0.037 -2.36e+04 -729.912\n", + "TotRmsAbvGrd 2041.6346 978.559 2.086 0.037 121.736 3961.533\n", + "Fireplaces 5537.4548 2613.273 2.119 0.034 410.305 1.07e+04\n", + "GarageYrBlt -42.0536 60.677 -0.693 0.488 -161.100 76.993\n", + "GarageCars 4250.3124 2325.997 1.827 0.068 -313.211 8813.836\n", + "GarageArea 18.1000 8.021 2.256 0.024 2.362 33.838\n", + "WoodDeckSF 11.8701 5.959 1.992 0.047 0.178 23.562\n", + "OpenPorchSF -1.8788 11.691 -0.161 0.872 -24.816 21.058\n", + "EnclosedPorch 4.2685 12.829 0.333 0.739 -20.902 29.440\n", + "ScreenPorch 32.8250 12.523 2.621 0.009 8.256 57.394\n", + "PoolArea 588.1664 177.050 3.322 0.001 240.801 935.531\n", + "==============================================================================\n", + "Omnibus: 371.064 Durbin-Watson: 1.921\n", + "Prob(Omnibus): 0.000 Jarque-Bera (JB): 12823.924\n", + "Skew: 0.513 Prob(JB): 0.00\n", + "Kurtosis: 17.723 Cond. No. 5.34e+18\n", + "==============================================================================\n", + "\n", + "Notes:\n", + "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n", + "[2] The smallest eigenvalue is 1.11e-26. 
This might indicate that there are\n", + "strong multicollinearity problems or that the design matrix is singular.\n" + ] + } + ], + "source": [ + "model = ols(string[:-2], data = df_trains).fit()\n", + "print(model.summary())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "There are at least 65 columns that seem to be relevant\\\n", + "\n", + "Creating correlation matrix with orinary values" + ] + }, + { + "cell_type": "code", + "execution_count": 430, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "OverallQual 0.786705\n", + "YearBuilt 0.518637\n", + "YearRemodAdd 0.500217\n", + "TotalBsmtSF 0.611020\n", + "_1stFlrSF 0.614622\n", + "GrLivArea 0.711060\n", + "FullBath 0.575542\n", + "TotRmsAbvGrd 0.545546\n", + "GarageYrBlt 0.502116\n", + "GarageCars 0.641828\n", + "GarageArea 0.621091\n", + "SalePrice 1.000000\n", + "Name: SalePrice, dtype: float64" + ] + }, + "execution_count": 430, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_trains_corr = df_trains.corr()[df_trains.corr()['SalePrice'].abs() > 0.50]['SalePrice']#en la correlacion se necesita que sea mayor a 50\n", + "df_trains_corr" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# PCA Analysis\n", + "\n", + "Matching variables with good correlation of SalePrice againts the ones that seem to be relevant" + ] + }, + { + "cell_type": "code", + "execution_count": 431, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import StandardScaler\n", + "\n", + "scaler = StandardScaler() #Se crea una instancia de la StandardScaler\n", + "df_dummies = pd.get_dummies(data=df_trains[df_categ]) # generamos dummies de las variables categoricas" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Getting ordinary columns with good correlation and add them to the dummies one\\\n", + "Checking which variables have good correlation" + ] + }, + { + "cell_type": "code", + 
"execution_count": 432, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SalePrice
ExterQual_2-0.587583
BsmtQual_50.551399
KitchenQual_2-0.520576
KitchenQual_40.501267
OverallQual0.786705
YearBuilt0.518637
YearRemodAdd0.500217
TotalBsmtSF0.611020
_1stFlrSF0.614622
GrLivArea0.711060
FullBath0.575542
TotRmsAbvGrd0.545546
GarageYrBlt0.502116
GarageCars0.641828
GarageArea0.621091
SalePrice1.000000
\n", + "
" + ], + "text/plain": [ + " SalePrice\n", + "ExterQual_2 -0.587583\n", + "BsmtQual_5 0.551399\n", + "KitchenQual_2 -0.520576\n", + "KitchenQual_4 0.501267\n", + "OverallQual 0.786705\n", + "YearBuilt 0.518637\n", + "YearRemodAdd 0.500217\n", + "TotalBsmtSF 0.611020\n", + "_1stFlrSF 0.614622\n", + "GrLivArea 0.711060\n", + "FullBath 0.575542\n", + "TotRmsAbvGrd 0.545546\n", + "GarageYrBlt 0.502116\n", + "GarageCars 0.641828\n", + "GarageArea 0.621091\n", + "SalePrice 1.000000" + ] + }, + "execution_count": 432, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#agregamos las variables ordinarias que tienen una correlación mayor al 50%\n", + "df_trains_corr_cols = pd.DataFrame(df_trains_corr)\n", + "df_trains_corr_cols.reset_index(inplace=True)\n", + "df_trains[df_trains_corr_cols['index']]\n", + "df_dummies = pd.concat([df_dummies,df_trains[df_trains_corr_cols['index']]], axis=1, join='inner') # concatenamos las variables ordinarias con los dummies de las variables categoricas con buena correlación\n", + "df_dummies_corr = df_dummies.corr()[df_dummies.corr()['SalePrice'].abs() > 0.50 ][['SalePrice']] # vemos que hay una alta correlación entre las variables de precio con algunos dummies\n", + "df_dummies_corr" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Taking off SalePrice column and leave all columns that have been gotten above" + ] + }, + { + "cell_type": "code", + "execution_count": 433, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + "
ExterQual_2BsmtQual_5KitchenQual_2KitchenQual_4OverallQualYearBuiltYearRemodAddTotalBsmtSF_1stFlrSFGrLivAreaFullBathTotRmsAbvGrdGarageYrBltGarageCarsGarageArea
000007200320038568561710282003.02548
11010619761976126212621262261976.02460
200007200120029209201786262001.02608
310007191519707569611717171998.03642
40000820002000114511452198292000.03836
................................................
145510106199920009539531647271999.02460
14561010619781988154220732073271978.02500
14570000719412006115211882340291941.01252
14581000519501996107810781078151950.01240
14590010519651965125612561256161965.01276
\n", + "

1413 rows × 15 columns

\n", + "
" + ], + "text/plain": [ + " ExterQual_2 BsmtQual_5 KitchenQual_2 KitchenQual_4 OverallQual \\\n", + "0 0 0 0 0 7 \n", + "1 1 0 1 0 6 \n", + "2 0 0 0 0 7 \n", + "3 1 0 0 0 7 \n", + "4 0 0 0 0 8 \n", + "... ... ... ... ... ... \n", + "1455 1 0 1 0 6 \n", + "1456 1 0 1 0 6 \n", + "1457 0 0 0 0 7 \n", + "1458 1 0 0 0 5 \n", + "1459 0 0 1 0 5 \n", + "\n", + " YearBuilt YearRemodAdd TotalBsmtSF _1stFlrSF GrLivArea FullBath \\\n", + "0 2003 2003 856 856 1710 2 \n", + "1 1976 1976 1262 1262 1262 2 \n", + "2 2001 2002 920 920 1786 2 \n", + "3 1915 1970 756 961 1717 1 \n", + "4 2000 2000 1145 1145 2198 2 \n", + "... ... ... ... ... ... ... \n", + "1455 1999 2000 953 953 1647 2 \n", + "1456 1978 1988 1542 2073 2073 2 \n", + "1457 1941 2006 1152 1188 2340 2 \n", + "1458 1950 1996 1078 1078 1078 1 \n", + "1459 1965 1965 1256 1256 1256 1 \n", + "\n", + " TotRmsAbvGrd GarageYrBlt GarageCars GarageArea \n", + "0 8 2003.0 2 548 \n", + "1 6 1976.0 2 460 \n", + "2 6 2001.0 2 608 \n", + "3 7 1998.0 3 642 \n", + "4 9 2000.0 3 836 \n", + "... ... ... ... ... \n", + "1455 7 1999.0 2 460 \n", + "1456 7 1978.0 2 500 \n", + "1457 9 1941.0 1 252 \n", + "1458 5 1950.0 1 240 \n", + "1459 6 1965.0 1 276 \n", + "\n", + "[1413 rows x 15 columns]" + ] + }, + "execution_count": 433, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#tomamos solo las columnas que esta correlacionadas con el precio y excluimos el precio de nuestra matriz\n", + "df_dummies_corr.reset_index(inplace=True)\n", + "df_dummies = df_dummies[df_dummies_corr['index']]\n", + "df_dummies.drop(columns='SalePrice', inplace=True)\n", + "df_dummies" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Starting Fit_transform method" + ] + }, + { + "cell_type": "code", + "execution_count": 434, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + "
ExterQual_2BsmtQual_5KitchenQual_2KitchenQual_4OverallQualYearBuiltYearRemodAddTotalBsmtSF_1stFlrSFGrLivAreaFullBathTotRmsAbvGrdGarageYrBltGarageCarsGarageArea
0-1.267679-0.304643-0.997879-0.2744860.6342311.0391550.870141-0.559825-0.7964750.3608980.7984720.9154030.9976930.3041240.341502
10.788843-0.3046431.002125-0.274486-0.1027500.149859-0.4499670.4438850.257906-0.4899760.798472-0.321770-0.0274070.304124-0.071128
2-1.267679-0.304643-0.997879-0.2744860.6342310.9732810.821248-0.401604-0.6302670.5052430.798472-0.3217700.9217590.3041240.622840
30.788843-0.304643-0.997879-0.2744860.634231-1.859290-0.743324-0.807044-0.5237900.374193-1.0183410.2968170.8078591.6470230.782265
4-1.267679-0.304643-0.997879-0.2744861.3712120.9403440.7234620.154639-0.0459431.2877430.7984721.5339900.8837931.6470231.691926
................................................
14550.788843-0.3046431.002125-0.274486-0.1027500.9074070.723462-0.320022-0.5445660.2412440.7984720.2968170.8458260.304124-0.071128
14560.788843-0.3046431.002125-0.274486-0.1027500.2157330.1367481.1360992.3640691.0503340.7984720.2968170.0485260.3041240.116431
1457-1.267679-0.304643-0.997879-0.2744860.634231-1.0029311.0168190.1719440.0657281.5574400.7984721.533990-1.356240-1.038774-1.046434
14580.788843-0.304643-0.997879-0.274486-0.839731-0.7064990.527891-0.010998-0.219941-0.839442-1.018341-0.940357-1.014540-1.038774-1.102702
1459-1.267679-0.3046431.002125-0.274486-0.839731-0.212446-0.9877880.4290520.242324-0.501372-1.018341-0.321770-0.445040-1.038774-0.933899
\n", + "

1413 rows × 15 columns

\n", + "
" + ], + "text/plain": [ + " ExterQual_2 BsmtQual_5 KitchenQual_2 KitchenQual_4 OverallQual \\\n", + "0 -1.267679 -0.304643 -0.997879 -0.274486 0.634231 \n", + "1 0.788843 -0.304643 1.002125 -0.274486 -0.102750 \n", + "2 -1.267679 -0.304643 -0.997879 -0.274486 0.634231 \n", + "3 0.788843 -0.304643 -0.997879 -0.274486 0.634231 \n", + "4 -1.267679 -0.304643 -0.997879 -0.274486 1.371212 \n", + "... ... ... ... ... ... \n", + "1455 0.788843 -0.304643 1.002125 -0.274486 -0.102750 \n", + "1456 0.788843 -0.304643 1.002125 -0.274486 -0.102750 \n", + "1457 -1.267679 -0.304643 -0.997879 -0.274486 0.634231 \n", + "1458 0.788843 -0.304643 -0.997879 -0.274486 -0.839731 \n", + "1459 -1.267679 -0.304643 1.002125 -0.274486 -0.839731 \n", + "\n", + " YearBuilt YearRemodAdd TotalBsmtSF _1stFlrSF GrLivArea FullBath \\\n", + "0 1.039155 0.870141 -0.559825 -0.796475 0.360898 0.798472 \n", + "1 0.149859 -0.449967 0.443885 0.257906 -0.489976 0.798472 \n", + "2 0.973281 0.821248 -0.401604 -0.630267 0.505243 0.798472 \n", + "3 -1.859290 -0.743324 -0.807044 -0.523790 0.374193 -1.018341 \n", + "4 0.940344 0.723462 0.154639 -0.045943 1.287743 0.798472 \n", + "... ... ... ... ... ... ... \n", + "1455 0.907407 0.723462 -0.320022 -0.544566 0.241244 0.798472 \n", + "1456 0.215733 0.136748 1.136099 2.364069 1.050334 0.798472 \n", + "1457 -1.002931 1.016819 0.171944 0.065728 1.557440 0.798472 \n", + "1458 -0.706499 0.527891 -0.010998 -0.219941 -0.839442 -1.018341 \n", + "1459 -0.212446 -0.987788 0.429052 0.242324 -0.501372 -1.018341 \n", + "\n", + " TotRmsAbvGrd GarageYrBlt GarageCars GarageArea \n", + "0 0.915403 0.997693 0.304124 0.341502 \n", + "1 -0.321770 -0.027407 0.304124 -0.071128 \n", + "2 -0.321770 0.921759 0.304124 0.622840 \n", + "3 0.296817 0.807859 1.647023 0.782265 \n", + "4 1.533990 0.883793 1.647023 1.691926 \n", + "... ... ... ... ... 
\n", + "1455 0.296817 0.845826 0.304124 -0.071128 \n", + "1456 0.296817 0.048526 0.304124 0.116431 \n", + "1457 1.533990 -1.356240 -1.038774 -1.046434 \n", + "1458 -0.940357 -1.014540 -1.038774 -1.102702 \n", + "1459 -0.321770 -0.445040 -1.038774 -0.933899 \n", + "\n", + "[1413 rows x 15 columns]" + ] + }, + "execution_count": 434, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "#Generamos los dummies de las variables categoricas\n", + "stand_data = scaler.fit_transform(df_dummies)\n", + "#los centramos en 0 \n", + "df_stand_data = pd.DataFrame(stand_data, index = df_dummies.index, columns = df_dummies.columns)\n", + "df_stand_data\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### PCA process\n", + "Getting ratio where cumsum is at least 80%" + ] + }, + { + "cell_type": "code", + "execution_count": 435, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "4" + ] + }, + "execution_count": 435, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pca = PCA() # creamos una instancia de #PCA\n", + "pca.fit(df_stand_data) #prepara los datos y genera la matriz pca\n", + "ratio_80 = 0 # variable para guardar cuantos valores se necesitan para llegar al 80%\n", + "for i, num in enumerate(pca.explained_variance_ratio_.cumsum()):\n", + " if num > 0.8:\n", + " ratio_80 = i\n", + " break\n", + "ratio_80" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "fit and transform process for columns" + ] + }, + { + "cell_type": "code", + "execution_count": 436, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
comp1comp2comp3comp4
01.791821-1.410191-1.6874280.385872
1-0.4263950.2822280.235221-1.106798
21.690532-1.712462-1.0985080.155258
3-0.0851840.276780-0.467497-0.804128
43.450850-0.302722-1.637284-0.804499
...............
14550.224075-0.588041-0.804471-0.780430
14561.0382711.7971590.250557-1.478160
14570.6866511.446927-2.2223812.059979
1458-2.046225-0.4304410.4687041.065916
1459-1.5669470.2401050.7197480.170087
\n", + "

1413 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " comp1 comp2 comp3 comp4\n", + "0 1.791821 -1.410191 -1.687428 0.385872\n", + "1 -0.426395 0.282228 0.235221 -1.106798\n", + "2 1.690532 -1.712462 -1.098508 0.155258\n", + "3 -0.085184 0.276780 -0.467497 -0.804128\n", + "4 3.450850 -0.302722 -1.637284 -0.804499\n", + "... ... ... ... ...\n", + "1455 0.224075 -0.588041 -0.804471 -0.780430\n", + "1456 1.038271 1.797159 0.250557 -1.478160\n", + "1457 0.686651 1.446927 -2.222381 2.059979\n", + "1458 -2.046225 -0.430441 0.468704 1.065916\n", + "1459 -1.566947 0.240105 0.719748 0.170087\n", + "\n", + "[1413 rows x 4 columns]" + ] + }, + "execution_count": 436, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# PCA limited to the number of components stored in ratio_80.\n", + "pca_ratio_80 = PCA(n_components=ratio_80)\n", + "# Fit that PCA on the standardized frame and project every row onto its components.\n", + "pca_transform_ratio_80 = pca_ratio_80.fit_transform(df_stand_data)\n", + "# Wrap the projection in a DataFrame: same row index, columns named comp1..compN.\n", + "df_pca_transform = pd.DataFrame(\n", + "    pca_transform_ratio_80,\n", + "    index=df_stand_data.index,\n", + "    columns=[f'comp{k}' for k in range(1, ratio_80 + 1)],\n", + ")\n", + "df_pca_transform" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Creating component matrix" + ] + }, + { + "cell_type": "code", + "execution_count": 437, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ExterQual_2BsmtQual_5KitchenQual_2KitchenQual_4OverallQualYearBuiltYearRemodAddTotalBsmtSF_1stFlrSFGrLivAreaFullBathTotRmsAbvGrdGarageYrBltGarageCarsGarageArea
comp1-0.2790570.208703-0.2477170.1808460.3193480.2664400.2480970.2569110.2491270.2591790.2654960.2027020.2734350.2942600.286011
comp20.2148560.0987820.1957980.161085-0.003311-0.374439-0.3027140.2458320.3363020.3937050.0596130.421164-0.368912-0.048746-0.001127
comp30.0857360.4006470.1365800.387254-0.0368080.060848-0.1141760.3546390.268414-0.325169-0.402604-0.4094790.0292730.0220700.103731
comp4-0.2217390.304470-0.3359630.4654680.143107-0.1158020.217171-0.167599-0.1768880.037730-0.0043450.057220-0.198921-0.396832-0.431747
\n", + "
" + ], + "text/plain": [ + " ExterQual_2 BsmtQual_5 KitchenQual_2 KitchenQual_4 OverallQual \\\n", + "comp1 -0.279057 0.208703 -0.247717 0.180846 0.319348 \n", + "comp2 0.214856 0.098782 0.195798 0.161085 -0.003311 \n", + "comp3 0.085736 0.400647 0.136580 0.387254 -0.036808 \n", + "comp4 -0.221739 0.304470 -0.335963 0.465468 0.143107 \n", + "\n", + " YearBuilt YearRemodAdd TotalBsmtSF _1stFlrSF GrLivArea FullBath \\\n", + "comp1 0.266440 0.248097 0.256911 0.249127 0.259179 0.265496 \n", + "comp2 -0.374439 -0.302714 0.245832 0.336302 0.393705 0.059613 \n", + "comp3 0.060848 -0.114176 0.354639 0.268414 -0.325169 -0.402604 \n", + "comp4 -0.115802 0.217171 -0.167599 -0.176888 0.037730 -0.004345 \n", + "\n", + " TotRmsAbvGrd GarageYrBlt GarageCars GarageArea \n", + "comp1 0.202702 0.273435 0.294260 0.286011 \n", + "comp2 0.421164 -0.368912 -0.048746 -0.001127 \n", + "comp3 -0.409479 0.029273 0.022070 0.103731 \n", + "comp4 0.057220 -0.198921 -0.396832 -0.431747 " + ] + }, + "execution_count": 437, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Component matrix: rows labelled with the component names, columns with the standardized feature names.\n", + "df_matrix_componentes = pd.DataFrame(\n", + "    data=pca_ratio_80.components_,\n", + "    index=df_pca_transform.columns,\n", + "    columns=df_stand_data.columns,\n", + ")\n", + "df_matrix_componentes" + ] + }, + { + "cell_type": "code", + "execution_count": 438, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " OLS Regression Results \n", + "==============================================================================\n", + "Dep. Variable: SalePrice R-squared: 0.776\n", + "Model: OLS Adj. R-squared: 0.775\n", + "Method: Least Squares F-statistic: 1220.\n", + "Date: Wed, 09 Feb 2022 Prob (F-statistic): 0.00\n", + "Time: 22:07:51 Log-Likelihood: -16886.\n", + "No. 
Observations: 1413 AIC: 3.378e+04\n", + "Df Residuals: 1408 BIC: 3.381e+04\n", + "Df Model: 4 \n", + "Covariance Type: nonrobust \n", + "==============================================================================\n", + " coef std err t P>|t| [0.025 0.975]\n", + "------------------------------------------------------------------------------\n", + "const 1.825e+05 998.878 182.719 0.000 1.81e+05 1.84e+05\n", + "comp1 2.567e+04 377.124 68.078 0.000 2.49e+04 2.64e+04\n", + "comp2 1.013e+04 725.331 13.966 0.000 8707.014 1.16e+04\n", + "comp3 1716.6880 883.844 1.942 0.052 -17.105 3450.481\n", + "comp4 6721.9297 974.073 6.901 0.000 4811.140 8632.720\n", + "==============================================================================\n", + "Omnibus: 481.794 Durbin-Watson: 1.974\n", + "Prob(Omnibus): 0.000 Jarque-Bera (JB): 47849.223\n", + "Skew: -0.576 Prob(JB): 0.00\n", + "Kurtosis: 31.485 Cond. No. 2.65\n", + "==============================================================================\n", + "\n", + "Notes:\n", + "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n" + ] + } + ], + "source": [ + "# Regress SalePrice on the PCA component scores with statsmodels.\n", + "df_matrix = df_pca_transform\n", + "# statsmodels OLS does not add an intercept by itself, so append the 'const' column explicitly.\n", + "X = sm.add_constant(df_matrix)\n", + "linreg_statsm = sm.OLS(df_trains['SalePrice'], X).fit()\n", + "print(linreg_statsm.summary())" + ] + }, + { + "cell_type": "code", + "execution_count": 439, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "LinearRegression()" + ] + }, + "execution_count": 439, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "from sklearn.linear_model import LinearRegression\n", + "\n", + "# LinearRegression fits its own intercept, so split the plain component scores (df_matrix)\n", + "# instead of X, whose extra 'const' column would only duplicate that intercept.\n", + "X_train, X_test, y_train, y_test = train_test_split(\n", + "    df_matrix, df_trains['SalePrice'], random_state=22, test_size=0.2\n", + ")\n", + "linreg = LinearRegression()\n", + "linreg.fit(X_train, y_train)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [
+ "As we can see, after training on 80% of our data the $R^2$ values for the train and test sets are almost the same.\\\n", + "This makes us believe that the PCA-based model has been trained well, and that there is neither overfitting nor underfitting." + ] + }, + { + "cell_type": "code", + "execution_count": 440, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.7666848219460827\n", + "0.8177669809392756\n" + ] + } + ], + "source": [ + "# score() reports R^2: first on the training split, then on the held-out test split.\n", + "print(linreg.score(X_train, y_train))\n", + "print(linreg.score(X_test, y_test))" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "a58b191ad2a25b19fea43b3067475f20bdb1c629ecc23182cd320980facf1bfc" + }, + "kernelspec": { + "display_name": "Python 3.9.7 64-bit", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +}