From 86ca65dd29930ee511e22733be47cacb2c17214f Mon Sep 17 00:00:00 2001 From: Caio Nunez Date: Wed, 8 Mar 2023 14:34:13 +0000 Subject: [PATCH] supervisedsklearn --- your-code/main.ipynb | 775 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 671 insertions(+), 104 deletions(-) diff --git a/your-code/main.ipynb b/your-code/main.ipynb index 8a9fa9e..059813a 100644 --- a/your-code/main.ipynb +++ b/your-code/main.ipynb @@ -12,11 +12,16 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ - "# Import your libraries:\n" + "# Import your libraries:\n", + "\n", + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "from sklearn.model_selection import train_test_split" ] }, { @@ -37,11 +42,95 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'data': array([[ 0.03807591, 0.05068012, 0.06169621, ..., -0.00259226,\n", + " 0.01990842, -0.01764613],\n", + " [-0.00188202, -0.04464164, -0.05147406, ..., -0.03949338,\n", + " -0.06832974, -0.09220405],\n", + " [ 0.08529891, 0.05068012, 0.04445121, ..., -0.00259226,\n", + " 0.00286377, -0.02593034],\n", + " ...,\n", + " [ 0.04170844, 0.05068012, -0.01590626, ..., -0.01107952,\n", + " -0.04687948, 0.01549073],\n", + " [-0.04547248, -0.04464164, 0.03906215, ..., 0.02655962,\n", + " 0.04452837, -0.02593034],\n", + " [-0.04547248, -0.04464164, -0.0730303 , ..., -0.03949338,\n", + " -0.00421986, 0.00306441]]),\n", + " 'target': array([151., 75., 141., 206., 135., 97., 138., 63., 110., 310., 101.,\n", + " 69., 179., 185., 118., 171., 166., 144., 97., 168., 68., 49.,\n", + " 68., 245., 184., 202., 137., 85., 131., 283., 129., 59., 341.,\n", + " 87., 65., 102., 265., 276., 252., 90., 100., 55., 61., 92.,\n", + " 259., 53., 190., 142., 75., 142., 155., 225., 59., 104., 182.,\n", + " 128., 52., 37., 170., 170., 61., 144., 52., 128., 71., 163.,\n", + " 150., 97., 160., 178., 48., 270., 202., 111., 85., 42., 170.,\n", + " 200., 252., 113., 143., 51., 52., 210., 65., 141., 55., 134.,\n", + " 42., 111., 98., 164., 48., 96., 90., 162., 150., 279., 92.,\n", + " 83., 128., 102., 302., 198., 95., 53., 134., 144., 232., 81.,\n", + " 104., 59., 246., 297., 258., 229., 275., 281., 179., 200., 200.,\n", + " 173., 180., 84., 121., 161., 99., 109., 115., 268., 274., 158.,\n", + " 107., 83., 103., 272., 85., 280., 336., 281., 118., 317., 235.,\n", + " 60., 174., 259., 178., 128., 96., 126., 288., 88., 292., 71.,\n", + " 197., 186., 25., 84., 96., 195., 53., 217., 172., 131., 214.,\n", + " 59., 70., 220., 268., 152., 47., 74., 295., 101., 151., 127.,\n", + " 237., 225., 81., 151., 107., 64., 138., 185., 265., 101., 137.,\n", + " 143., 141., 79., 292., 178., 91., 116., 86., 122., 72., 129.,\n", + " 142., 90., 158., 39., 196., 222., 277., 99., 196., 202., 155.,\n", + " 77., 191., 70., 73., 49., 65., 263., 248., 296., 214., 185.,\n", + " 78., 93., 252., 150., 77., 208., 77., 108., 160., 53., 220.,\n", + " 154., 259., 90., 246., 124., 67., 72., 257., 262., 275., 177.,\n", + " 71., 47., 187., 125., 78., 51., 258., 215., 303., 243., 91.,\n", + " 150., 310., 153., 346., 63., 89., 50., 39., 103., 308., 116.,\n", + " 145., 74., 45., 115., 264., 87., 202., 127., 182., 241., 66.,\n", + " 94., 283., 64., 102., 200., 265., 94., 230., 181., 156., 233.,\n", + " 60., 219., 80., 68., 332., 248., 84., 200., 55., 85., 89.,\n", + " 31., 129., 83., 275., 65., 198., 236., 253., 124., 44., 172.,\n", + " 114., 142., 109., 180., 144., 163., 147., 97., 220., 190., 109.,\n", + " 191., 122., 230., 242., 248., 249., 192., 131., 237., 78., 135.,\n", + " 244., 199., 270., 164., 72., 96., 306., 91., 214., 95., 216.,\n", + " 263., 178., 113., 200., 139., 139., 88., 148., 88., 243., 71.,\n", + " 77., 109., 272., 60., 54., 221., 90., 311., 281., 182., 321.,\n", + " 58., 262., 206., 233., 242., 123., 167., 63., 197., 71., 168.,\n", + " 140., 217., 121., 235., 245., 40., 52., 104., 132., 88., 69.,\n", + " 219., 72., 201., 110., 51., 277., 63., 118., 69., 273., 258.,\n", + " 43., 198., 242., 232., 175., 93., 168., 275., 293., 281., 72.,\n", + " 140., 189., 181., 209., 136., 261., 113., 131., 174., 257., 55.,\n", + " 84., 42., 146., 212., 233., 91., 111., 152., 120., 67., 310.,\n", + " 94., 183., 66., 173., 72., 49., 64., 48., 178., 104., 132.,\n", + " 220., 57.]),\n", + " 'frame': None,\n", + " 'DESCR': '.. _diabetes_dataset:\\n\\nDiabetes dataset\\n----------------\\n\\nTen baseline variables, age, sex, body mass index, average blood\\npressure, and six blood serum measurements were obtained for each of n =\\n442 diabetes patients, as well as the response of interest, a\\nquantitative measure of disease progression one year after baseline.\\n\\n**Data Set Characteristics:**\\n\\n :Number of Instances: 442\\n\\n :Number of Attributes: First 10 columns are numeric predictive values\\n\\n :Target: Column 11 is a quantitative measure of disease progression one year after baseline\\n\\n :Attribute Information:\\n - age age in years\\n - sex\\n - bmi body mass index\\n - bp average blood pressure\\n - s1 tc, total serum cholesterol\\n - s2 ldl, low-density lipoproteins\\n - s3 hdl, high-density lipoproteins\\n - s4 tch, total cholesterol / HDL\\n - s5 ltg, possibly log of serum triglycerides level\\n - s6 glu, blood sugar level\\n\\nNote: Each of these 10 feature variables have been mean centered and scaled by the standard deviation times `n_samples` (i.e. the sum of squares of each column totals 1).\\n\\nSource URL:\\nhttps://www4.stat.ncsu.edu/~boos/var.select/diabetes.html\\n\\nFor more information see:\\nBradley Efron, Trevor Hastie, Iain Johnstone and Robert Tibshirani (2004) \"Least Angle Regression,\" Annals of Statistics (with discussion), 407-499.\\n(https://web.stanford.edu/~hastie/Papers/LARS/LeastAngle_2002.pdf)',\n", + " 'feature_names': ['age',\n", + " 'sex',\n", + " 'bmi',\n", + " 'bp',\n", + " 's1',\n", + " 's2',\n", + " 's3',\n", + " 's4',\n", + " 's5',\n", + " 's6'],\n", + " 'data_filename': 'diabetes_data.csv.gz',\n", + " 'target_filename': 'diabetes_target.csv.gz',\n", + " 'data_module': 'sklearn.datasets.data'}" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Your code here:\n", + "\n", + "from sklearn.datasets import load_diabetes\n", + "\n", + "diabetes = load_diabetes()\n", + "diabetes" ] }, { @@ -53,11 +142,24 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['data', 'target', 'frame', 'DESCR', 'feature_names', 'data_filename', 'target_filename', 'data_module'])" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Your code here:\n", + "\n", + "diabetes.keys()" ] }, { @@ -73,13 +175,60 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": { "scrolled": false }, - "outputs": [], - "source": [ - "# Your code here:\n" + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + ".. _diabetes_dataset:\n", + "\n", + "Diabetes dataset\n", + "----------------\n", + "\n", + "Ten baseline variables, age, sex, body mass index, average blood\n", + "pressure, and six blood serum measurements were obtained for each of n =\n", + "442 diabetes patients, as well as the response of interest, a\n", + "quantitative measure of disease progression one year after baseline.\n", + "\n", + "**Data Set Characteristics:**\n", + "\n", + " :Number of Instances: 442\n", + "\n", + " :Number of Attributes: First 10 columns are numeric predictive values\n", + "\n", + " :Target: Column 11 is a quantitative measure of disease progression one year after baseline\n", + "\n", + " :Attribute Information:\n", + " - age age in years\n", + " - sex\n", + " - bmi body mass index\n", + " - bp average blood pressure\n", + " - s1 tc, total serum cholesterol\n", + " - s2 ldl, low-density lipoproteins\n", + " - s3 hdl, high-density lipoproteins\n", + " - s4 tch, total cholesterol / HDL\n", + " - s5 ltg, possibly log of serum triglycerides level\n", + " - s6 glu, blood sugar level\n", + "\n", + "Note: Each of these 10 feature variables have been mean centered and scaled by the standard deviation times `n_samples` (i.e. the sum of squares of each column totals 1).\n", + "\n", + "Source URL:\n", + "https://www4.stat.ncsu.edu/~boos/var.select/diabetes.html\n", + "\n", + "For more information see:\n", + "Bradley Efron, Trevor Hastie, Iain Johnstone and Robert Tibshirani (2004) \"Least Angle Regression,\" Annals of Statistics (with discussion), 407-499.\n", + "(https://web.stanford.edu/~hastie/Papers/LARS/LeastAngle_2002.pdf)\n" + ] + } + ], + "source": [ + "# Your code here:\n", + "\n", + "print(diabetes.DESCR)" ] }, { @@ -101,7 +250,16 @@ "metadata": {}, "outputs": [], "source": [ - "# Enter your answer here:\n" + "# Enter your answer here:\n", + "\n", + "\"\"\" \n", + "\n", + "1. There are 10 attributes. Numeric predictive values in the first 10 columns\n", + "2. Diabetes[data] is the set of attributes/features that we will use to predict diabetes[target], which is the actual\n", + "measure that we want to predict/evaluate with the model\n", + "3. There are 442 records in the data\n", + "\n", + "\"\"\"" ] }, { @@ -115,11 +273,46 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(442, 10)" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Your code here:\n", + "\n", + "diabetes[\"data\"].shape" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(442,)" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "diabetes[\"target\"].shape\n", + "\n", + "## They match" ] }, { @@ -156,11 +349,13 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "\n", + "from sklearn.linear_model import LinearRegression" ] }, { @@ -172,11 +367,13 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "\n", + "diabetes_model = LinearRegression()" ] }, { @@ -190,11 +387,16 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "\n", + "features = diabetes[\"data\"]\n", + "target = diabetes[\"target\"]\n", + "\n", + "diabetes_data_train, diabetes_data_test, diabetes_target_train, diabetes_target_test = train_test_split(features, target, test_size = 0.20)" ] }, { @@ -206,11 +408,25 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "152.55472220850726\n", + "[ 17.54816161 -276.23079567 508.22184486 301.77211175 -340.77743688\n", + " 211.28056101 -179.85591454 -14.29758702 612.03383667 112.81057961]\n" + ] + } + ], + "source": [ + "# Your code here:\n", + "\n", + "diabetes_model.fit(diabetes_data_train, diabetes_target_train)\n", + "print(diabetes_model.intercept_)\n", + "print(diabetes_model.coef_)" ] }, { @@ -231,11 +447,13 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "\n", + "pred = diabetes_model.predict(diabetes_data_test)" ] }, { @@ -247,11 +465,46 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[ 94.38335042 122.65198523 169.22244428 235.88003275 101.06403208\n", + " 144.19926493 189.58328191 166.18711692 166.31744813 257.98404066\n", + " 85.63857299 158.71997794 184.00440706 120.50358102 181.21935084\n", + " 193.19017561 171.24205217 104.23055219 59.98613892 187.56771049\n", + " 87.60721017 127.07332764 119.35812122 138.80008617 148.53422288\n", + " 162.60677441 61.56487255 123.86812274 93.48881537 181.11167804\n", + " 125.266734 251.93450268 158.41501333 209.23915245 247.30909879\n", + " 160.77681226 269.63146777 219.95202458 227.5199142 162.62826983\n", + " 88.42219866 65.51787117 247.7462447 105.70179774 123.26993379\n", + " 111.79889248 217.89107582 88.84501394 160.6125096 179.80776481\n", + " 168.12523594 136.01768364 126.84602099 222.51656033 137.25169426\n", + " 219.24454491 147.14584268 92.1163227 128.40451421 153.5556762\n", + " 95.64457192 80.10574998 163.84896896 160.81658332 70.97370019\n", + " 138.86415525 164.54804226 81.0605143 89.63805287 105.13815998\n", + " 105.53622297 128.77503404 225.89249705 71.32855345 131.12927936\n", + " 142.89122931 234.15161481 169.97723831 75.93654611 225.61601134\n", + " 188.15989464 104.78110262 147.88134061 126.65426 194.38772868\n", + " 55.20423087 125.00248632 114.92596064 179.0183498 ]\n", + "[125. 59. 151. 259. 97. 172. 129. 120. 206. 132. 114. 118. 229. 150.\n", + " 85. 222. 144. 87. 85. 170. 55. 139. 183. 103. 97. 151. 99. 168.\n", + " 115. 163. 78. 336. 237. 189. 341. 262. 303. 180. 173. 235. 53. 83.\n", + " 215. 102. 68. 61. 202. 54. 184. 140. 121. 59. 178. 275. 83. 279.\n", + " 115. 94. 148. 252. 137. 89. 252. 185. 128. 190. 258. 71. 170. 49.\n", + " 71. 135. 208. 59. 74. 202. 152. 141. 72. 295. 78. 128. 81. 127.\n", + " 164. 78. 63. 53. 66.]\n" + ] + } + ], + "source": [ + "# Your code here:\n", + "\n", + "print(pred)\n", + "print(diabetes_target_test)" ] }, { @@ -267,7 +520,10 @@ "metadata": {}, "outputs": [], "source": [ - "# Your explanation here:\n" + "# Your explanation here:\n", + "\n", + "## No. Model is not good enough to predict with precision, although already manages to follow\n", + "## partially the trends of the target_test\n" ] }, { @@ -351,11 +607,13 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "\n", + "auto = pd.read_csv(\"/Users/caionunez/Desktop/Ironhack/Week7/lab-supervised-learning-sklearn/auto-mpg.csv\")" ] }, { @@ -367,11 +625,125 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
mpgcylindersdisplacementhorse_powerweightaccelerationmodel_yearcar_name
018.08307.0130.0350412.070\\t\"chevrolet chevelle malibu\"
115.08350.0165.0369311.570\\t\"buick skylark 320\"
218.08318.0150.0343611.070\\t\"plymouth satellite\"
316.08304.0150.0343312.070\\t\"amc rebel sst\"
417.08302.0140.0344910.570\\t\"ford torino\"
\n", + "
" + ], + "text/plain": [ + " mpg cylinders displacement horse_power weight acceleration \\\n", + "0 18.0 8 307.0 130.0 3504 12.0 \n", + "1 15.0 8 350.0 165.0 3693 11.5 \n", + "2 18.0 8 318.0 150.0 3436 11.0 \n", + "3 16.0 8 304.0 150.0 3433 12.0 \n", + "4 17.0 8 302.0 140.0 3449 10.5 \n", + "\n", + " model_year car_name \n", + "0 70 \\t\"chevrolet chevelle malibu\" \n", + "1 70 \\t\"buick skylark 320\" \n", + "2 70 \\t\"plymouth satellite\" \n", + "3 70 \\t\"amc rebel sst\" \n", + "4 70 \\t\"ford torino\" " + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Your code here:\n", + "\n", + "auto.head()" ] }, { @@ -383,11 +755,34 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "mpg float64\n", + "cylinders int64\n", + "displacement float64\n", + "horse_power float64\n", + "weight int64\n", + "acceleration float64\n", + "model_year int64\n", + "car_name object\n", + "dtype: object" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Your code here:\n", + "\n", + "auto.dtypes\n", + "\n", + "## Only object column is \"car name\" so we are good." ] }, { @@ -399,11 +794,23 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Newest model year 82\n", + "Oldest model year 70\n" + ] + } + ], + "source": [ + "# Your code here:\n", + "\n", + "print(\"Newest model year\", auto[\"model_year\"].max())\n", + "print(\"Oldest model year\", auto[\"model_year\"].min())" ] }, { @@ -415,11 +822,61 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "mpg 0\n", + "cylinders 0\n", + "displacement 0\n", + "horse_power 6\n", + "weight 0\n", + "acceleration 0\n", + "model_year 0\n", + "car_name 0\n", + "dtype: int64" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Your code here:\n", + "\n", + "auto.isnull().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "mpg 0\n", + "cylinders 0\n", + "displacement 0\n", + "horse_power 0\n", + "weight 0\n", + "acceleration 0\n", + "model_year 0\n", + "car_name 0\n", + "dtype: int64" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "auto.dropna(inplace=True)\n", + "auto.isnull().sum()" ] }, { @@ -431,11 +888,31 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "4 199\n", + "8 103\n", + "6 83\n", + "3 4\n", + "5 3\n", + "Name: cylinders, dtype: int64" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Your code here:\n", + "\n", + "auto[\"cylinders\"].value_counts()\n", + "\n", + "## 5 possible cylinders value" ] }, { @@ -451,11 +928,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 44, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "\n", + "#auto.drop(columns='car_name', inplace=True, axis=1)\n", + "\n", + "features = auto.drop(columns = [\"mpg\"])\n", + "target = auto[\"mpg\"]\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(features, target, test_size = 0.20)" ] }, { @@ -469,11 +953,25 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "LinearRegression()" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Your code here:\n", + "\n", + "auto_model = LinearRegression()\n", + "auto_model.fit(X_train, y_train)" ] }, { @@ -493,11 +991,27 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 47, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.7738352694130818\n", + "0.8151047252127355\n" + ] + } + ], + "source": [ + "# Your code here:\n", + "\n", + "print(auto_model.score(X_test, y_test))\n", + "\n", + "from sklearn.metrics import r2_score\n", + "\n", + "y_pred = auto_model.predict(X_train)\n", + "print(r2_score(y_train, y_pred))" ] }, { @@ -513,11 +1027,24 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 50, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.7738352694130818\n" + ] + } + ], + "source": [ + "# Your code here:\n", + "\n", + "## Already did that above I think, but anyway\n", + "\n", + "y_test_pred = auto_model.predict(X_test)\n", + "print(r2_score(y_test, y_test_pred))" ] }, { @@ -542,11 +1069,13 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 51, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "\n", + "X_train09, X_test09, y_train09, y_test09 = train_test_split(features, target, test_size = 0.10)" ] }, { @@ -558,11 +1087,25 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 53, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "LinearRegression()" + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Your code here:\n", + "\n", + "auto_model09 = LinearRegression()\n", + "auto_model09.fit(X_train09, y_train09)" ] }, { @@ -574,11 +1117,22 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 59, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.8137446885335688\n" + ] + } + ], + "source": [ + "# Your code here:\n", + "\n", + "y_pred09 = auto_model09.predict(X_train09)\n", + "print(r2_score(y_train09, y_pred09))" ] }, { @@ -590,11 +1144,24 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 60, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.7452575439654096\n" + ] + } + ], + "source": [ + "# Your code here:\n", + "\n", + "y_pred_test = auto_model09.predict(X_test09)\n", + "print(r2_score(y_test09, y_pred_test))\n", + "\n", + "## It didn't improve and I was expecting it to improve. Will try to review the code later on" ] }, { @@ -703,7 +1270,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -717,7 +1284,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.9.13" } }, "nbformat": 4,