From 2f270165c2ec59297a778b4766b4f5366fc48010 Mon Sep 17 00:00:00 2001
From: almsasantos <alms.asantos@gmail.com>
Date: Sat, 7 Mar 2020 13:59:58 +0100
Subject: [PATCH 1/4] create pull

---
 module-3/lab-supervised-learning-sklearn/your-code/main.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/module-3/lab-supervised-learning-sklearn/your-code/main.ipynb b/module-3/lab-supervised-learning-sklearn/your-code/main.ipynb
index 0102ef94..244f2661 100644
--- a/module-3/lab-supervised-learning-sklearn/your-code/main.ipynb
+++ b/module-3/lab-supervised-learning-sklearn/your-code/main.ipynb
@@ -726,7 +726,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.6"
+   "version": "3.7.5"
   }
  },
  "nbformat": 4,

From 6d3e719413635ca0d71e00867a788353e1441516 Mon Sep 17 00:00:00 2001
From: almsasantos <alms.asantos@gmail.com>
Date: Sat, 7 Mar 2020 14:02:12 +0100
Subject: [PATCH 2/4] create pull

---
 module-3/lab-supervised-learning-sklearn/your-code/main.ipynb | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/module-3/lab-supervised-learning-sklearn/your-code/main.ipynb b/module-3/lab-supervised-learning-sklearn/your-code/main.ipynb
index 244f2661..891e69c6 100644
--- a/module-3/lab-supervised-learning-sklearn/your-code/main.ipynb
+++ b/module-3/lab-supervised-learning-sklearn/your-code/main.ipynb
@@ -16,7 +16,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Import your libraries:\n"
+    "# Import your libraries:\n",
+    "import pandas as pd"
    ]
   },
   {

From adb1852aba6a9ad2e0b73883d9d06b21a78d6495 Mon Sep 17 00:00:00 2001
From: almsasantos <alms.asantos@gmail.com>
Date: Sun, 8 Mar 2020 20:22:54 +0100
Subject: [PATCH 3/4] still gotta do challenge 5 and bonus

---
 .../your-code/main.ipynb                      | 735 ++++++++++++++++--
 1 file changed, 658 insertions(+), 77 deletions(-)

diff --git a/module-3/lab-supervised-learning-sklearn/your-code/main.ipynb b/module-3/lab-supervised-learning-sklearn/your-code/main.ipynb
index 891e69c6..99cfc233 100644
--- a/module-3/lab-supervised-learning-sklearn/your-code/main.ipynb
+++ b/module-3/lab-supervised-learning-sklearn/your-code/main.ipynb
@@ -12,12 +12,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
     "# Import your libraries:\n",
-    "import pandas as pd"
+    "import pandas as pd\n",
+    "from sklearn.datasets import load_diabetes"
    ]
   },
   {
@@ -38,11 +39,98 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "diabetes = load_diabetes()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'data': array([[ 0.03807591,  0.05068012,  0.06169621, ..., -0.00259226,\n",
+       "          0.01990842, -0.01764613],\n",
+       "        [-0.00188202, -0.04464164, -0.05147406, ..., -0.03949338,\n",
+       "         -0.06832974, -0.09220405],\n",
+       "        [ 0.08529891,  0.05068012,  0.04445121, ..., -0.00259226,\n",
+       "          0.00286377, -0.02593034],\n",
+       "        ...,\n",
+       "        [ 0.04170844,  0.05068012, -0.01590626, ..., -0.01107952,\n",
+       "         -0.04687948,  0.01549073],\n",
+       "        [-0.04547248, -0.04464164,  0.03906215, ...,  0.02655962,\n",
+       "          0.04452837, -0.02593034],\n",
+       "        [-0.04547248, -0.04464164, -0.0730303 , ..., -0.03949338,\n",
+       "         -0.00421986,  0.00306441]]),\n",
+       " 'target': array([151.,  75., 141., 206., 135.,  97., 138.,  63., 110., 310., 101.,\n",
+       "         69., 179., 185., 118., 171., 166., 144.,  97., 168.,  68.,  49.,\n",
+       "         68., 245., 184., 202., 137.,  85., 131., 283., 129.,  59., 341.,\n",
+       "         87.,  65., 102., 265., 276., 252.,  90., 100.,  55.,  61.,  92.,\n",
+       "        259.,  53., 190., 142.,  75., 142., 155., 225.,  59., 104., 182.,\n",
+       "        128.,  52.,  37., 170., 170.,  61., 144.,  52., 128.,  71., 163.,\n",
+       "        150.,  97., 160., 178.,  48., 270., 202., 111.,  85.,  42., 170.,\n",
+       "        200., 252., 113., 143.,  51.,  52., 210.,  65., 141.,  55., 134.,\n",
+       "         42., 111.,  98., 164.,  48.,  96.,  90., 162., 150., 279.,  92.,\n",
+       "         83., 128., 102., 302., 198.,  95.,  53., 134., 144., 232.,  81.,\n",
+       "        104.,  59., 246., 297., 258., 229., 275., 281., 179., 200., 200.,\n",
+       "        173., 180.,  84., 121., 161.,  99., 109., 115., 268., 274., 158.,\n",
+       "        107.,  83., 103., 272.,  85., 280., 336., 281., 118., 317., 235.,\n",
+       "         60., 174., 259., 178., 128.,  96., 126., 288.,  88., 292.,  71.,\n",
+       "        197., 186.,  25.,  84.,  96., 195.,  53., 217., 172., 131., 214.,\n",
+       "         59.,  70., 220., 268., 152.,  47.,  74., 295., 101., 151., 127.,\n",
+       "        237., 225.,  81., 151., 107.,  64., 138., 185., 265., 101., 137.,\n",
+       "        143., 141.,  79., 292., 178.,  91., 116.,  86., 122.,  72., 129.,\n",
+       "        142.,  90., 158.,  39., 196., 222., 277.,  99., 196., 202., 155.,\n",
+       "         77., 191.,  70.,  73.,  49.,  65., 263., 248., 296., 214., 185.,\n",
+       "         78.,  93., 252., 150.,  77., 208.,  77., 108., 160.,  53., 220.,\n",
+       "        154., 259.,  90., 246., 124.,  67.,  72., 257., 262., 275., 177.,\n",
+       "         71.,  47., 187., 125.,  78.,  51., 258., 215., 303., 243.,  91.,\n",
+       "        150., 310., 153., 346.,  63.,  89.,  50.,  39., 103., 308., 116.,\n",
+       "        145.,  74.,  45., 115., 264.,  87., 202., 127., 182., 241.,  66.,\n",
+       "         94., 283.,  64., 102., 200., 265.,  94., 230., 181., 156., 233.,\n",
+       "         60., 219.,  80.,  68., 332., 248.,  84., 200.,  55.,  85.,  89.,\n",
+       "         31., 129.,  83., 275.,  65., 198., 236., 253., 124.,  44., 172.,\n",
+       "        114., 142., 109., 180., 144., 163., 147.,  97., 220., 190., 109.,\n",
+       "        191., 122., 230., 242., 248., 249., 192., 131., 237.,  78., 135.,\n",
+       "        244., 199., 270., 164.,  72.,  96., 306.,  91., 214.,  95., 216.,\n",
+       "        263., 178., 113., 200., 139., 139.,  88., 148.,  88., 243.,  71.,\n",
+       "         77., 109., 272.,  60.,  54., 221.,  90., 311., 281., 182., 321.,\n",
+       "         58., 262., 206., 233., 242., 123., 167.,  63., 197.,  71., 168.,\n",
+       "        140., 217., 121., 235., 245.,  40.,  52., 104., 132.,  88.,  69.,\n",
+       "        219.,  72., 201., 110.,  51., 277.,  63., 118.,  69., 273., 258.,\n",
+       "         43., 198., 242., 232., 175.,  93., 168., 275., 293., 281.,  72.,\n",
+       "        140., 189., 181., 209., 136., 261., 113., 131., 174., 257.,  55.,\n",
+       "         84.,  42., 146., 212., 233.,  91., 111., 152., 120.,  67., 310.,\n",
+       "         94., 183.,  66., 173.,  72.,  49.,  64.,  48., 178., 104., 132.,\n",
+       "        220.,  57.]),\n",
+       " 'DESCR': '.. _diabetes_dataset:\\n\\nDiabetes dataset\\n----------------\\n\\nTen baseline variables, age, sex, body mass index, average blood\\npressure, and six blood serum measurements were obtained for each of n =\\n442 diabetes patients, as well as the response of interest, a\\nquantitative measure of disease progression one year after baseline.\\n\\n**Data Set Characteristics:**\\n\\n  :Number of Instances: 442\\n\\n  :Number of Attributes: First 10 columns are numeric predictive values\\n\\n  :Target: Column 11 is a quantitative measure of disease progression one year after baseline\\n\\n  :Attribute Information:\\n      - Age\\n      - Sex\\n      - Body mass index\\n      - Average blood pressure\\n      - S1\\n      - S2\\n      - S3\\n      - S4\\n      - S5\\n      - S6\\n\\nNote: Each of these 10 feature variables have been mean centered and scaled by the standard deviation times `n_samples` (i.e. the sum of squares of each column totals 1).\\n\\nSource URL:\\nhttps://www4.stat.ncsu.edu/~boos/var.select/diabetes.html\\n\\nFor more information see:\\nBradley Efron, Trevor Hastie, Iain Johnstone and Robert Tibshirani (2004) \"Least Angle Regression,\" Annals of Statistics (with discussion), 407-499.\\n(https://web.stanford.edu/~hastie/Papers/LARS/LeastAngle_2002.pdf)',\n",
+       " 'feature_names': ['age',\n",
+       "  'sex',\n",
+       "  'bmi',\n",
+       "  'bp',\n",
+       "  's1',\n",
+       "  's2',\n",
+       "  's3',\n",
+       "  's4',\n",
+       "  's5',\n",
+       "  's6'],\n",
+       " 'data_filename': '/home/almsasantos/miniconda3/envs/data_env/lib/python3.7/site-packages/sklearn/datasets/data/diabetes_data.csv.gz',\n",
+       " 'target_filename': '/home/almsasantos/miniconda3/envs/data_env/lib/python3.7/site-packages/sklearn/datasets/data/diabetes_target.csv.gz'}"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "diabetes"
    ]
   },
   {
@@ -54,11 +142,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "dict_keys(['data', 'target', 'DESCR', 'feature_names', 'data_filename', 'target_filename'])"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "diabetes.keys()"
    ]
   },
   {
@@ -74,13 +174,59 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
    "metadata": {
     "scrolled": false
    },
-   "outputs": [],
-   "source": [
-    "# Your code here:\n"
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      ".. _diabetes_dataset:\n",
+      "\n",
+      "Diabetes dataset\n",
+      "----------------\n",
+      "\n",
+      "Ten baseline variables, age, sex, body mass index, average blood\n",
+      "pressure, and six blood serum measurements were obtained for each of n =\n",
+      "442 diabetes patients, as well as the response of interest, a\n",
+      "quantitative measure of disease progression one year after baseline.\n",
+      "\n",
+      "**Data Set Characteristics:**\n",
+      "\n",
+      "  :Number of Instances: 442\n",
+      "\n",
+      "  :Number of Attributes: First 10 columns are numeric predictive values\n",
+      "\n",
+      "  :Target: Column 11 is a quantitative measure of disease progression one year after baseline\n",
+      "\n",
+      "  :Attribute Information:\n",
+      "      - Age\n",
+      "      - Sex\n",
+      "      - Body mass index\n",
+      "      - Average blood pressure\n",
+      "      - S1\n",
+      "      - S2\n",
+      "      - S3\n",
+      "      - S4\n",
+      "      - S5\n",
+      "      - S6\n",
+      "\n",
+      "Note: Each of these 10 feature variables have been mean centered and scaled by the standard deviation times `n_samples` (i.e. the sum of squares of each column totals 1).\n",
+      "\n",
+      "Source URL:\n",
+      "https://www4.stat.ncsu.edu/~boos/var.select/diabetes.html\n",
+      "\n",
+      "For more information see:\n",
+      "Bradley Efron, Trevor Hastie, Iain Johnstone and Robert Tibshirani (2004) \"Least Angle Regression,\" Annals of Statistics (with discussion), 407-499.\n",
+      "(https://web.stanford.edu/~hastie/Papers/LARS/LeastAngle_2002.pdf)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "print(diabetes.DESCR)"
    ]
   },
   {
@@ -98,11 +244,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Enter your answer here:\n"
+    "# Enter your answer here:\n",
+    "#There are 10 attributes in the data: the patient's age, sex, body mass index, average blood pressure and six blood serum measurements.\n",
+    "#Based on the data we want to predict a quantitative measure of disease progression one year after baseline.\n",
+    "#There are 442 records in the data"
    ]
   },
   {
@@ -116,11 +265,43 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 7,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(442, 10)"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "diabetes.data.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(442,)"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "diabetes.target.shape"
    ]
   },
   {
@@ -157,11 +338,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "from sklearn.linear_model import LinearRegression\n",
+    "X = diabetes['data']\n",
+    "y = diabetes['target']"
    ]
   },
   {
@@ -173,11 +357,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "diabetes_model = LinearRegression()"
    ]
   },
   {
@@ -191,11 +376,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "diabetes_data_train, diabetes_data_test, diabetes_target_train, diabetes_target_test = train_test_split(X, y, test_size=0.20)"
    ]
   },
   {
@@ -207,11 +394,65 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 12,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "diabetes_model.fit(diabetes_data_train, diabetes_target_train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "152.44583955694154"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "diabetes_model.intercept_"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([ -89.45281236, -229.06384997,  518.83510703,  299.72996436,\n",
+       "       -845.23992693,  549.2794504 ,  120.58292975,  173.00861293,\n",
+       "        830.53440923,   44.73346357])"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "diabetes_model.coef_"
    ]
   },
   {
@@ -232,11 +473,46 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Your code here:\n"
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([138.89214536,  69.12661733, 121.94914756, 113.19129322,\n",
+       "       156.99751524, 160.26669269,  80.08848189,  61.30710601,\n",
+       "        84.73618699, 107.89958299, 151.35937186,  60.64696835,\n",
+       "       167.87414486,  79.27804032, 179.04698731, 233.58519952,\n",
+       "       112.96390722, 133.42174259, 257.65542174,  88.16482361,\n",
+       "       120.93394734, 103.8417481 , 103.57188763, 149.68285436,\n",
+       "       133.42796845, 124.7837086 , 140.57513167, 158.30971152,\n",
+       "       159.80344786,  77.79931922, 120.09548695, 168.00783093,\n",
+       "       123.37939875, 199.45131627,  90.26752494, 195.23115095,\n",
+       "       135.44962218, 224.249665  ,  71.97282119, 164.8514765 ,\n",
+       "       265.94969922, 205.21673439, 141.45721116, 250.05254518,\n",
+       "       183.47955904, 212.41379545,  75.20164627,  87.75472013,\n",
+       "       222.42848036, 127.05644988,  87.78706082, 176.76840359,\n",
+       "       197.17051518, 126.35877742, 124.01929452,  97.6539756 ,\n",
+       "        75.06391055, 196.70493044,  91.77547822, 241.22160918,\n",
+       "       107.68270556, 132.26875559, 228.51409243, 192.45648231,\n",
+       "       141.97675603, 148.40867286, 176.19876293,  95.03557368,\n",
+       "        48.0875041 , 116.74774667, 123.65244898, 211.27805279,\n",
+       "       112.4169273 , 119.835268  ,  66.47694782,  99.19859547,\n",
+       "       104.00187161,  50.95801923, 204.56926326, 120.06261764,\n",
+       "       249.14681134, 114.46870686, 108.36230907, 129.60594826,\n",
+       "       107.72571814, 126.7724346 , 152.55197824, 184.0222398 ,\n",
+       "       178.90568195])"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "y_pred = diabetes_model.predict(diabetes_data_test)\n",
+    "y_pred"
    ]
   },
   {
@@ -248,11 +524,55 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[ 59.  39.  97.  90. 178. 276.  55.  70. 114.  63.  61.  77. 277.  77.\n",
+      "  85.  99. 127. 170. 341.  84.  59. 128. 199. 118. 150. 191. 182.  94.\n",
+      "  85.  51.  83. 180. 160. 272.  71. 178. 230. 261. 128. 131. 308. 288.\n",
+      " 168. 245. 139. 163. 138.  96. 192. 144.  91. 217. 293.  49. 113.  49.\n",
+      "  65. 142.  51. 259.  71. 162. 246.  78.  83. 190. 174.  81. 116.  68.\n",
+      "  53. 221. 160.  96.  96. 101. 104.  63.  68.  64. 215. 200.  94.  40.\n",
+      " 182.  84. 155. 175. 283.]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "print(diabetes_target_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "from sklearn.metrics import mean_squared_error"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "3072.9763342489605"
+      ]
+     },
+     "execution_count": 18,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "mean_squared_error(y_true=diabetes_target_test, y_pred=y_pred)"
    ]
   },
   {
@@ -264,11 +584,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 19,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your explanation here:\n"
+    "# Your explanation here:\n",
+    "#No"
    ]
   },
   {
@@ -303,7 +624,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 20,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -327,7 +648,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 21,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -352,11 +673,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 22,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "auto = pd.read_csv('../auto-mpg.csv')"
    ]
   },
   {
@@ -368,11 +690,124 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Your code here:\n"
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>mpg</th>\n",
+       "      <th>cylinders</th>\n",
+       "      <th>displacement</th>\n",
+       "      <th>horse_power</th>\n",
+       "      <th>weight</th>\n",
+       "      <th>acceleration</th>\n",
+       "      <th>model_year</th>\n",
+       "      <th>car_name</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>18.0</td>\n",
+       "      <td>8</td>\n",
+       "      <td>307.0</td>\n",
+       "      <td>130.0</td>\n",
+       "      <td>3504</td>\n",
+       "      <td>12.0</td>\n",
+       "      <td>70</td>\n",
+       "      <td>\\t\"chevrolet chevelle malibu\"</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>15.0</td>\n",
+       "      <td>8</td>\n",
+       "      <td>350.0</td>\n",
+       "      <td>165.0</td>\n",
+       "      <td>3693</td>\n",
+       "      <td>11.5</td>\n",
+       "      <td>70</td>\n",
+       "      <td>\\t\"buick skylark 320\"</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>18.0</td>\n",
+       "      <td>8</td>\n",
+       "      <td>318.0</td>\n",
+       "      <td>150.0</td>\n",
+       "      <td>3436</td>\n",
+       "      <td>11.0</td>\n",
+       "      <td>70</td>\n",
+       "      <td>\\t\"plymouth satellite\"</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>16.0</td>\n",
+       "      <td>8</td>\n",
+       "      <td>304.0</td>\n",
+       "      <td>150.0</td>\n",
+       "      <td>3433</td>\n",
+       "      <td>12.0</td>\n",
+       "      <td>70</td>\n",
+       "      <td>\\t\"amc rebel sst\"</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>17.0</td>\n",
+       "      <td>8</td>\n",
+       "      <td>302.0</td>\n",
+       "      <td>140.0</td>\n",
+       "      <td>3449</td>\n",
+       "      <td>10.5</td>\n",
+       "      <td>70</td>\n",
+       "      <td>\\t\"ford torino\"</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "    mpg  cylinders  displacement  horse_power  weight  acceleration  \\\n",
+       "0  18.0          8         307.0        130.0    3504          12.0   \n",
+       "1  15.0          8         350.0        165.0    3693          11.5   \n",
+       "2  18.0          8         318.0        150.0    3436          11.0   \n",
+       "3  16.0          8         304.0        150.0    3433          12.0   \n",
+       "4  17.0          8         302.0        140.0    3449          10.5   \n",
+       "\n",
+       "   model_year                       car_name  \n",
+       "0          70  \\t\"chevrolet chevelle malibu\"  \n",
+       "1          70          \\t\"buick skylark 320\"  \n",
+       "2          70         \\t\"plymouth satellite\"  \n",
+       "3          70              \\t\"amc rebel sst\"  \n",
+       "4          70                \\t\"ford torino\"  "
+      ]
+     },
+     "execution_count": 23,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "auto.head()"
    ]
   },
   {
@@ -384,11 +819,31 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Your code here:\n"
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "mpg             float64\n",
+       "cylinders         int64\n",
+       "displacement    float64\n",
+       "horse_power     float64\n",
+       "weight            int64\n",
+       "acceleration    float64\n",
+       "model_year        int64\n",
+       "car_name         object\n",
+       "dtype: object"
+      ]
+     },
+     "execution_count": 24,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "auto.dtypes"
    ]
   },
   {
@@ -400,11 +855,22 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 25,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "The newest model year is 82 and the oldest model year is 70\n"
+     ]
+    }
+   ],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "newest_model = auto['model_year'].max()  # newest = largest year; avoids label-based [0] lookup on a sorted Series\n",
+    "oldest_model = auto['model_year'].min()  # oldest = smallest year; no hard-coded row label\n",
+    "print(f'The newest model year is {newest_model} and the oldest model year is {oldest_model}')"
    ]
   },
   {
@@ -416,11 +882,40 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 26,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "mpg             0\n",
+       "cylinders       0\n",
+       "displacement    0\n",
+       "horse_power     6\n",
+       "weight          0\n",
+       "acceleration    0\n",
+       "model_year      0\n",
+       "car_name        0\n",
+       "dtype: int64"
+      ]
+     },
+     "execution_count": 26,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "auto.isna().sum()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "auto.dropna(axis=0, inplace=True)"
    ]
   },
   {
@@ -432,11 +927,21 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 28,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "There are 5 possible values of cylinders\n"
+     ]
+    }
+   ],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "auto['cylinders'].value_counts()\n",
+    "print(f'There are {len(auto.cylinders.unique())} possible values of cylinders')"
    ]
   },
   {
@@ -452,11 +957,31 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 29,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "auto.drop('car_name', axis=1, inplace=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X = auto.drop('mpg', axis=1)\n",
+    "y = auto['mpg'].values"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)"
    ]
   },
   {
@@ -470,11 +995,24 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 32,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)"
+      ]
+     },
+     "execution_count": 32,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "auto_model = LinearRegression()\n",
+    "auto_model.fit(X_train, y_train)"
    ]
   },
   {
@@ -503,11 +1041,41 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 33,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "y_pred = auto_model.predict(X_train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 34,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.metrics import r2_score"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 35,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.8013399085677433"
+      ]
+     },
+     "execution_count": 35,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "r2_score(y_train, y_pred)"
    ]
   },
   {
@@ -523,11 +1091,24 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 36,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.8272654354467638"
+      ]
+     },
+     "execution_count": 36,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "y_test_pred = auto_model.predict(X_test)\n",
+    "r2_score(y_test, y_test_pred)"
    ]
   },
   {
@@ -552,7 +1133,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 37,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -568,7 +1149,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 38,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -584,7 +1165,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 39,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -600,7 +1181,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 40,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -620,7 +1201,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 41,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -636,7 +1217,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 42,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -652,7 +1233,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 43,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -670,7 +1251,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 44,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -686,7 +1267,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 45,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -713,9 +1294,9 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python [conda env:data_env]",
    "language": "python",
-   "name": "python3"
+   "name": "conda-env-data_env-py"
   },
   "language_info": {
    "codemirror_mode": {

From 7cd83d1c1761dec35cbb1f425d808d99a0edd5b5 Mon Sep 17 00:00:00 2001
From: almsasantos <alms.asantos@gmail.com>
Date: Mon, 16 Mar 2020 12:28:30 +0100
Subject: [PATCH 4/4] everything done, except bonus 1

---
 .../your-code/main.ipynb                      | 307 +++++++++++++++---
 1 file changed, 253 insertions(+), 54 deletions(-)

diff --git a/module-3/lab-supervised-learning-sklearn/your-code/main.ipynb b/module-3/lab-supervised-learning-sklearn/your-code/main.ipynb
index 99cfc233..9b815120 100644
--- a/module-3/lab-supervised-learning-sklearn/your-code/main.ipynb
+++ b/module-3/lab-supervised-learning-sklearn/your-code/main.ipynb
@@ -421,7 +421,7 @@
     {
      "data": {
       "text/plain": [
-       "152.44583955694154"
+       "151.62325584057453"
       ]
      },
      "execution_count": 13,
@@ -441,9 +441,9 @@
     {
      "data": {
       "text/plain": [
-       "array([ -89.45281236, -229.06384997,  518.83510703,  299.72996436,\n",
-       "       -845.23992693,  549.2794504 ,  120.58292975,  173.00861293,\n",
-       "        830.53440923,   44.73346357])"
+       "array([ -61.60664965, -260.44060769,  544.72083274,  280.13157371,\n",
+       "       -607.34982048,  288.11490872,   14.1194967 ,  166.80860998,\n",
+       "        707.22691646,   70.97826776])"
       ]
      },
      "execution_count": 14,
@@ -479,29 +479,29 @@
     {
      "data": {
       "text/plain": [
-       "array([138.89214536,  69.12661733, 121.94914756, 113.19129322,\n",
-       "       156.99751524, 160.26669269,  80.08848189,  61.30710601,\n",
-       "        84.73618699, 107.89958299, 151.35937186,  60.64696835,\n",
-       "       167.87414486,  79.27804032, 179.04698731, 233.58519952,\n",
-       "       112.96390722, 133.42174259, 257.65542174,  88.16482361,\n",
-       "       120.93394734, 103.8417481 , 103.57188763, 149.68285436,\n",
-       "       133.42796845, 124.7837086 , 140.57513167, 158.30971152,\n",
-       "       159.80344786,  77.79931922, 120.09548695, 168.00783093,\n",
-       "       123.37939875, 199.45131627,  90.26752494, 195.23115095,\n",
-       "       135.44962218, 224.249665  ,  71.97282119, 164.8514765 ,\n",
-       "       265.94969922, 205.21673439, 141.45721116, 250.05254518,\n",
-       "       183.47955904, 212.41379545,  75.20164627,  87.75472013,\n",
-       "       222.42848036, 127.05644988,  87.78706082, 176.76840359,\n",
-       "       197.17051518, 126.35877742, 124.01929452,  97.6539756 ,\n",
-       "        75.06391055, 196.70493044,  91.77547822, 241.22160918,\n",
-       "       107.68270556, 132.26875559, 228.51409243, 192.45648231,\n",
-       "       141.97675603, 148.40867286, 176.19876293,  95.03557368,\n",
-       "        48.0875041 , 116.74774667, 123.65244898, 211.27805279,\n",
-       "       112.4169273 , 119.835268  ,  66.47694782,  99.19859547,\n",
-       "       104.00187161,  50.95801923, 204.56926326, 120.06261764,\n",
-       "       249.14681134, 114.46870686, 108.36230907, 129.60594826,\n",
-       "       107.72571814, 126.7724346 , 152.55197824, 184.0222398 ,\n",
-       "       178.90568195])"
+       "array([ 66.37935132,  88.84819977,  85.93016773, 137.93624351,\n",
+       "       249.69856319, 117.97000149, 166.41024337, 231.14148398,\n",
+       "       156.65895101,  81.90045079, 157.19526302, 169.45360892,\n",
+       "       214.00558451, 296.761621  , 195.85525992, 135.67550785,\n",
+       "       112.28077343, 124.43548197,  94.01643575, 118.38802303,\n",
+       "       174.33980063, 177.12239007, 212.09002138, 114.41655502,\n",
+       "       162.67844191, 184.5950349 , 215.83846784,  96.60362119,\n",
+       "       144.99444982, 225.88063695, 159.49664665, 171.13016093,\n",
+       "       215.06142677, 110.25870676, 164.84506098,  65.54869156,\n",
+       "       191.56656633,  94.08292533, 162.08119093, 125.11978521,\n",
+       "        91.01553723, 107.16473123, 170.56909167, 199.19054466,\n",
+       "       146.64258834, 254.62508693, 235.26123914,  73.48024846,\n",
+       "        97.24764758, 122.52878271, 252.34718987, 110.97011029,\n",
+       "       232.78000165, 144.44212693, 230.84327562,  65.29700824,\n",
+       "        83.67883215, 105.44311055, 129.151523  , 281.66828128,\n",
+       "       123.54115313,  61.87309155, 204.42153502, 196.03498841,\n",
+       "       229.32866686, 178.88730863, 191.9882965 , 129.97907864,\n",
+       "       170.26795101, 180.77290578, 183.27743198, 178.26576001,\n",
+       "        62.61083204, 156.70456891,  57.53442299,  95.60119443,\n",
+       "       127.14478011, 132.35303867,  96.57270416, 188.04159405,\n",
+       "       149.048561  ,  98.43510818,  93.08321169,  79.50374091,\n",
+       "       219.87684007, 225.05178691, 221.80598641, 156.20247408,\n",
+       "       104.23131512])"
       ]
      },
      "execution_count": 15,
@@ -531,13 +531,13 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "[ 59.  39.  97.  90. 178. 276.  55.  70. 114.  63.  61.  77. 277.  77.\n",
-      "  85.  99. 127. 170. 341.  84.  59. 128. 199. 118. 150. 191. 182.  94.\n",
-      "  85.  51.  83. 180. 160. 272.  71. 178. 230. 261. 128. 131. 308. 288.\n",
-      " 168. 245. 139. 163. 138.  96. 192. 144.  91. 217. 293.  49. 113.  49.\n",
-      "  65. 142.  51. 259.  71. 162. 246.  78.  83. 190. 174.  81. 116.  68.\n",
-      "  53. 221. 160.  96.  96. 101. 104.  63.  68.  64. 215. 200.  94.  40.\n",
-      " 182.  84. 155. 175. 283.]\n"
+      "[ 43. 158. 181. 140. 243.  64.  77. 246. 154.  72. 196. 180.  52. 258.\n",
+      "  78. 142.  97.  68.  84.  60. 144. 147. 166.  53. 138. 257. 220. 118.\n",
+      " 197. 268. 127. 171. 180.  90. 216.  96. 191.  74. 245. 214.  71. 129.\n",
+      " 184. 293. 103. 281. 317.  89. 170. 145. 336. 253. 321.  93. 281.  75.\n",
+      "  42.  69.  49. 230.  44.  52. 151. 186. 152. 263. 161. 178. 217. 283.\n",
+      " 232. 107.  70.  94.  39. 108. 170. 230.  81. 202. 134. 101.  69.  65.\n",
+      " 173. 225. 295. 178. 125.]\n"
      ]
     }
    ],
@@ -563,7 +563,7 @@
     {
      "data": {
       "text/plain": [
-       "3072.9763342489605"
+       "2976.748095762798"
       ]
      },
      "execution_count": 18,
@@ -1066,7 +1066,7 @@
     {
      "data": {
       "text/plain": [
-       "0.8013399085677433"
+       "0.8236124478526281"
       ]
      },
      "execution_count": 35,
@@ -1097,7 +1097,7 @@
     {
      "data": {
       "text/plain": [
-       "0.8272654354467638"
+       "0.7436589879920421"
       ]
      },
      "execution_count": 36,
@@ -1137,7 +1137,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "X_train09, X_test09, y_train09, y_test09 = train_test_split(X, y, test_size=0.1)"
    ]
   },
   {
@@ -1153,7 +1154,28 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "auto_model09 = LinearRegression()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 39,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)"
+      ]
+     },
+     "execution_count": 39,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "auto_model09.fit(X_train09, y_train09)"
    ]
   },
   {
@@ -1165,11 +1187,53 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 39,
+   "execution_count": 40,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "y_pred_train09 = auto_model09.predict(X_train09)\n",
+    "y_pred_test09 = auto_model09.predict(X_test09)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 41,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.8176093617328014"
+      ]
+     },
+     "execution_count": 41,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "r2_score(y_train09, y_pred_train09)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 42,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.7260025114610873"
+      ]
+     },
+     "execution_count": 42,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "r2_score(y_test09, y_pred_test09)"
    ]
   },
   {
@@ -1181,11 +1245,20 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 40,
+   "execution_count": 43,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "The r squared score for the new training data is 0.8176093617328014 and the r squared score for the new testing data is 0.7260025114610873\n"
+     ]
+    }
+   ],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "print(f'The r squared score for the new training data is {r2_score(y_train09, y_pred_train09)} and the r squared score for the new testing data is {r2_score(y_test09, y_pred_test09)}')"
    ]
   },
   {
@@ -1201,7 +1274,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 41,
+   "execution_count": 44,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1217,11 +1290,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 42,
+   "execution_count": 45,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "auto_model = LinearRegression()\n",
+    "selector = RFE(estimator = auto_model, n_features_to_select=3)"
    ]
   },
   {
@@ -1233,11 +1308,45 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 43,
+   "execution_count": 46,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "RFE(estimator=LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,\n",
+       "                               normalize=False),\n",
+       "    n_features_to_select=3, step=1, verbose=0)"
+      ]
+     },
+     "execution_count": 46,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "selector.fit(X, y)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 47,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([1, 2, 4, 3, 1, 1])"
+      ]
+     },
+     "execution_count": 47,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "selector.ranking_"
    ]
   },
   {
@@ -1251,11 +1360,21 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 44,
+   "execution_count": 48,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "X = auto.drop(['mpg', 'displacement', 'horse_power', 'weight'], axis=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 49,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X_train_reduced, X_test_reduced, y_train_reduced, y_test_reduced = train_test_split(X, y, train_size=0.8)"
    ]
   },
   {
@@ -1267,11 +1386,91 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 45,
+   "execution_count": 50,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)"
+      ]
+     },
+     "execution_count": 50,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Your code here: \n",
+    "auto_model_reduced = LinearRegression()\n",
+    "auto_model_reduced.fit(X_train_reduced, y_train_reduced)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 51,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here: \n"
+    "y_pred_train_reduced = auto_model_reduced.predict(X_train_reduced)\n",
+    "y_pred_test_reduced = auto_model_reduced.predict(X_test_reduced)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 52,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.7190692512547585"
+      ]
+     },
+     "execution_count": 52,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "r2_score(y_train_reduced, y_pred_train_reduced)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 53,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.6945400439388549"
+      ]
+     },
+     "execution_count": 53,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "r2_score(y_test_reduced, y_pred_test_reduced)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 54,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "The r squared score for the new training data is 0.7190692512547585 and the r squared score for the new testing data is 0.6945400439388549\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(f'The r squared score for the new training data is {r2_score(y_train_reduced, y_pred_train_reduced)} and the r squared score for the new testing data is {r2_score(y_test_reduced, y_pred_test_reduced)}')"
    ]
   },
   {