From 17b724e7c8287ff2457066f4a867b5c0d39282f7 Mon Sep 17 00:00:00 2001
From: BORJA <borjauria@MacBook-Pro-de-BORJA.local>
Date: Thu, 23 Jan 2020 21:36:32 +0100
Subject: [PATCH 1/2] Modifying gitignore

---
 module-1/.gitignore | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/module-1/.gitignore b/module-1/.gitignore
index b2e5ef3d..1fb4aecd 100644
--- a/module-1/.gitignore
+++ b/module-1/.gitignore
@@ -1,2 +1,3 @@
 DS_Store
-.ipynb_checkpoints
\ No newline at end of file
+.ipynb_checkpoints
+.bash_profile

From 718dd3e9aa507210734d933ef483c74ab5aadf61 Mon Sep 17 00:00:00 2001
From: BORJA <borjauria@MacBook-Pro-de-BORJA.local>
Date: Tue, 7 Apr 2020 14:54:58 +0200
Subject: [PATCH 2/2] The lab is done and checked without bonus

---
 .../your-code/main.ipynb                      | 821 ++++++++++++++++--
 1 file changed, 726 insertions(+), 95 deletions(-)

diff --git a/module-3/lab-supervised-learning-sklearn/your-code/main.ipynb b/module-3/lab-supervised-learning-sklearn/your-code/main.ipynb
index 0102ef94..7387c37e 100644
--- a/module-3/lab-supervised-learning-sklearn/your-code/main.ipynb
+++ b/module-3/lab-supervised-learning-sklearn/your-code/main.ipynb
@@ -12,11 +12,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Import your libraries:\n"
+    "# Import your libraries:\n",
+    "import pandas as pd\n",
+    "from sklearn.datasets import load_diabetes"
    ]
   },
   {
@@ -37,11 +39,73 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Your code here:\n"
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "{'data': array([[ 0.03807591,  0.05068012,  0.06169621, ..., -0.00259226,\n",
+      "         0.01990842, -0.01764613],\n",
+      "       [-0.00188202, -0.04464164, -0.05147406, ..., -0.03949338,\n",
+      "        -0.06832974, -0.09220405],\n",
+      "       [ 0.08529891,  0.05068012,  0.04445121, ..., -0.00259226,\n",
+      "         0.00286377, -0.02593034],\n",
+      "       ...,\n",
+      "       [ 0.04170844,  0.05068012, -0.01590626, ..., -0.01107952,\n",
+      "        -0.04687948,  0.01549073],\n",
+      "       [-0.04547248, -0.04464164,  0.03906215, ...,  0.02655962,\n",
+      "         0.04452837, -0.02593034],\n",
+      "       [-0.04547248, -0.04464164, -0.0730303 , ..., -0.03949338,\n",
+      "        -0.00421986,  0.00306441]]), 'target': array([151.,  75., 141., 206., 135.,  97., 138.,  63., 110., 310., 101.,\n",
+      "        69., 179., 185., 118., 171., 166., 144.,  97., 168.,  68.,  49.,\n",
+      "        68., 245., 184., 202., 137.,  85., 131., 283., 129.,  59., 341.,\n",
+      "        87.,  65., 102., 265., 276., 252.,  90., 100.,  55.,  61.,  92.,\n",
+      "       259.,  53., 190., 142.,  75., 142., 155., 225.,  59., 104., 182.,\n",
+      "       128.,  52.,  37., 170., 170.,  61., 144.,  52., 128.,  71., 163.,\n",
+      "       150.,  97., 160., 178.,  48., 270., 202., 111.,  85.,  42., 170.,\n",
+      "       200., 252., 113., 143.,  51.,  52., 210.,  65., 141.,  55., 134.,\n",
+      "        42., 111.,  98., 164.,  48.,  96.,  90., 162., 150., 279.,  92.,\n",
+      "        83., 128., 102., 302., 198.,  95.,  53., 134., 144., 232.,  81.,\n",
+      "       104.,  59., 246., 297., 258., 229., 275., 281., 179., 200., 200.,\n",
+      "       173., 180.,  84., 121., 161.,  99., 109., 115., 268., 274., 158.,\n",
+      "       107.,  83., 103., 272.,  85., 280., 336., 281., 118., 317., 235.,\n",
+      "        60., 174., 259., 178., 128.,  96., 126., 288.,  88., 292.,  71.,\n",
+      "       197., 186.,  25.,  84.,  96., 195.,  53., 217., 172., 131., 214.,\n",
+      "        59.,  70., 220., 268., 152.,  47.,  74., 295., 101., 151., 127.,\n",
+      "       237., 225.,  81., 151., 107.,  64., 138., 185., 265., 101., 137.,\n",
+      "       143., 141.,  79., 292., 178.,  91., 116.,  86., 122.,  72., 129.,\n",
+      "       142.,  90., 158.,  39., 196., 222., 277.,  99., 196., 202., 155.,\n",
+      "        77., 191.,  70.,  73.,  49.,  65., 263., 248., 296., 214., 185.,\n",
+      "        78.,  93., 252., 150.,  77., 208.,  77., 108., 160.,  53., 220.,\n",
+      "       154., 259.,  90., 246., 124.,  67.,  72., 257., 262., 275., 177.,\n",
+      "        71.,  47., 187., 125.,  78.,  51., 258., 215., 303., 243.,  91.,\n",
+      "       150., 310., 153., 346.,  63.,  89.,  50.,  39., 103., 308., 116.,\n",
+      "       145.,  74.,  45., 115., 264.,  87., 202., 127., 182., 241.,  66.,\n",
+      "        94., 283.,  64., 102., 200., 265.,  94., 230., 181., 156., 233.,\n",
+      "        60., 219.,  80.,  68., 332., 248.,  84., 200.,  55.,  85.,  89.,\n",
+      "        31., 129.,  83., 275.,  65., 198., 236., 253., 124.,  44., 172.,\n",
+      "       114., 142., 109., 180., 144., 163., 147.,  97., 220., 190., 109.,\n",
+      "       191., 122., 230., 242., 248., 249., 192., 131., 237.,  78., 135.,\n",
+      "       244., 199., 270., 164.,  72.,  96., 306.,  91., 214.,  95., 216.,\n",
+      "       263., 178., 113., 200., 139., 139.,  88., 148.,  88., 243.,  71.,\n",
+      "        77., 109., 272.,  60.,  54., 221.,  90., 311., 281., 182., 321.,\n",
+      "        58., 262., 206., 233., 242., 123., 167.,  63., 197.,  71., 168.,\n",
+      "       140., 217., 121., 235., 245.,  40.,  52., 104., 132.,  88.,  69.,\n",
+      "       219.,  72., 201., 110.,  51., 277.,  63., 118.,  69., 273., 258.,\n",
+      "        43., 198., 242., 232., 175.,  93., 168., 275., 293., 281.,  72.,\n",
+      "       140., 189., 181., 209., 136., 261., 113., 131., 174., 257.,  55.,\n",
+      "        84.,  42., 146., 212., 233.,  91., 111., 152., 120.,  67., 310.,\n",
+      "        94., 183.,  66., 173.,  72.,  49.,  64.,  48., 178., 104., 132.,\n",
+      "       220.,  57.]), 'DESCR': '.. _diabetes_dataset:\\n\\nDiabetes dataset\\n----------------\\n\\nTen baseline variables, age, sex, body mass index, average blood\\npressure, and six blood serum measurements were obtained for each of n =\\n442 diabetes patients, as well as the response of interest, a\\nquantitative measure of disease progression one year after baseline.\\n\\n**Data Set Characteristics:**\\n\\n  :Number of Instances: 442\\n\\n  :Number of Attributes: First 10 columns are numeric predictive values\\n\\n  :Target: Column 11 is a quantitative measure of disease progression one year after baseline\\n\\n  :Attribute Information:\\n      - Age\\n      - Sex\\n      - Body mass index\\n      - Average blood pressure\\n      - S1\\n      - S2\\n      - S3\\n      - S4\\n      - S5\\n      - S6\\n\\nNote: Each of these 10 feature variables have been mean centered and scaled by the standard deviation times `n_samples` (i.e. the sum of squares of each column totals 1).\\n\\nSource URL:\\nhttps://www4.stat.ncsu.edu/~boos/var.select/diabetes.html\\n\\nFor more information see:\\nBradley Efron, Trevor Hastie, Iain Johnstone and Robert Tibshirani (2004) \"Least Angle Regression,\" Annals of Statistics (with discussion), 407-499.\\n(https://web.stanford.edu/~hastie/Papers/LARS/LeastAngle_2002.pdf)', 'feature_names': ['age', 'sex', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6'], 'data_filename': '/Users/borjauria/miniconda3/envs/ironhack_env/lib/python3.7/site-packages/sklearn/datasets/data/diabetes_data.csv.gz', 'target_filename': '/Users/borjauria/miniconda3/envs/ironhack_env/lib/python3.7/site-packages/sklearn/datasets/data/diabetes_target.csv.gz'}\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "diabetes = load_diabetes()\n",
+    "print(diabetes)"
    ]
   },
   {
@@ -53,11 +117,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 3,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "dict_keys(['data', 'target', 'DESCR', 'feature_names', 'data_filename', 'target_filename'])"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "diabetes.keys()"
    ]
   },
   {
@@ -73,13 +149,59 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
    "metadata": {
     "scrolled": false
    },
-   "outputs": [],
-   "source": [
-    "# Your code here:\n"
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      ".. _diabetes_dataset:\n",
+      "\n",
+      "Diabetes dataset\n",
+      "----------------\n",
+      "\n",
+      "Ten baseline variables, age, sex, body mass index, average blood\n",
+      "pressure, and six blood serum measurements were obtained for each of n =\n",
+      "442 diabetes patients, as well as the response of interest, a\n",
+      "quantitative measure of disease progression one year after baseline.\n",
+      "\n",
+      "**Data Set Characteristics:**\n",
+      "\n",
+      "  :Number of Instances: 442\n",
+      "\n",
+      "  :Number of Attributes: First 10 columns are numeric predictive values\n",
+      "\n",
+      "  :Target: Column 11 is a quantitative measure of disease progression one year after baseline\n",
+      "\n",
+      "  :Attribute Information:\n",
+      "      - Age\n",
+      "      - Sex\n",
+      "      - Body mass index\n",
+      "      - Average blood pressure\n",
+      "      - S1\n",
+      "      - S2\n",
+      "      - S3\n",
+      "      - S4\n",
+      "      - S5\n",
+      "      - S6\n",
+      "\n",
+      "Note: Each of these 10 feature variables have been mean centered and scaled by the standard deviation times `n_samples` (i.e. the sum of squares of each column totals 1).\n",
+      "\n",
+      "Source URL:\n",
+      "https://www4.stat.ncsu.edu/~boos/var.select/diabetes.html\n",
+      "\n",
+      "For more information see:\n",
+      "Bradley Efron, Trevor Hastie, Iain Johnstone and Robert Tibshirani (2004) \"Least Angle Regression,\" Annals of Statistics (with discussion), 407-499.\n",
+      "(https://web.stanford.edu/~hastie/Papers/LARS/LeastAngle_2002.pdf)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "print(diabetes.DESCR)"
    ]
   },
   {
@@ -97,11 +219,25 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(442, 10)"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "# Enter your answer here:\n"
+    "diabetes.data.shape\n",
+    "# Enter your answer here:\n",
+    "# There are 10 attributes in the data: the patient's age, sex, body mass index, average blood pressure and six blood serum measurements.\n",
+    "# There are 442 records in the data"
    ]
   },
   {
@@ -115,11 +251,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 6,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(442,)"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "diabetes.target.shape"
    ]
   },
   {
@@ -156,11 +304,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "from sklearn.linear_model import LinearRegression\n",
+    "X = diabetes['data']\n",
+    "y = diabetes['target']"
    ]
   },
   {
@@ -172,11 +323,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "diabetes_model = LinearRegression()"
    ]
   },
   {
@@ -190,11 +342,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "diabetes_data_train, diabetes_data_test, diabetes_target_train, diabetes_target_test = train_test_split(X, y, test_size = 0.20)"
    ]
   },
   {
@@ -206,11 +360,65 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 10,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "diabetes_model.fit(diabetes_data_train, diabetes_target_train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "151.86239034549334"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "diabetes_model.intercept_"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([ -27.71391975, -267.08862827,  536.18199219,  332.4318144 ,\n",
+       "       -716.85132584,  446.31139487,   73.94204506,  217.56183393,\n",
+       "        690.36686518,   72.62325409])"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "diabetes_model.coef_"
    ]
   },
   {
@@ -231,11 +439,46 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Your code here:\n"
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([238.58574559,  91.50331068,  75.09325302,  64.88748908,\n",
+       "       115.81982316, 187.63818037, 113.99486439, 176.054878  ,\n",
+       "        78.92834033, 140.32636457, 144.84279661, 100.08484328,\n",
+       "       227.19186843, 147.10362276, 100.00082362, 207.46308607,\n",
+       "       190.16726124,  97.99009049, 234.76243416,  73.09364077,\n",
+       "       111.90262063, 229.82762169, 196.73944615,  86.10457733,\n",
+       "       191.87375378, 200.51580174,  99.1540136 , 173.07937935,\n",
+       "       234.08776711, 106.37633822, 232.80297325,  67.23026403,\n",
+       "       180.77663043, 251.73652467, 161.12191595, 112.60228742,\n",
+       "        87.88809539, 246.77918189, 118.68359552, 244.72963435,\n",
+       "       189.43631778, 143.13429118,  72.46345876, 143.20874428,\n",
+       "       142.05684509,  57.23448674, 135.32582782, 120.35824672,\n",
+       "       174.16913954, 106.59499321, 173.95730681, 126.49273779,\n",
+       "        52.77539112, 142.07084877, 229.58743125, 181.77285511,\n",
+       "       261.01845717,  90.90352337, 124.58937301, 210.70379539,\n",
+       "       102.78349004, 112.40582843, 197.92242404,  37.81043164,\n",
+       "       217.69058287, 222.19662037, 161.43074622, 142.52649468,\n",
+       "       170.98257978, 161.86560192, 123.42940713, 159.39838091,\n",
+       "       110.90948683, 186.27114682, 220.84531227,  76.27793437,\n",
+       "       221.01721341, 156.44510601, 161.95906927,  43.07618446,\n",
+       "       201.2911246 , 229.92636947, 239.1495099 , 205.63282517,\n",
+       "       116.30675283, 195.51938123, 118.67975747, 112.57204242,\n",
+       "       182.45551813])"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "y_pred = diabetes_model.predict(diabetes_data_test)\n",
+    "y_pred"
    ]
   },
   {
@@ -247,11 +490,47 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 14,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[317. 101.  37.  83. 153.  84. 179. 277. 200. 190.  25.  81. 155. 172.\n",
+      " 118.  52.  78. 170. 261. 134.  61. 248. 292.  96. 178. 293. 135.  70.\n",
+      " 275. 111. 128.  39. 164. 245. 104.  53.  91. 264.  89. 252. 129.  50.\n",
+      "  55.  88.  93.  39. 124. 178. 283.  87.  91. 103.  57. 168. 246. 139.\n",
+      " 303.  64. 131. 288.  94. 200. 131. 104. 275. 152. 196. 116. 242. 252.\n",
+      " 150. 154. 160. 170. 295.  72. 192. 151. 144. 116. 233. 236. 270. 233.\n",
+      "  66. 123. 177. 107. 144.]\n"
+     ]
+    }
+   ],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "print(diabetes_target_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "3338.2019711743733"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from sklearn.metrics import mean_squared_error\n",
+    "mean_squared_error(y_true = diabetes_target_test, y_pred = y_pred)"
    ]
   },
   {
@@ -263,11 +542,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 16,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your explanation here:\n"
+    "# Your explanation here:"
    ]
   },
   {
@@ -302,7 +581,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 17,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -326,11 +605,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 18,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your answers here:"
+    "# Your answers here:\n"
    ]
   },
   {
@@ -351,11 +630,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 19,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "auto = pd.read_csv('../auto-mpg.csv')"
    ]
   },
   {
@@ -367,11 +647,124 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Your code here:\n"
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>mpg</th>\n",
+       "      <th>cylinders</th>\n",
+       "      <th>displacement</th>\n",
+       "      <th>horse_power</th>\n",
+       "      <th>weight</th>\n",
+       "      <th>acceleration</th>\n",
+       "      <th>model_year</th>\n",
+       "      <th>car_name</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>18.0</td>\n",
+       "      <td>8</td>\n",
+       "      <td>307.0</td>\n",
+       "      <td>130.0</td>\n",
+       "      <td>3504</td>\n",
+       "      <td>12.0</td>\n",
+       "      <td>70</td>\n",
+       "      <td>\\t\"chevrolet chevelle malibu\"</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>15.0</td>\n",
+       "      <td>8</td>\n",
+       "      <td>350.0</td>\n",
+       "      <td>165.0</td>\n",
+       "      <td>3693</td>\n",
+       "      <td>11.5</td>\n",
+       "      <td>70</td>\n",
+       "      <td>\\t\"buick skylark 320\"</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>18.0</td>\n",
+       "      <td>8</td>\n",
+       "      <td>318.0</td>\n",
+       "      <td>150.0</td>\n",
+       "      <td>3436</td>\n",
+       "      <td>11.0</td>\n",
+       "      <td>70</td>\n",
+       "      <td>\\t\"plymouth satellite\"</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>16.0</td>\n",
+       "      <td>8</td>\n",
+       "      <td>304.0</td>\n",
+       "      <td>150.0</td>\n",
+       "      <td>3433</td>\n",
+       "      <td>12.0</td>\n",
+       "      <td>70</td>\n",
+       "      <td>\\t\"amc rebel sst\"</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>17.0</td>\n",
+       "      <td>8</td>\n",
+       "      <td>302.0</td>\n",
+       "      <td>140.0</td>\n",
+       "      <td>3449</td>\n",
+       "      <td>10.5</td>\n",
+       "      <td>70</td>\n",
+       "      <td>\\t\"ford torino\"</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "    mpg  cylinders  displacement  horse_power  weight  acceleration  \\\n",
+       "0  18.0          8         307.0        130.0    3504          12.0   \n",
+       "1  15.0          8         350.0        165.0    3693          11.5   \n",
+       "2  18.0          8         318.0        150.0    3436          11.0   \n",
+       "3  16.0          8         304.0        150.0    3433          12.0   \n",
+       "4  17.0          8         302.0        140.0    3449          10.5   \n",
+       "\n",
+       "   model_year                       car_name  \n",
+       "0          70  \\t\"chevrolet chevelle malibu\"  \n",
+       "1          70          \\t\"buick skylark 320\"  \n",
+       "2          70         \\t\"plymouth satellite\"  \n",
+       "3          70              \\t\"amc rebel sst\"  \n",
+       "4          70                \\t\"ford torino\"  "
+      ]
+     },
+     "execution_count": 20,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "auto.head(5)"
    ]
   },
   {
@@ -383,11 +776,31 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Your code here:\n"
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "mpg             float64\n",
+       "cylinders         int64\n",
+       "displacement    float64\n",
+       "horse_power     float64\n",
+       "weight            int64\n",
+       "acceleration    float64\n",
+       "model_year        int64\n",
+       "car_name         object\n",
+       "dtype: object"
+      ]
+     },
+     "execution_count": 21,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "auto.dtypes"
    ]
   },
   {
@@ -399,11 +812,22 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 22,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "The newest model year 82 and the oldest model year 70\n"
+     ]
+    }
+   ],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "newest_model = auto['model_year'].max()  # largest year value = newest model\n",
+    "oldest_model = auto['model_year'].min()  # smallest year value = oldest model\n",
+    "print(f'The newest model year {newest_model} and the oldest model year {oldest_model}')"
    ]
   },
   {
@@ -415,11 +839,31 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Your code here:\n"
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "mpg             0\n",
+       "cylinders       0\n",
+       "displacement    0\n",
+       "horse_power     6\n",
+       "weight          0\n",
+       "acceleration    0\n",
+       "model_year      0\n",
+       "car_name        0\n",
+       "dtype: int64"
+      ]
+     },
+     "execution_count": 23,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "auto.isna().sum()"
    ]
   },
   {
@@ -431,11 +875,21 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 24,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "There are 5 possible values of cylinders\n"
+     ]
+    }
+   ],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "auto.dropna(axis=0, inplace=True)\n",
+    "print(f'There are {len(auto.cylinders.unique())} possible values of cylinders')"
    ]
   },
   {
@@ -451,11 +905,31 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 25,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "auto.drop('car_name', axis=1, inplace=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X = auto.drop('mpg', axis=1)\n",
+    "y = auto['mpg'].values"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)"
    ]
   },
   {
@@ -469,11 +943,24 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 40,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)"
+      ]
+     },
+     "execution_count": 40,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "auto_model = LinearRegression()\n",
+    "auto_model.fit(X_train, y_train)"
    ]
   },
   {
@@ -502,11 +989,41 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 41,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "y_pred = auto_model.predict(X_train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 42,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.metrics import r2_score"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 43,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.8157443634313795"
+      ]
+     },
+     "execution_count": 43,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "r2_score(y_train, y_pred)"
    ]
   },
   {
@@ -522,11 +1039,24 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 44,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.7722005568518815"
+      ]
+     },
+     "execution_count": 44,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "y_test_pred = auto_model.predict(X_test)\n",
+    "r2_score(y_test, y_test_pred)"
    ]
   },
   {
@@ -551,11 +1081,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 45,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "X_train09, X_test09, y_train09, y_test09 = train_test_split(X, y, test_size=0.1)"
    ]
   },
   {
@@ -567,11 +1098,32 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 46,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "auto_model09 = LinearRegression()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 47,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)"
+      ]
+     },
+     "execution_count": 47,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "auto_model09.fit(X_train09, y_train09)"
    ]
   },
   {
@@ -583,11 +1135,53 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 48,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "y_pred_train09 = auto_model09.predict(X_train09)\n",
+    "y_pred_test09 = auto_model09.predict(X_test09)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 49,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.8022859161972329"
+      ]
+     },
+     "execution_count": 49,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "r2_score(y_train09, y_pred_train09)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 50,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.8563373263741094"
+      ]
+     },
+     "execution_count": 50,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "r2_score(y_test09, y_pred_test09)"
    ]
   },
   {
@@ -599,11 +1193,20 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 51,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "The r squared score for the new training data is 0.8022859161972329 and the r squared score for the new testing data is 0.8563373263741094\n"
+     ]
+    }
+   ],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "print(f'The r squared score for the new training data is {r2_score(y_train09, y_pred_train09)} and the r squared score for the new testing data is {r2_score(y_test09, y_pred_test09)}')"
    ]
   },
   {
@@ -619,7 +1222,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 35,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -635,11 +1238,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 52,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "auto_model = LinearRegression()\n",
+    "selector = RFE(estimator = auto_model, n_features_to_select=3)"
    ]
   },
   {
@@ -651,11 +1256,25 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 53,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "RFE(estimator=LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,\n",
+       "                               normalize=False),\n",
+       "    n_features_to_select=3, step=1, verbose=0)"
+      ]
+     },
+     "execution_count": 53,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "selector.fit(X, y)"
    ]
   },
   {
@@ -669,11 +1288,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 54,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([1, 2, 4, 3, 1, 1])"
+      ]
+     },
+     "execution_count": 54,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "selector.ranking_"
    ]
   },
   {
@@ -685,7 +1316,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 39,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -726,7 +1357,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.6"
+   "version": "3.7.5"
   }
  },
  "nbformat": 4,