From 2f90d27ad7662f31d241123909ec45101424ca80 Mon Sep 17 00:00:00 2001
From: vkamg <veronica.montoro.gonzalez@gmail.com>
Date: Sun, 12 Apr 2020 23:30:55 +0200
Subject: [PATCH 1/3] working on challenge 2

---
 .../your-code/main.ipynb                      | 147 ++++++++++++++----
 1 file changed, 117 insertions(+), 30 deletions(-)

diff --git a/module-3/lab-supervised-learning-sklearn/your-code/main.ipynb b/module-3/lab-supervised-learning-sklearn/your-code/main.ipynb
index 0102ef94..8f1ac815 100644
--- a/module-3/lab-supervised-learning-sklearn/your-code/main.ipynb
+++ b/module-3/lab-supervised-learning-sklearn/your-code/main.ipynb
@@ -12,11 +12,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Import your libraries:\n"
+    "# Import your libraries:\n",
+    "import sklearn\n",
+    "from sklearn import datasets"
    ]
   },
   {
@@ -37,11 +39,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "\n",
+    "diabetes = datasets.load_diabetes()"
    ]
   },
   {
@@ -53,11 +57,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 8,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "dict_keys(['data', 'target', 'DESCR', 'feature_names', 'data_filename', 'target_filename'])"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "diabetes.keys()"
    ]
   },
   {
@@ -73,13 +89,57 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "scrolled": false
-   },
-   "outputs": [],
-   "source": [
-    "# Your code here:\n"
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      ".. _diabetes_dataset:\n",
+      "\n",
+      "Diabetes dataset\n",
+      "----------------\n",
+      "\n",
+      "Ten baseline variables, age, sex, body mass index, average blood\n",
+      "pressure, and six blood serum measurements were obtained for each of n =\n",
+      "442 diabetes patients, as well as the response of interest, a\n",
+      "quantitative measure of disease progression one year after baseline.\n",
+      "\n",
+      "**Data Set Characteristics:**\n",
+      "\n",
+      "  :Number of Instances: 442\n",
+      "\n",
+      "  :Number of Attributes: First 10 columns are numeric predictive values\n",
+      "\n",
+      "  :Target: Column 11 is a quantitative measure of disease progression one year after baseline\n",
+      "\n",
+      "  :Attribute Information:\n",
+      "      - Age\n",
+      "      - Sex\n",
+      "      - Body mass index\n",
+      "      - Average blood pressure\n",
+      "      - S1\n",
+      "      - S2\n",
+      "      - S3\n",
+      "      - S4\n",
+      "      - S5\n",
+      "      - S6\n",
+      "\n",
+      "Note: Each of these 10 feature variables have been mean centered and scaled by the standard deviation times `n_samples` (i.e. the sum of squares of each column totals 1).\n",
+      "\n",
+      "Source URL:\n",
+      "https://www4.stat.ncsu.edu/~boos/var.select/diabetes.html\n",
+      "\n",
+      "For more information see:\n",
+      "Bradley Efron, Trevor Hastie, Iain Johnstone and Robert Tibshirani (2004) \"Least Angle Regression,\" Annals of Statistics (with discussion), 407-499.\n",
+      "(https://web.stanford.edu/~hastie/Papers/LARS/LeastAngle_2002.pdf)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "print(diabetes.DESCR)"
    ]
   },
   {
@@ -96,12 +156,21 @@
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": null,
+   "cell_type": "raw",
    "metadata": {},
-   "outputs": [],
    "source": [
-    "# Enter your answer here:\n"
+    "# Enter your answer here:\n",
+    "1. How many attributes are there in the data? What do they mean?\n",
+    "    - There are 10 attributes: age, sex, body mass index, average blood\n",
+    "    pressure, and six blood serum measurements (S1, S2, S3, S4, S5 and S6)\n",
+    "\n",
+    "2. What is the relation between diabetes['data'] and diabetes['target']?\n",
+    "\n",
+    "    - diabetes['data'] holds the baseline variables (the features) and diabetes['target'] is a quantitative measure of disease progression one year after baseline (the response).\n",
+    "\n",
+    "3. How many records are there in the data?\n",
+    "\n",
+    "    - 442 (n = 442 diabetes patients)"
    ]
   },
   {
@@ -115,11 +184,24 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 18,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "The shape of data is (442, 10) and the shape of target is (442,)\n"
+     ]
+    }
+   ],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "\n",
+    "data_shape = diabetes['data'].shape\n",
+    "target_shape = diabetes['target'].shape\n",
+    "\n",
+    "print(f\"The shape of data is {data_shape} and the shape of target is {target_shape}\")"
    ]
   },
   {
@@ -156,11 +238,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 20,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "from sklearn.linear_model import LinearRegression"
    ]
   },
   {
@@ -172,11 +255,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 21,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "\n",
+    "diabetes_model = LinearRegression()"
    ]
   },
   {
@@ -194,7 +279,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "\n",
+    "\n"
    ]
   },
   {
@@ -712,9 +799,9 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python [conda env:data_env]",
    "language": "python",
-   "name": "python3"
+   "name": "conda-env-data_env-py"
   },
   "language_info": {
    "codemirror_mode": {
@@ -726,9 +813,9 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.6"
+   "version": "3.7.5"
   }
  },
  "nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 4
 }

From 7e2e6ca9cb3d2a20124ad1dac79deb1ca06b8086 Mon Sep 17 00:00:00 2001
From: vkamg <veronica.montoro.gonzalez@gmail.com>
Date: Mon, 13 Apr 2020 01:25:26 +0200
Subject: [PATCH 2/3] working on challenge 3

---
 .../your-code/main.ipynb                      | 365 ++++++++++++++++--
 1 file changed, 331 insertions(+), 34 deletions(-)

diff --git a/module-3/lab-supervised-learning-sklearn/your-code/main.ipynb b/module-3/lab-supervised-learning-sklearn/your-code/main.ipynb
index 8f1ac815..8cadb183 100644
--- a/module-3/lab-supervised-learning-sklearn/your-code/main.ipynb
+++ b/module-3/lab-supervised-learning-sklearn/your-code/main.ipynb
@@ -275,13 +275,19 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 22,
    "metadata": {},
    "outputs": [],
    "source": [
     "# Your code here:\n",
     "\n",
-    "\n"
+    "diabetes_data_train = diabetes['data'][0:-20]\n",
+    "\n",
+    "diabetes_target_train = diabetes['target'][0:-20]\n",
+    "\n",
+    "diabetes_data_test = diabetes['data'][-20:]\n",
+    "\n",
+    "diabetes_target_test = diabetes['target'][-20:]"
    ]
   },
   {
@@ -293,11 +299,30 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 27,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Coeficients are: [ 3.03499549e-01 -2.37639315e+02  5.10530605e+02  3.27736980e+02\n",
+      " -8.14131709e+02  4.92814588e+02  1.02848452e+02  1.84606489e+02\n",
+      "  7.43519617e+02  7.60951722e+01]\n",
+      "The intercept is: 152.76430691633442\n"
+     ]
+    }
+   ],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "\n",
+    "X = diabetes_data_train\n",
+    "y = diabetes_target_train\n",
+    "\n",
+    "diabetes_model = LinearRegression().fit(X, y)\n",
+    "\n",
+    "print(f'Coeficients are: {diabetes_model.coef_}')\n",
+    "print(f'The intercept is: {diabetes_model.intercept_}')"
    ]
   },
   {
@@ -318,11 +343,26 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 32,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[197.61846908 155.43979328 172.88665147 111.53537279 164.80054784\n",
+      " 131.06954875 259.12237761 100.47935157 117.0601052  124.30503555\n",
+      " 218.36632793  61.19831284 132.25046751 120.3332925   52.54458691\n",
+      " 194.03798088 102.57139702 123.56604987 211.0346317   52.60335674]\n"
+     ]
+    }
+   ],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "\n",
+    "predictions = diabetes_model.predict(diabetes_data_test)\n",
+    "\n",
+    "print(predictions)"
    ]
   },
   {
@@ -334,11 +374,21 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 31,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[233.  91. 111. 152. 120.  67. 310.  94. 183.  66. 173.  72.  49.  64.\n",
+      "  48. 178. 104. 132. 220.  57.]\n"
+     ]
+    }
+   ],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "print(diabetes_target_test)"
    ]
   },
   {
@@ -349,12 +399,11 @@
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": null,
+   "cell_type": "raw",
    "metadata": {},
-   "outputs": [],
    "source": [
-    "# Your explanation here:\n"
+    "# Your explanation here:\n",
+    "No, it is not the same. What the linear regression algorithm does is fit the line that results in the least error over the data points, but there is always some error, so the predictions do not exactly match the actual values."
    ]
   },
   {
@@ -389,11 +438,59 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 34,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "                                 OLS Regression Results                                \n",
+      "=======================================================================================\n",
+      "Dep. Variable:                      y   R-squared (uncentered):                   0.110\n",
+      "Model:                            OLS   Adj. R-squared (uncentered):              0.089\n",
+      "Method:                 Least Squares   F-statistic:                              5.109\n",
+      "Date:                Mon, 13 Apr 2020   Prob (F-statistic):                    4.77e-07\n",
+      "Time:                        00:57:01   Log-Likelihood:                         -2745.5\n",
+      "No. Observations:                 422   AIC:                                      5511.\n",
+      "Df Residuals:                     412   BIC:                                      5552.\n",
+      "Df Model:                          10                                                  \n",
+      "Covariance Type:            nonrobust                                                  \n",
+      "==============================================================================\n",
+      "                 coef    std err          t      P>|t|      [0.025      0.975]\n",
+      "------------------------------------------------------------------------------\n",
+      "x1            42.9190    183.983      0.233      0.816    -318.744     404.582\n",
+      "x2          -261.9605    188.650     -1.389      0.166    -632.798     108.877\n",
+      "x3           547.5378    204.613      2.676      0.008     145.322     949.754\n",
+      "x4           352.4704    200.777      1.756      0.080     -42.205     747.146\n",
+      "x5          -634.0265   1273.063     -0.498      0.619   -3136.536    1868.483\n",
+      "x6           285.1002   1033.408      0.276      0.783   -1746.310    2316.510\n",
+      "x7            -9.4062    658.863     -0.014      0.989   -1304.558    1285.746\n",
+      "x8           197.4998    502.388      0.393      0.694    -790.064    1185.063\n",
+      "x9           670.7500    526.463      1.274      0.203    -364.139    1705.639\n",
+      "x10           11.6643    205.008      0.057      0.955    -391.327     414.656\n",
+      "==============================================================================\n",
+      "Omnibus:                        0.574   Durbin-Watson:                   0.228\n",
+      "Prob(Omnibus):                  0.751   Jarque-Bera (JB):                0.677\n",
+      "Skew:                          -0.001   Prob(JB):                        0.713\n",
+      "Kurtosis:                       2.804   Cond. No.                         21.4\n",
+      "==============================================================================\n",
+      "\n",
+      "Warnings:\n",
+      "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n"
+     ]
+    }
+   ],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "\n",
+    "import statsmodels.api as sm\n",
+    "\n",
+    "# sm.OLS fits no intercept unless a constant column is added explicitly.\n",
+    "mod = sm.OLS(diabetes_target_train, sm.add_constant(diabetes_data_train))\n",
+    "\n",
+    "res = mod.fit()\n",
+    "print(res.summary())"
    ]
   },
   {
@@ -438,11 +535,15 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 36,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "import pandas as pd\n",
+    "\n",
+    "\n",
+    "auto = pd.read_csv(\"../auto-mpg.csv\")"
    ]
   },
   {
@@ -454,11 +555,124 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 37,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>mpg</th>\n",
+       "      <th>cylinders</th>\n",
+       "      <th>displacement</th>\n",
+       "      <th>horse_power</th>\n",
+       "      <th>weight</th>\n",
+       "      <th>acceleration</th>\n",
+       "      <th>model_year</th>\n",
+       "      <th>car_name</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>18.0</td>\n",
+       "      <td>8</td>\n",
+       "      <td>307.0</td>\n",
+       "      <td>130.0</td>\n",
+       "      <td>3504</td>\n",
+       "      <td>12.0</td>\n",
+       "      <td>70</td>\n",
+       "      <td>\\t\"chevrolet chevelle malibu\"</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>15.0</td>\n",
+       "      <td>8</td>\n",
+       "      <td>350.0</td>\n",
+       "      <td>165.0</td>\n",
+       "      <td>3693</td>\n",
+       "      <td>11.5</td>\n",
+       "      <td>70</td>\n",
+       "      <td>\\t\"buick skylark 320\"</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>18.0</td>\n",
+       "      <td>8</td>\n",
+       "      <td>318.0</td>\n",
+       "      <td>150.0</td>\n",
+       "      <td>3436</td>\n",
+       "      <td>11.0</td>\n",
+       "      <td>70</td>\n",
+       "      <td>\\t\"plymouth satellite\"</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>16.0</td>\n",
+       "      <td>8</td>\n",
+       "      <td>304.0</td>\n",
+       "      <td>150.0</td>\n",
+       "      <td>3433</td>\n",
+       "      <td>12.0</td>\n",
+       "      <td>70</td>\n",
+       "      <td>\\t\"amc rebel sst\"</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>17.0</td>\n",
+       "      <td>8</td>\n",
+       "      <td>302.0</td>\n",
+       "      <td>140.0</td>\n",
+       "      <td>3449</td>\n",
+       "      <td>10.5</td>\n",
+       "      <td>70</td>\n",
+       "      <td>\\t\"ford torino\"</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "    mpg  cylinders  displacement  horse_power  weight  acceleration  \\\n",
+       "0  18.0          8         307.0        130.0    3504          12.0   \n",
+       "1  15.0          8         350.0        165.0    3693          11.5   \n",
+       "2  18.0          8         318.0        150.0    3436          11.0   \n",
+       "3  16.0          8         304.0        150.0    3433          12.0   \n",
+       "4  17.0          8         302.0        140.0    3449          10.5   \n",
+       "\n",
+       "   model_year                       car_name  \n",
+       "0          70  \\t\"chevrolet chevelle malibu\"  \n",
+       "1          70          \\t\"buick skylark 320\"  \n",
+       "2          70         \\t\"plymouth satellite\"  \n",
+       "3          70              \\t\"amc rebel sst\"  \n",
+       "4          70                \\t\"ford torino\"  "
+      ]
+     },
+     "execution_count": 37,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "auto.head()"
    ]
   },
   {
@@ -470,11 +684,32 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 38,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'pandas.core.frame.DataFrame'>\n",
+      "RangeIndex: 398 entries, 0 to 397\n",
+      "Data columns (total 8 columns):\n",
+      "mpg             398 non-null float64\n",
+      "cylinders       398 non-null int64\n",
+      "displacement    398 non-null float64\n",
+      "horse_power     392 non-null float64\n",
+      "weight          398 non-null int64\n",
+      "acceleration    398 non-null float64\n",
+      "model_year      398 non-null int64\n",
+      "car_name        398 non-null object\n",
+      "dtypes: float64(4), int64(3), object(1)\n",
+      "memory usage: 25.0+ KB\n"
+     ]
+    }
+   ],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "auto.info()"
    ]
   },
   {
@@ -486,11 +721,24 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 39,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "The newest model year is 82 and the oldest model year is 70\n"
+     ]
+    }
+   ],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "newest_model_year = auto[\"model_year\"].max()\n",
+    "\n",
+    "oldest_model_year = auto[\"model_year\"].min()\n",
+    "\n",
+    "print(f'The newest model year is {newest_model_year} and the oldest model year is {oldest_model_year}')"
    ]
   },
   {
@@ -502,11 +750,41 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 40,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "mpg             0\n",
+       "cylinders       0\n",
+       "displacement    0\n",
+       "horse_power     6\n",
+       "weight          0\n",
+       "acceleration    0\n",
+       "model_year      0\n",
+       "car_name        0\n",
+       "dtype: int64"
+      ]
+     },
+     "execution_count": 40,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "\n",
+    "auto.isnull().sum()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 41,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "auto.dropna(inplace=True)"
    ]
   },
   {
@@ -518,11 +796,30 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 43,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "4    199\n",
+       "8    103\n",
+       "6     83\n",
+       "3      4\n",
+       "5      3\n",
+       "Name: cylinders, dtype: int64"
+      ]
+     },
+     "execution_count": 43,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "auto[\"cylinders\"].value_counts()\n",
+    "\n",
+    "#There are 5 possible values of cylinders."
    ]
   },
   {

From 20568d4abdc545a4c761a41321acadff0f460184 Mon Sep 17 00:00:00 2001
From: vkamg <veronica.montoro.gonzalez@gmail.com>
Date: Mon, 13 Apr 2020 20:21:50 +0200
Subject: [PATCH 3/3] lab finished

---
 .../your-code/main.ipynb                      | 273 ++++++++++++++----
 1 file changed, 217 insertions(+), 56 deletions(-)

diff --git a/module-3/lab-supervised-learning-sklearn/your-code/main.ipynb b/module-3/lab-supervised-learning-sklearn/your-code/main.ipynb
index 8cadb183..df7a5d51 100644
--- a/module-3/lab-supervised-learning-sklearn/your-code/main.ipynb
+++ b/module-3/lab-supervised-learning-sklearn/your-code/main.ipynb
@@ -12,7 +12,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -39,7 +39,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -57,7 +57,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
@@ -66,7 +66,7 @@
        "dict_keys(['data', 'target', 'DESCR', 'feature_names', 'data_filename', 'target_filename'])"
       ]
      },
-     "execution_count": 8,
+     "execution_count": 3,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -89,7 +89,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
@@ -184,7 +184,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
@@ -238,7 +238,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -255,7 +255,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -275,7 +275,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -299,7 +299,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 27,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [
     {
@@ -343,7 +343,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 32,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [
     {
@@ -374,7 +374,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 31,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [
     {
@@ -438,7 +438,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 34,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [
     {
@@ -451,7 +451,7 @@
       "Model:                            OLS   Adj. R-squared (uncentered):              0.089\n",
       "Method:                 Least Squares   F-statistic:                              5.109\n",
       "Date:                Mon, 13 Apr 2020   Prob (F-statistic):                    4.77e-07\n",
-      "Time:                        00:57:01   Log-Likelihood:                         -2745.5\n",
+      "Time:                        19:25:47   Log-Likelihood:                         -2745.5\n",
       "No. Observations:                 422   AIC:                                      5511.\n",
       "Df Residuals:                     412   BIC:                                      5552.\n",
       "Df Model:                          10                                                  \n",
@@ -510,7 +510,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 13,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -535,7 +535,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 36,
+   "execution_count": 14,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -555,7 +555,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 37,
+   "execution_count": 15,
    "metadata": {},
    "outputs": [
     {
@@ -665,7 +665,7 @@
        "4          70                \\t\"ford torino\"  "
       ]
      },
-     "execution_count": 37,
+     "execution_count": 15,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -684,7 +684,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 38,
+   "execution_count": 16,
    "metadata": {},
    "outputs": [
     {
@@ -721,7 +721,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 39,
+   "execution_count": 17,
    "metadata": {},
    "outputs": [
     {
@@ -750,7 +750,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 40,
+   "execution_count": 18,
    "metadata": {},
    "outputs": [
     {
@@ -767,7 +767,7 @@
        "dtype: int64"
       ]
      },
-     "execution_count": 40,
+     "execution_count": 18,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -780,7 +780,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 41,
+   "execution_count": 19,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -796,7 +796,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 43,
+   "execution_count": 20,
    "metadata": {},
    "outputs": [
     {
@@ -810,7 +810,7 @@
        "Name: cylinders, dtype: int64"
       ]
      },
-     "execution_count": 43,
+     "execution_count": 20,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -822,6 +822,28 @@
     "#There are 5 possible values of cylinders."
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Index(['mpg', 'cylinders', 'displacement', 'horse_power', 'weight',\n",
+       "       'acceleration', 'model_year', 'car_name'],\n",
+       "      dtype='object')"
+      ]
+     },
+     "execution_count": 21,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "auto.columns"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -835,11 +857,24 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 22,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "\n",
+    "from sklearn.linear_model import LinearRegression\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "\n",
+    "auto.drop(['car_name'], axis=1, inplace=True)\n",
+    "\n",
+    "\n",
+    "X = auto[['cylinders', 'displacement', 'horse_power', 'weight',\n",
+    "       'acceleration', 'model_year']].values\n",
+    "y = auto['mpg'].values\n",
+    "\n",
+    "\n",
+    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)"
    ]
   },
   {
@@ -853,11 +888,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 35,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "\n",
+    "auto_model = LinearRegression()\n",
+    "\n",
+    "X = X_train\n",
+    "y = y_train\n",
+    "\n",
+    "auto_model = LinearRegression().fit(X, y)"
    ]
   },
   {
@@ -886,11 +928,28 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 36,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.8124650406575946"
+      ]
+     },
+     "execution_count": 36,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "\n",
+    "from sklearn.metrics import r2_score\n",
+    "\n",
+    "y_pred = auto_model.predict(X_train)\n",
+    "\n",
+    "r2_score(y_train, y_pred)"
    ]
   },
   {
@@ -906,11 +965,26 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 37,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.7956309524849701"
+      ]
+     },
+     "execution_count": 37,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "\n",
+    "y_test_pred = auto_model.predict(X_test)\n",
+    "\n",
+    "r2_score(y_test, y_test_pred)"
    ]
   },
   {
@@ -935,11 +1009,19 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 38,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "\n",
+    "X = auto[['cylinders', 'displacement', 'horse_power', 'weight',\n",
+    "       'acceleration', 'model_year']].values\n",
+    "y = auto['mpg'].values\n",
+    "\n",
+    "\n",
+    "X_train09, X_test09, y_train09, y_test09 = train_test_split(X, y, test_size=0.10)\n",
+    "\n"
    ]
   },
   {
@@ -951,11 +1033,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 39,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "\n",
+    "auto_model09 = LinearRegression()\n",
+    "\n",
+    "X = X_train09\n",
+    "y = y_train09\n",
+    "\n",
+    "auto_model09 = LinearRegression().fit(X, y)"
    ]
   },
   {
@@ -967,11 +1056,26 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 40,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.8003012594878844"
+      ]
+     },
+     "execution_count": 40,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "# Score the 90/10 model (auto_model09) on its own training split.\n",
+    "y_pred09 = auto_model09.predict(X_train09)\n",
+    "\n",
+    "r2_score(y_train09, y_pred09)"
    ]
   },
   {
@@ -983,11 +1087,26 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 41,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.8785908709630285"
+      ]
+     },
+     "execution_count": 41,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "# Score the 90/10 model (auto_model09) on its held-out test split.\n",
+    "y_test_pred09 = auto_model09.predict(X_test09)\n",
+    "\n",
+    "r2_score(y_test09, y_test_pred09)"
    ]
   },
   {
@@ -1003,7 +1122,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 42,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1019,11 +1138,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 43,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "# Keep the 3 top-ranked features; the count is passed by keyword because newer scikit-learn rejects the positional form.\n",
+    "selector = RFE(auto_model, n_features_to_select=3, step=1)"
    ]
   },
   {
@@ -1035,11 +1156,30 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 48,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([1, 2, 4, 3, 1, 1])"
+      ]
+     },
+     "execution_count": 48,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "\n",
+    "X = auto[['cylinders', 'displacement', 'horse_power', 'weight',\n",
+    "       'acceleration', 'model_year']].values\n",
+    "y = auto['mpg'].values\n",
+    "\n",
+    "selector = selector.fit(X, y)\n",
+    "\n",
+    "selector.ranking_"
    ]
   },
   {
@@ -1053,11 +1193,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 49,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "\n",
+    "X_train_reduced, X_test_reduced, y_train_reduced, y_test_reduced = train_test_split(X, y, test_size=0.20)"
    ]
   },
   {
@@ -1069,11 +1211,30 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 50,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.7478134390730498"
+      ]
+     },
+     "execution_count": 50,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "# Your code here: \n"
+    "# Your code here: \n",
+    "\n",
+    "auto_model_reduced = selector.fit(X_train_reduced, y_train_reduced)\n",
+    "\n",
+    "y_pred_reduced = auto_model_reduced.predict(X_test_reduced)\n",
+    "\n",
+    "r2_score(y_test_reduced, y_pred_reduced)\n",
+    "\n",
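+    "# The r squared score did not improve: the model now keeps only the 3 features selected by RFE (out of 6), and this is a new random train/test split, so the score is not directly comparable to the earlier ones."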
    ]
   },
   {