From ac9da52e72a17372574cdb8dbfb2020230726cd9 Mon Sep 17 00:00:00 2001
From: Alberto Rodriguez <alroma@gmail.com>
Date: Sun, 22 Mar 2020 11:45:21 +0100
Subject: [PATCH 1/4] Pull

---
 module-3/lab-supervised-learning-sklearn/push | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 module-3/lab-supervised-learning-sklearn/push

diff --git a/module-3/lab-supervised-learning-sklearn/push b/module-3/lab-supervised-learning-sklearn/push
new file mode 100644
index 00000000..e69de29b

From 795652daaa5b2e1f1410fd6acd0abeb3a58f6e45 Mon Sep 17 00:00:00 2001
From: Alberto Rodriguez <alroma@gmail.com>
Date: Sun, 22 Mar 2020 11:46:32 +0100
Subject: [PATCH 2/4] push

---
 module-3/lab-supervised-learning-sklearn/push | 1 +
 1 file changed, 1 insertion(+)

diff --git a/module-3/lab-supervised-learning-sklearn/push b/module-3/lab-supervised-learning-sklearn/push
index e69de29b..aa981746 100644
--- a/module-3/lab-supervised-learning-sklearn/push
+++ b/module-3/lab-supervised-learning-sklearn/push
@@ -0,0 +1 @@
+fsfa

From ddc734fdd6b07af881c916ff4e1745930af96047 Mon Sep 17 00:00:00 2001
From: Alberto Rodriguez <alroma@gmail.com>
Date: Thu, 9 Apr 2020 11:56:46 +0200
Subject: [PATCH 3/4] Working on last challenge

---
 .../your-code/main.ipynb                      | 689 +++++++++++++++---
 1 file changed, 601 insertions(+), 88 deletions(-)

diff --git a/module-3/lab-supervised-learning-sklearn/your-code/main.ipynb b/module-3/lab-supervised-learning-sklearn/your-code/main.ipynb
index 0102ef94..a226c7d8 100644
--- a/module-3/lab-supervised-learning-sklearn/your-code/main.ipynb
+++ b/module-3/lab-supervised-learning-sklearn/your-code/main.ipynb
@@ -12,11 +12,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Import your libraries:\n"
+    "# Import your libraries:\n",
+    "import pandas as pd\n",
+    "from sklearn import datasets"
    ]
   },
   {
@@ -37,11 +39,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "diabetes = datasets.load_diabetes(return_X_y=False)"
    ]
   },
   {
@@ -53,11 +56,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 7,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "dict_keys(['data', 'target', 'DESCR', 'feature_names', 'data_filename', 'target_filename'])"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "diabetes.keys()"
    ]
   },
   {
@@ -73,13 +88,57 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "scrolled": false
-   },
-   "outputs": [],
-   "source": [
-    "# Your code here:\n"
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      ".. _diabetes_dataset:\n",
+      "\n",
+      "Diabetes dataset\n",
+      "----------------\n",
+      "\n",
+      "Ten baseline variables, age, sex, body mass index, average blood\n",
+      "pressure, and six blood serum measurements were obtained for each of n =\n",
+      "442 diabetes patients, as well as the response of interest, a\n",
+      "quantitative measure of disease progression one year after baseline.\n",
+      "\n",
+      "**Data Set Characteristics:**\n",
+      "\n",
+      "  :Number of Instances: 442\n",
+      "\n",
+      "  :Number of Attributes: First 10 columns are numeric predictive values\n",
+      "\n",
+      "  :Target: Column 11 is a quantitative measure of disease progression one year after baseline\n",
+      "\n",
+      "  :Attribute Information:\n",
+      "      - Age\n",
+      "      - Sex\n",
+      "      - Body mass index\n",
+      "      - Average blood pressure\n",
+      "      - S1\n",
+      "      - S2\n",
+      "      - S3\n",
+      "      - S4\n",
+      "      - S5\n",
+      "      - S6\n",
+      "\n",
+      "Note: Each of these 10 feature variables have been mean centered and scaled by the standard deviation times `n_samples` (i.e. the sum of squares of each column totals 1).\n",
+      "\n",
+      "Source URL:\n",
+      "https://www4.stat.ncsu.edu/~boos/var.select/diabetes.html\n",
+      "\n",
+      "For more information see:\n",
+      "Bradley Efron, Trevor Hastie, Iain Johnstone and Robert Tibshirani (2004) \"Least Angle Regression,\" Annals of Statistics (with discussion), 407-499.\n",
+      "(https://web.stanford.edu/~hastie/Papers/LARS/LeastAngle_2002.pdf)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "print (diabetes.DESCR)"
    ]
   },
   {
@@ -97,11 +156,17 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Enter your answer here:\n"
+    "# Enter your answer here:\n",
+    "# How many attributes are there in the data? What do they mean?\n",
+    "    #There are 10 attributes, all describing each individual (age, sex, BMI, average blood pressure and six blood serum measurements)\n",
+    "# What is the relation between diabetes['data'] and diabetes['target']?\n",
+    "    #diabetes['data'] holds the features describing each individual; diabetes['target'] measures how the disease has progressed one year after baseline.\n",
+    "# How many records are there in the data?\n",
+    "    #There is data from 442 individuals"
    ]
   },
   {
@@ -115,11 +180,29 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(442, 10) (442,)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "print(diabetes.data.shape, diabetes.target.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "#Data has 442 rows and 10 columns; target has 442 values in a single dimension. Exactly what was expected."
    ]
   },
   {
@@ -156,11 +239,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 16,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "from sklearn.linear_model import LinearRegression"
    ]
   },
   {
@@ -172,11 +256,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 17,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "diabetes_model = LinearRegression()"
    ]
   },
   {
@@ -190,11 +275,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 20,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "X = diabetes.data\n",
+    "y = diabetes.target\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "diabetes_data_train, diabetes_data_test, diabetes_target_train, diabetes_target_test = train_test_split(\n",
+    "    X, y, test_size=0.042, random_state=42)"
    ]
   },
   {
@@ -206,11 +296,25 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 26,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Intercept: 151.526250776629\n",
+      "Coefficients: [   2.67659502 -257.98776342  539.75237886  339.16378235 -854.04450849\n",
+      "  477.72031381  147.45555947  252.27907163  716.32314001   63.57644276]\n"
+     ]
+    }
+   ],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "diabetes_model.fit(diabetes_data_train, diabetes_target_train)\n",
+    "print(f'Intercept: {diabetes_model.intercept_}')\n",
+    "print(f'Coefficients: {diabetes_model.coef_}')\n",
+    "\n"
    ]
   },
   {
@@ -231,11 +335,27 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Your code here:\n"
+   "execution_count": 29,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([141.81216009, 176.62912663, 134.52300035, 287.7903017 ,\n",
+       "       122.88444183,  96.36277017, 257.66709041, 184.78962182,\n",
+       "        92.6144851 , 111.50621663,  98.34330547, 165.83614827,\n",
+       "        58.00056113, 205.43398993, 100.75616227, 130.5507428 ,\n",
+       "       218.81135251, 246.83179054, 193.45957561])"
+      ]
+     },
+     "execution_count": 29,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "diabetes_model.predict(diabetes_data_test)"
    ]
   },
   {
@@ -247,11 +367,21 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 30,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[219.  70. 202. 230. 111.  84. 242. 272.  94.  96.  94. 252.  99. 297.\n",
+      " 135.  67. 295. 264. 170.]\n"
+     ]
+    }
+   ],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "print(diabetes_target_test)"
    ]
   },
   {
@@ -263,11 +393,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 31,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your explanation here:\n"
+    "# Your explanation here:\n",
+    "    #The predictions are not even close to the real values; there are large differences. The variables used for the prediction are not \"very predictive\" of the evolution of the diabetes"
    ]
   },
   {
@@ -302,11 +433,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 34,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "import numpy as np\n",
+    "import statsmodels.api as sm\n",
+    "\n",
+    "\n",
+    "#I AM SKIPPING THIS BONUS "
    ]
   },
   {
@@ -351,11 +487,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 78,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "auto = pd.read_csv('../auto-mpg.csv')"
    ]
   },
   {
@@ -367,11 +504,144 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 79,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>mpg</th>\n",
+       "      <th>cylinders</th>\n",
+       "      <th>displacement</th>\n",
+       "      <th>horse_power</th>\n",
+       "      <th>weight</th>\n",
+       "      <th>acceleration</th>\n",
+       "      <th>model_year</th>\n",
+       "      <th>car_name</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>18.0</td>\n",
+       "      <td>8</td>\n",
+       "      <td>307.0</td>\n",
+       "      <td>130.0</td>\n",
+       "      <td>3504</td>\n",
+       "      <td>12.0</td>\n",
+       "      <td>70</td>\n",
+       "      <td>\\t\"chevrolet chevelle malibu\"</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>15.0</td>\n",
+       "      <td>8</td>\n",
+       "      <td>350.0</td>\n",
+       "      <td>165.0</td>\n",
+       "      <td>3693</td>\n",
+       "      <td>11.5</td>\n",
+       "      <td>70</td>\n",
+       "      <td>\\t\"buick skylark 320\"</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>18.0</td>\n",
+       "      <td>8</td>\n",
+       "      <td>318.0</td>\n",
+       "      <td>150.0</td>\n",
+       "      <td>3436</td>\n",
+       "      <td>11.0</td>\n",
+       "      <td>70</td>\n",
+       "      <td>\\t\"plymouth satellite\"</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>16.0</td>\n",
+       "      <td>8</td>\n",
+       "      <td>304.0</td>\n",
+       "      <td>150.0</td>\n",
+       "      <td>3433</td>\n",
+       "      <td>12.0</td>\n",
+       "      <td>70</td>\n",
+       "      <td>\\t\"amc rebel sst\"</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>17.0</td>\n",
+       "      <td>8</td>\n",
+       "      <td>302.0</td>\n",
+       "      <td>140.0</td>\n",
+       "      <td>3449</td>\n",
+       "      <td>10.5</td>\n",
+       "      <td>70</td>\n",
+       "      <td>\\t\"ford torino\"</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "    mpg  cylinders  displacement  horse_power  weight  acceleration  \\\n",
+       "0  18.0          8         307.0        130.0    3504          12.0   \n",
+       "1  15.0          8         350.0        165.0    3693          11.5   \n",
+       "2  18.0          8         318.0        150.0    3436          11.0   \n",
+       "3  16.0          8         304.0        150.0    3433          12.0   \n",
+       "4  17.0          8         302.0        140.0    3449          10.5   \n",
+       "\n",
+       "   model_year                       car_name  \n",
+       "0          70  \\t\"chevrolet chevelle malibu\"  \n",
+       "1          70          \\t\"buick skylark 320\"  \n",
+       "2          70         \\t\"plymouth satellite\"  \n",
+       "3          70              \\t\"amc rebel sst\"  \n",
+       "4          70                \\t\"ford torino\"  "
+      ]
+     },
+     "execution_count": 79,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "auto.head(5)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 80,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(398, 8)"
+      ]
+     },
+     "execution_count": 80,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "# Your code here:\n"
+    "auto.shape"
    ]
   },
   {
@@ -383,11 +653,31 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Your code here:\n"
+   "execution_count": 81,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "mpg             float64\n",
+       "cylinders         int64\n",
+       "displacement    float64\n",
+       "horse_power     float64\n",
+       "weight            int64\n",
+       "acceleration    float64\n",
+       "model_year        int64\n",
+       "car_name         object\n",
+       "dtype: object"
+      ]
+     },
+     "execution_count": 81,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "auto.dtypes"
    ]
   },
   {
@@ -399,11 +689,31 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Your code here:\n"
+   "execution_count": 82,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "count    398.000000\n",
+       "mean      76.010050\n",
+       "std        3.697627\n",
+       "min       70.000000\n",
+       "25%       73.000000\n",
+       "50%       76.000000\n",
+       "75%       79.000000\n",
+       "max       82.000000\n",
+       "Name: model_year, dtype: float64"
+      ]
+     },
+     "execution_count": 82,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "auto['model_year'].describe()"
    ]
   },
   {
@@ -415,11 +725,90 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 83,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "mpg             0\n",
+       "cylinders       0\n",
+       "displacement    0\n",
+       "horse_power     6\n",
+       "weight          0\n",
+       "acceleration    0\n",
+       "model_year      0\n",
+       "car_name        0\n",
+       "dtype: int64"
+      ]
+     },
+     "execution_count": 83,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "auto.isnull().sum()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 84,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "#After checking, only the \"horse_power\" column has missing values. I proceed to delete all rows with missing values\n",
+    "auto = auto.dropna()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 85,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "mpg             0\n",
+       "cylinders       0\n",
+       "displacement    0\n",
+       "horse_power     0\n",
+       "weight          0\n",
+       "acceleration    0\n",
+       "model_year      0\n",
+       "car_name        0\n",
+       "dtype: int64"
+      ]
+     },
+     "execution_count": 85,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "auto.isnull().sum()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 86,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(392, 8)"
+      ]
+     },
+     "execution_count": 86,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "auto.shape\n",
+    "#There were 398 rows at the beginning; after dropping the rows with missing values, only 392 remain"
    ]
   },
   {
@@ -431,11 +820,29 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Your code here:\n"
+   "execution_count": 87,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "4    199\n",
+       "8    103\n",
+       "6     83\n",
+       "3      4\n",
+       "5      3\n",
+       "Name: cylinders, dtype: int64"
+      ]
+     },
+     "execution_count": 87,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "auto['cylinders'].value_counts()\n",
+    "#there are 5 possible values"
    ]
   },
   {
@@ -451,11 +858,34 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 88,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "#Dropping the \"car_name\" column\n",
+    "auto = auto.loc[:, auto.columns != 'car_name']\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 89,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Creating the X and y that will be used for the model\n",
+    "y = auto['mpg']\n",
+    "X = auto.loc[:, auto.columns != 'mpg']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 90,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X_train, X_test, y_train, y_test = train_test_split(\n",
+    "    X, y, test_size=0.2)"
    ]
   },
   {
@@ -469,11 +899,24 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 91,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)"
+      ]
+     },
+     "execution_count": 91,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "auto_model = LinearRegression()\n",
+    "auto_model.fit(X_train, y_train)"
    ]
   },
   {
@@ -502,11 +945,27 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Your code here:\n"
+   "execution_count": 92,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.8083692109045223"
+      ]
+     },
+     "execution_count": 92,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "from sklearn.metrics import r2_score\n",
+    "\n",
+    "y_pred = auto_model.predict(X_train)\n",
+    "\n",
+    "r2_score(y_train, y_pred)\n"
    ]
   },
   {
@@ -522,11 +981,24 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 93,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.8050966794310453"
+      ]
+     },
+     "execution_count": 93,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "y_test_pred = auto_model.predict(X_test)\n",
+    "r2_score(y_test, y_test_pred)\n"
    ]
   },
   {
@@ -538,6 +1010,15 @@
     "The r squared scores of the training data and the test data are pretty close (0.8146 vs 0.7818). This means our model is not overfitted. However, there is still room to improve the model fit. Move on to the next challenge."
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 94,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# I do not get the same results but it's true they are pretty close."
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -551,11 +1032,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 95,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "X_train09, X_test09, y_train09, y_test09 = train_test_split(\n",
+    "    X, y, test_size=0.1)"
    ]
   },
   {
@@ -567,11 +1050,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 96,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "auto_model09 = LinearRegression()"
    ]
   },
   {
@@ -583,11 +1067,26 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 109,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "ename": "TypeError",
+     "evalue": "'tuple' object is not callable",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
+      "\u001b[0;32m<ipython-input-109-07108a4dbafb>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      2\u001b[0m \u001b[0mauto_model09\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_train09\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_train09\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      3\u001b[0m \u001b[0my_train_predict09\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mauto_model\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_train09\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mr2_score\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0my_train09\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_train_predict09\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+      "\u001b[0;31mTypeError\u001b[0m: 'tuple' object is not callable"
+     ]
+    }
+   ],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "auto_model09.fit(X_train09, y_train09)\n",
+    "y_train_predict09 = auto_model.predict(X_train09)\n",
+    "r2_score (y_train09, y_train_predict09)"
    ]
   },
   {
@@ -599,11 +1098,25 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 108,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "ename": "TypeError",
+     "evalue": "'tuple' object is not callable",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
+      "\u001b[0;32m<ipython-input-108-c713fd8a380b>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;31m# Your code here:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      2\u001b[0m \u001b[0my_test_predict09\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mauto_model\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_test09\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mr2_score\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0my_test09\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0my_test_predict09\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+      "\u001b[0;31mTypeError\u001b[0m: 'tuple' object is not callable"
+     ]
+    }
+   ],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "y_test_predict09 = auto_model.predict(X_test09)\n",
+    "r2_score (y_test09 , y_test_predict09)"
    ]
   },
   {
@@ -712,9 +1225,9 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python [conda env:data_env]",
    "language": "python",
-   "name": "python3"
+   "name": "conda-env-data_env-py"
   },
   "language_info": {
    "codemirror_mode": {
@@ -726,9 +1239,9 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.6"
+   "version": "3.7.5"
   }
  },
  "nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 4
 }

From f5d8934a34a55be466440e0bb054fa1b4b254520 Mon Sep 17 00:00:00 2001
From: Alberto Rodriguez <alroma@gmail.com>
Date: Thu, 9 Apr 2020 12:01:06 +0200
Subject: [PATCH 4/4] lab finished

---
 .../your-code/main.ipynb                      | 102 +++++++++---------
 1 file changed, 50 insertions(+), 52 deletions(-)

diff --git a/module-3/lab-supervised-learning-sklearn/your-code/main.ipynb b/module-3/lab-supervised-learning-sklearn/your-code/main.ipynb
index a226c7d8..4d91cefd 100644
--- a/module-3/lab-supervised-learning-sklearn/your-code/main.ipynb
+++ b/module-3/lab-supervised-learning-sklearn/your-code/main.ipynb
@@ -487,7 +487,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 78,
+   "execution_count": 111,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -504,7 +504,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 79,
+   "execution_count": 112,
    "metadata": {},
    "outputs": [
     {
@@ -614,7 +614,7 @@
        "4          70                \\t\"ford torino\"  "
       ]
      },
-     "execution_count": 79,
+     "execution_count": 112,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -626,7 +626,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 80,
+   "execution_count": 113,
    "metadata": {},
    "outputs": [
     {
@@ -635,7 +635,7 @@
        "(398, 8)"
       ]
      },
-     "execution_count": 80,
+     "execution_count": 113,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -653,7 +653,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 81,
+   "execution_count": 114,
    "metadata": {},
    "outputs": [
     {
@@ -670,7 +670,7 @@
        "dtype: object"
       ]
      },
-     "execution_count": 81,
+     "execution_count": 114,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -689,7 +689,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 82,
+   "execution_count": 115,
    "metadata": {},
    "outputs": [
     {
@@ -706,7 +706,7 @@
        "Name: model_year, dtype: float64"
       ]
      },
-     "execution_count": 82,
+     "execution_count": 115,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -725,7 +725,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 83,
+   "execution_count": 116,
    "metadata": {},
    "outputs": [
     {
@@ -742,7 +742,7 @@
        "dtype: int64"
       ]
      },
-     "execution_count": 83,
+     "execution_count": 116,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -754,7 +754,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 84,
+   "execution_count": 117,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -764,7 +764,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 85,
+   "execution_count": 118,
    "metadata": {},
    "outputs": [
     {
@@ -781,7 +781,7 @@
        "dtype: int64"
       ]
      },
-     "execution_count": 85,
+     "execution_count": 118,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -792,7 +792,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 86,
+   "execution_count": 119,
    "metadata": {},
    "outputs": [
     {
@@ -801,7 +801,7 @@
        "(392, 8)"
       ]
      },
-     "execution_count": 86,
+     "execution_count": 119,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -820,7 +820,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 87,
+   "execution_count": 120,
    "metadata": {},
    "outputs": [
     {
@@ -834,7 +834,7 @@
        "Name: cylinders, dtype: int64"
       ]
      },
-     "execution_count": 87,
+     "execution_count": 120,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -858,7 +858,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 88,
+   "execution_count": 121,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -869,7 +869,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 89,
+   "execution_count": 122,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -880,7 +880,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 90,
+   "execution_count": 123,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -899,7 +899,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 91,
+   "execution_count": 124,
    "metadata": {},
    "outputs": [
     {
@@ -908,7 +908,7 @@
        "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)"
       ]
      },
-     "execution_count": 91,
+     "execution_count": 124,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -945,16 +945,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 92,
+   "execution_count": 125,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "0.8083692109045223"
+       "0.8121425247115885"
       ]
      },
-     "execution_count": 92,
+     "execution_count": 125,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -981,16 +981,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 93,
+   "execution_count": 126,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "0.8050966794310453"
+       "0.7958650048990128"
       ]
      },
-     "execution_count": 93,
+     "execution_count": 126,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1012,7 +1012,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 94,
+   "execution_count": 127,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1032,7 +1032,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 95,
+   "execution_count": 128,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1050,7 +1050,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 96,
+   "execution_count": 129,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1067,19 +1067,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 109,
+   "execution_count": 130,
    "metadata": {},
    "outputs": [
     {
-     "ename": "TypeError",
-     "evalue": "'tuple' object is not callable",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
-      "\u001b[0;32m<ipython-input-109-07108a4dbafb>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      2\u001b[0m \u001b[0mauto_model09\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_train09\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_train09\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      3\u001b[0m \u001b[0my_train_predict09\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mauto_model\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_train09\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mr2_score\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0my_train09\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_train_predict09\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
-      "\u001b[0;31mTypeError\u001b[0m: 'tuple' object is not callable"
-     ]
+     "data": {
+      "text/plain": [
+       "0.8043919746446471"
+      ]
+     },
+     "execution_count": 130,
+     "metadata": {},
+     "output_type": "execute_result"
     }
    ],
    "source": [
@@ -1098,19 +1097,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 108,
+   "execution_count": 131,
    "metadata": {},
    "outputs": [
     {
-     "ename": "TypeError",
-     "evalue": "'tuple' object is not callable",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
-      "\u001b[0;32m<ipython-input-108-c713fd8a380b>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;31m# Your code here:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      2\u001b[0m \u001b[0my_test_predict09\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mauto_model\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_test09\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mr2_score\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0my_test09\u001b[0m \u001b[0;34m,\u001b[0m \u001b[0my_test_predict09\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
-      "\u001b[0;31mTypeError\u001b[0m: 'tuple' object is not callable"
-     ]
+     "data": {
+      "text/plain": [
+       "0.8275315374679679"
+      ]
+     },
+     "execution_count": 131,
+     "metadata": {},
+     "output_type": "execute_result"
     }
    ],
    "source": [