From ab5fca01cf605769b85148fef3f540f0c17de9ed Mon Sep 17 00:00:00 2001
From: "juliafroch@gmail.com" <juliafroch@gmail.com>
Date: Fri, 10 Apr 2020 21:14:11 +0200
Subject: [PATCH] done

---
 .../your-code/main.ipynb                      | 688 +++++++++++++++---
 1 file changed, 603 insertions(+), 85 deletions(-)

diff --git a/module-3/lab-supervised-learning-sklearn/your-code/main.ipynb b/module-3/lab-supervised-learning-sklearn/your-code/main.ipynb
index 0102ef94..9a022c26 100644
--- a/module-3/lab-supervised-learning-sklearn/your-code/main.ipynb
+++ b/module-3/lab-supervised-learning-sklearn/your-code/main.ipynb
@@ -12,11 +12,15 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Import your libraries:\n"
+    "# Import your libraries:\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import sklearn\n",
+    "from sklearn import datasets"
    ]
   },
   {
@@ -37,11 +41,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "diabetes = sklearn.datasets.load_diabetes(return_X_y=False)"
    ]
   },
   {
@@ -53,11 +58,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "dict_keys(['data', 'target', 'DESCR', 'feature_names', 'data_filename', 'target_filename'])"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "diabetes.keys()"
    ]
   },
   {
@@ -73,13 +90,59 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 7,
    "metadata": {
     "scrolled": false
    },
-   "outputs": [],
-   "source": [
-    "# Your code here:\n"
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      ".. _diabetes_dataset:\n",
+      "\n",
+      "Diabetes dataset\n",
+      "----------------\n",
+      "\n",
+      "Ten baseline variables, age, sex, body mass index, average blood\n",
+      "pressure, and six blood serum measurements were obtained for each of n =\n",
+      "442 diabetes patients, as well as the response of interest, a\n",
+      "quantitative measure of disease progression one year after baseline.\n",
+      "\n",
+      "**Data Set Characteristics:**\n",
+      "\n",
+      "  :Number of Instances: 442\n",
+      "\n",
+      "  :Number of Attributes: First 10 columns are numeric predictive values\n",
+      "\n",
+      "  :Target: Column 11 is a quantitative measure of disease progression one year after baseline\n",
+      "\n",
+      "  :Attribute Information:\n",
+      "      - Age\n",
+      "      - Sex\n",
+      "      - Body mass index\n",
+      "      - Average blood pressure\n",
+      "      - S1\n",
+      "      - S2\n",
+      "      - S3\n",
+      "      - S4\n",
+      "      - S5\n",
+      "      - S6\n",
+      "\n",
+      "Note: Each of these 10 feature variables have been mean centered and scaled by the standard deviation times `n_samples` (i.e. the sum of squares of each column totals 1).\n",
+      "\n",
+      "Source URL:\n",
+      "https://www4.stat.ncsu.edu/~boos/var.select/diabetes.html\n",
+      "\n",
+      "For more information see:\n",
+      "Bradley Efron, Trevor Hastie, Iain Johnstone and Robert Tibshirani (2004) \"Least Angle Regression,\" Annals of Statistics (with discussion), 407-499.\n",
+      "(https://web.stanford.edu/~hastie/Papers/LARS/LeastAngle_2002.pdf)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "print(diabetes['DESCR'])"
    ]
   },
   {
@@ -101,7 +164,10 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Enter your answer here:\n"
+    "# Enter your answer here:\n",
+    "#1. There are 10 attributes for each individual: age, sex, body mass index, average blood pressure and 6 blood serum measurements\n",
+    "#2. diabetes['data'] holds the attributes, i.e. the variables used to predict the target variable; diabetes['target'] is the target, a measure of each patient's disease progression\n",
+    "#3. There are 442 records (one per patient); each has 10 attributes plus the target variable, which makes a total of 442x11=4862 values\n"
    ]
   },
   {
@@ -115,11 +181,22 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 10,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(442, 10)\n",
+      "(442,)\n"
+     ]
+    }
+   ],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "print(diabetes['data'].shape)\n",
+    "print(diabetes['target'].shape)"
    ]
   },
   {
@@ -156,11 +233,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "from sklearn.linear_model import LinearRegression"
    ]
   },
   {
@@ -172,11 +250,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 19,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "diabetes_model = linear_model.LinearRegression()"
    ]
   },
   {
@@ -190,11 +269,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 29,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "diabetes_data, diabetes_target = datasets.load_diabetes(return_X_y=True)\n",
+    "diabetes_data_train = diabetes_X[:-20]\n",
+    "diabetes_data_test = diabetes_X[-20:]\n",
+    "diabetes_target_train = diabetes_y[:-20]\n",
+    "diabetes_target_test = diabetes_y[-20:]"
    ]
   },
   {
@@ -206,11 +290,26 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Your code here:\n"
+   "execution_count": 30,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "152.76430691633442\n",
+      "[ 3.03499549e-01 -2.37639315e+02  5.10530605e+02  3.27736980e+02\n",
+      " -8.14131709e+02  4.92814588e+02  1.02848452e+02  1.84606489e+02\n",
+      "  7.43519617e+02  7.60951722e+01]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "diabetes_model.fit(diabetes_data_train, diabetes_target_train)\n",
+    "\n",
+    "print(diabetes_model.intercept_)\n",
+    "print(diabetes_model.coef_)"
    ]
   },
   {
@@ -231,11 +330,24 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 34,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[197.61846908 155.43979328 172.88665147 111.53537279 164.80054784\n",
+      " 131.06954875 259.12237761 100.47935157 117.0601052  124.30503555\n",
+      " 218.36632793  61.19831284 132.25046751 120.3332925   52.54458691\n",
+      " 194.03798088 102.57139702 123.56604987 211.0346317   52.60335674]\n"
+     ]
+    }
+   ],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "diabetes_y_pred = diabetes_model.predict(diabetes_data_test)\n",
+    "print(diabetes_y_pred)"
    ]
   },
   {
@@ -247,11 +359,21 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 33,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[233.  91. 111. 152. 120.  67. 310.  94. 183.  66. 173.  72.  49.  64.\n",
+      "  48. 178. 104. 132. 220.  57.]\n"
+     ]
+    }
+   ],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "print(diabetes_target_test)"
    ]
   },
   {
@@ -267,7 +389,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your explanation here:\n"
+    "# Your explanation here:\n",
+    "# No, they are not the same: the test targets are the real observed values, while the predictions are only approximations based on the training data\n"
    ]
   },
   {
@@ -351,11 +474,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 41,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "auto = pd.read_csv('../auto-mpg.csv')"
    ]
   },
   {
@@ -367,11 +491,124 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Your code here:\n"
+   "execution_count": 42,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>mpg</th>\n",
+       "      <th>cylinders</th>\n",
+       "      <th>displacement</th>\n",
+       "      <th>horse_power</th>\n",
+       "      <th>weight</th>\n",
+       "      <th>acceleration</th>\n",
+       "      <th>model_year</th>\n",
+       "      <th>car_name</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>18.0</td>\n",
+       "      <td>8</td>\n",
+       "      <td>307.0</td>\n",
+       "      <td>130.0</td>\n",
+       "      <td>3504</td>\n",
+       "      <td>12.0</td>\n",
+       "      <td>70</td>\n",
+       "      <td>\\t\"chevrolet chevelle malibu\"</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>15.0</td>\n",
+       "      <td>8</td>\n",
+       "      <td>350.0</td>\n",
+       "      <td>165.0</td>\n",
+       "      <td>3693</td>\n",
+       "      <td>11.5</td>\n",
+       "      <td>70</td>\n",
+       "      <td>\\t\"buick skylark 320\"</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>18.0</td>\n",
+       "      <td>8</td>\n",
+       "      <td>318.0</td>\n",
+       "      <td>150.0</td>\n",
+       "      <td>3436</td>\n",
+       "      <td>11.0</td>\n",
+       "      <td>70</td>\n",
+       "      <td>\\t\"plymouth satellite\"</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>16.0</td>\n",
+       "      <td>8</td>\n",
+       "      <td>304.0</td>\n",
+       "      <td>150.0</td>\n",
+       "      <td>3433</td>\n",
+       "      <td>12.0</td>\n",
+       "      <td>70</td>\n",
+       "      <td>\\t\"amc rebel sst\"</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>17.0</td>\n",
+       "      <td>8</td>\n",
+       "      <td>302.0</td>\n",
+       "      <td>140.0</td>\n",
+       "      <td>3449</td>\n",
+       "      <td>10.5</td>\n",
+       "      <td>70</td>\n",
+       "      <td>\\t\"ford torino\"</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "    mpg  cylinders  displacement  horse_power  weight  acceleration  \\\n",
+       "0  18.0          8         307.0        130.0    3504          12.0   \n",
+       "1  15.0          8         350.0        165.0    3693          11.5   \n",
+       "2  18.0          8         318.0        150.0    3436          11.0   \n",
+       "3  16.0          8         304.0        150.0    3433          12.0   \n",
+       "4  17.0          8         302.0        140.0    3449          10.5   \n",
+       "\n",
+       "   model_year                       car_name  \n",
+       "0          70  \\t\"chevrolet chevelle malibu\"  \n",
+       "1          70          \\t\"buick skylark 320\"  \n",
+       "2          70         \\t\"plymouth satellite\"  \n",
+       "3          70              \\t\"amc rebel sst\"  \n",
+       "4          70                \\t\"ford torino\"  "
+      ]
+     },
+     "execution_count": 42,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "auto.head(5)"
    ]
   },
   {
@@ -383,11 +620,32 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Your code here:\n"
+   "execution_count": 43,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'pandas.core.frame.DataFrame'>\n",
+      "RangeIndex: 398 entries, 0 to 397\n",
+      "Data columns (total 8 columns):\n",
+      "mpg             398 non-null float64\n",
+      "cylinders       398 non-null int64\n",
+      "displacement    398 non-null float64\n",
+      "horse_power     392 non-null float64\n",
+      "weight          398 non-null int64\n",
+      "acceleration    398 non-null float64\n",
+      "model_year      398 non-null int64\n",
+      "car_name        398 non-null object\n",
+      "dtypes: float64(4), int64(3), object(1)\n",
+      "memory usage: 25.0+ KB\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "auto.info()"
    ]
   },
   {
@@ -399,11 +657,155 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Your code here:\n"
+   "execution_count": 44,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>mpg</th>\n",
+       "      <th>cylinders</th>\n",
+       "      <th>displacement</th>\n",
+       "      <th>horse_power</th>\n",
+       "      <th>weight</th>\n",
+       "      <th>acceleration</th>\n",
+       "      <th>model_year</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>count</th>\n",
+       "      <td>398.000000</td>\n",
+       "      <td>398.000000</td>\n",
+       "      <td>398.000000</td>\n",
+       "      <td>392.000000</td>\n",
+       "      <td>398.000000</td>\n",
+       "      <td>398.000000</td>\n",
+       "      <td>398.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>mean</th>\n",
+       "      <td>23.514573</td>\n",
+       "      <td>5.454774</td>\n",
+       "      <td>193.425879</td>\n",
+       "      <td>104.469388</td>\n",
+       "      <td>2970.424623</td>\n",
+       "      <td>15.568090</td>\n",
+       "      <td>76.010050</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>std</th>\n",
+       "      <td>7.815984</td>\n",
+       "      <td>1.701004</td>\n",
+       "      <td>104.269838</td>\n",
+       "      <td>38.491160</td>\n",
+       "      <td>846.841774</td>\n",
+       "      <td>2.757689</td>\n",
+       "      <td>3.697627</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>min</th>\n",
+       "      <td>9.000000</td>\n",
+       "      <td>3.000000</td>\n",
+       "      <td>68.000000</td>\n",
+       "      <td>46.000000</td>\n",
+       "      <td>1613.000000</td>\n",
+       "      <td>8.000000</td>\n",
+       "      <td>70.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>25%</th>\n",
+       "      <td>17.500000</td>\n",
+       "      <td>4.000000</td>\n",
+       "      <td>104.250000</td>\n",
+       "      <td>75.000000</td>\n",
+       "      <td>2223.750000</td>\n",
+       "      <td>13.825000</td>\n",
+       "      <td>73.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>50%</th>\n",
+       "      <td>23.000000</td>\n",
+       "      <td>4.000000</td>\n",
+       "      <td>148.500000</td>\n",
+       "      <td>93.500000</td>\n",
+       "      <td>2803.500000</td>\n",
+       "      <td>15.500000</td>\n",
+       "      <td>76.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>75%</th>\n",
+       "      <td>29.000000</td>\n",
+       "      <td>8.000000</td>\n",
+       "      <td>262.000000</td>\n",
+       "      <td>126.000000</td>\n",
+       "      <td>3608.000000</td>\n",
+       "      <td>17.175000</td>\n",
+       "      <td>79.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>max</th>\n",
+       "      <td>46.600000</td>\n",
+       "      <td>8.000000</td>\n",
+       "      <td>455.000000</td>\n",
+       "      <td>230.000000</td>\n",
+       "      <td>5140.000000</td>\n",
+       "      <td>24.800000</td>\n",
+       "      <td>82.000000</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "              mpg   cylinders  displacement  horse_power       weight  \\\n",
+       "count  398.000000  398.000000    398.000000   392.000000   398.000000   \n",
+       "mean    23.514573    5.454774    193.425879   104.469388  2970.424623   \n",
+       "std      7.815984    1.701004    104.269838    38.491160   846.841774   \n",
+       "min      9.000000    3.000000     68.000000    46.000000  1613.000000   \n",
+       "25%     17.500000    4.000000    104.250000    75.000000  2223.750000   \n",
+       "50%     23.000000    4.000000    148.500000    93.500000  2803.500000   \n",
+       "75%     29.000000    8.000000    262.000000   126.000000  3608.000000   \n",
+       "max     46.600000    8.000000    455.000000   230.000000  5140.000000   \n",
+       "\n",
+       "       acceleration  model_year  \n",
+       "count    398.000000  398.000000  \n",
+       "mean      15.568090   76.010050  \n",
+       "std        2.757689    3.697627  \n",
+       "min        8.000000   70.000000  \n",
+       "25%       13.825000   73.000000  \n",
+       "50%       15.500000   76.000000  \n",
+       "75%       17.175000   79.000000  \n",
+       "max       24.800000   82.000000  "
+      ]
+     },
+     "execution_count": 44,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "auto.describe()\n",
+    "# The newest model year is 82 and the oldest is 70"
    ]
   },
   {
@@ -415,11 +817,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 47,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "auto.isnull().sum()\n",
+    "auto.dropna(inplace=True)"
    ]
   },
   {
@@ -431,11 +835,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 50,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([8, 4, 6, 3, 5])"
+      ]
+     },
+     "execution_count": 50,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "auto['cylinders'].unique()"
    ]
   },
   {
@@ -455,7 +871,22 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "auto.drop('car_name', axis=1, inplace=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 56,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Your code here:\n",
+    "target = auto['mpg']\n",
+    "attributes = ['cylinders', 'displacement', 'horse_power', 'weight', 'acceleration']\n",
+    "data = auto[attributes]\n",
+    "\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "X_train, X_test, y_train, y_test = train_test_split(data, target, test_size = 0.2, random_state = 0)"
    ]
   },
   {
@@ -469,11 +900,24 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 61,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)"
+      ]
+     },
+     "execution_count": 61,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "auto_model = linear_model.LinearRegression()\n",
+    "auto_model.fit(X_train, y_train)"
    ]
   },
   {
@@ -502,11 +946,26 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Your code here:\n"
+   "execution_count": 66,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.7097139425798664"
+      ]
+     },
+     "execution_count": 66,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "y_pred = auto_model.predict(X_train)\n",
+    "\n",
+    "from sklearn.metrics import r2_score\n",
+    "r2_score(y_train, y_pred)"
    ]
   },
   {
@@ -522,11 +981,25 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Your code here:\n"
+   "execution_count": 68,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.6942573567797339"
+      ]
+     },
+     "execution_count": 68,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "y_test_pred = auto_model.predict(X_test)\n",
+    "\n",
+    "r2_score(y_test, y_test_pred)"
    ]
   },
   {
@@ -551,11 +1024,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 69,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "X_train09, X_test09, y_train09, y_test09 = train_test_split(data, target, test_size = 0.1, random_state = 0)"
    ]
   },
   {
@@ -567,11 +1042,24 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 71,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)"
+      ]
+     },
+     "execution_count": 71,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "auto_model09 = linear_model.LinearRegression()\n",
+    "auto_model09.fit(X_train09, y_train09)"
    ]
   },
   {
@@ -583,11 +1071,25 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Your code here:\n"
+   "execution_count": 72,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.711486921026971"
+      ]
+     },
+     "execution_count": 72,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "y_pred09 = auto_model.predict(X_train09)\n",
+    "\n",
+    "r2_score(y_train09, y_pred09)"
    ]
   },
   {
@@ -599,11 +1101,27 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Your code here:\n"
+   "execution_count": 73,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.6607860679011375"
+      ]
+     },
+     "execution_count": 73,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "y_test_pred09 = auto_model.predict(X_test09)\n",
+    "\n",
+    "r2_score(y_test09, y_test_pred09)\n",
+    "\n",
+    "# There is no improvement in the R squared score on the test set"
    ]
   },
   {
@@ -726,7 +1244,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.6"
+   "version": "3.7.5"
   }
  },
  "nbformat": 4,