From 86ca65dd29930ee511e22733be47cacb2c17214f Mon Sep 17 00:00:00 2001
From: Caio Nunez <caio.nunez@gmail.com>
Date: Wed, 8 Mar 2023 14:34:13 +0000
Subject: [PATCH] supervisedsklearn

---
 your-code/main.ipynb | 775 +++++++++++++++++++++++++++++++++++++------
 1 file changed, 671 insertions(+), 104 deletions(-)

diff --git a/your-code/main.ipynb b/your-code/main.ipynb
index 8a9fa9e..059813a 100644
--- a/your-code/main.ipynb
+++ b/your-code/main.ipynb
@@ -12,11 +12,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Import your libraries:\n"
+    "# Import your libraries:\n",
+    "\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "\n",
+    "from sklearn.model_selection import train_test_split"
    ]
   },
   {
@@ -37,11 +42,95 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Your code here:\n"
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'data': array([[ 0.03807591,  0.05068012,  0.06169621, ..., -0.00259226,\n",
+       "          0.01990842, -0.01764613],\n",
+       "        [-0.00188202, -0.04464164, -0.05147406, ..., -0.03949338,\n",
+       "         -0.06832974, -0.09220405],\n",
+       "        [ 0.08529891,  0.05068012,  0.04445121, ..., -0.00259226,\n",
+       "          0.00286377, -0.02593034],\n",
+       "        ...,\n",
+       "        [ 0.04170844,  0.05068012, -0.01590626, ..., -0.01107952,\n",
+       "         -0.04687948,  0.01549073],\n",
+       "        [-0.04547248, -0.04464164,  0.03906215, ...,  0.02655962,\n",
+       "          0.04452837, -0.02593034],\n",
+       "        [-0.04547248, -0.04464164, -0.0730303 , ..., -0.03949338,\n",
+       "         -0.00421986,  0.00306441]]),\n",
+       " 'target': array([151.,  75., 141., 206., 135.,  97., 138.,  63., 110., 310., 101.,\n",
+       "         69., 179., 185., 118., 171., 166., 144.,  97., 168.,  68.,  49.,\n",
+       "         68., 245., 184., 202., 137.,  85., 131., 283., 129.,  59., 341.,\n",
+       "         87.,  65., 102., 265., 276., 252.,  90., 100.,  55.,  61.,  92.,\n",
+       "        259.,  53., 190., 142.,  75., 142., 155., 225.,  59., 104., 182.,\n",
+       "        128.,  52.,  37., 170., 170.,  61., 144.,  52., 128.,  71., 163.,\n",
+       "        150.,  97., 160., 178.,  48., 270., 202., 111.,  85.,  42., 170.,\n",
+       "        200., 252., 113., 143.,  51.,  52., 210.,  65., 141.,  55., 134.,\n",
+       "         42., 111.,  98., 164.,  48.,  96.,  90., 162., 150., 279.,  92.,\n",
+       "         83., 128., 102., 302., 198.,  95.,  53., 134., 144., 232.,  81.,\n",
+       "        104.,  59., 246., 297., 258., 229., 275., 281., 179., 200., 200.,\n",
+       "        173., 180.,  84., 121., 161.,  99., 109., 115., 268., 274., 158.,\n",
+       "        107.,  83., 103., 272.,  85., 280., 336., 281., 118., 317., 235.,\n",
+       "         60., 174., 259., 178., 128.,  96., 126., 288.,  88., 292.,  71.,\n",
+       "        197., 186.,  25.,  84.,  96., 195.,  53., 217., 172., 131., 214.,\n",
+       "         59.,  70., 220., 268., 152.,  47.,  74., 295., 101., 151., 127.,\n",
+       "        237., 225.,  81., 151., 107.,  64., 138., 185., 265., 101., 137.,\n",
+       "        143., 141.,  79., 292., 178.,  91., 116.,  86., 122.,  72., 129.,\n",
+       "        142.,  90., 158.,  39., 196., 222., 277.,  99., 196., 202., 155.,\n",
+       "         77., 191.,  70.,  73.,  49.,  65., 263., 248., 296., 214., 185.,\n",
+       "         78.,  93., 252., 150.,  77., 208.,  77., 108., 160.,  53., 220.,\n",
+       "        154., 259.,  90., 246., 124.,  67.,  72., 257., 262., 275., 177.,\n",
+       "         71.,  47., 187., 125.,  78.,  51., 258., 215., 303., 243.,  91.,\n",
+       "        150., 310., 153., 346.,  63.,  89.,  50.,  39., 103., 308., 116.,\n",
+       "        145.,  74.,  45., 115., 264.,  87., 202., 127., 182., 241.,  66.,\n",
+       "         94., 283.,  64., 102., 200., 265.,  94., 230., 181., 156., 233.,\n",
+       "         60., 219.,  80.,  68., 332., 248.,  84., 200.,  55.,  85.,  89.,\n",
+       "         31., 129.,  83., 275.,  65., 198., 236., 253., 124.,  44., 172.,\n",
+       "        114., 142., 109., 180., 144., 163., 147.,  97., 220., 190., 109.,\n",
+       "        191., 122., 230., 242., 248., 249., 192., 131., 237.,  78., 135.,\n",
+       "        244., 199., 270., 164.,  72.,  96., 306.,  91., 214.,  95., 216.,\n",
+       "        263., 178., 113., 200., 139., 139.,  88., 148.,  88., 243.,  71.,\n",
+       "         77., 109., 272.,  60.,  54., 221.,  90., 311., 281., 182., 321.,\n",
+       "         58., 262., 206., 233., 242., 123., 167.,  63., 197.,  71., 168.,\n",
+       "        140., 217., 121., 235., 245.,  40.,  52., 104., 132.,  88.,  69.,\n",
+       "        219.,  72., 201., 110.,  51., 277.,  63., 118.,  69., 273., 258.,\n",
+       "         43., 198., 242., 232., 175.,  93., 168., 275., 293., 281.,  72.,\n",
+       "        140., 189., 181., 209., 136., 261., 113., 131., 174., 257.,  55.,\n",
+       "         84.,  42., 146., 212., 233.,  91., 111., 152., 120.,  67., 310.,\n",
+       "         94., 183.,  66., 173.,  72.,  49.,  64.,  48., 178., 104., 132.,\n",
+       "        220.,  57.]),\n",
+       " 'frame': None,\n",
+       " 'DESCR': '.. _diabetes_dataset:\\n\\nDiabetes dataset\\n----------------\\n\\nTen baseline variables, age, sex, body mass index, average blood\\npressure, and six blood serum measurements were obtained for each of n =\\n442 diabetes patients, as well as the response of interest, a\\nquantitative measure of disease progression one year after baseline.\\n\\n**Data Set Characteristics:**\\n\\n  :Number of Instances: 442\\n\\n  :Number of Attributes: First 10 columns are numeric predictive values\\n\\n  :Target: Column 11 is a quantitative measure of disease progression one year after baseline\\n\\n  :Attribute Information:\\n      - age     age in years\\n      - sex\\n      - bmi     body mass index\\n      - bp      average blood pressure\\n      - s1      tc, total serum cholesterol\\n      - s2      ldl, low-density lipoproteins\\n      - s3      hdl, high-density lipoproteins\\n      - s4      tch, total cholesterol / HDL\\n      - s5      ltg, possibly log of serum triglycerides level\\n      - s6      glu, blood sugar level\\n\\nNote: Each of these 10 feature variables have been mean centered and scaled by the standard deviation times `n_samples` (i.e. the sum of squares of each column totals 1).\\n\\nSource URL:\\nhttps://www4.stat.ncsu.edu/~boos/var.select/diabetes.html\\n\\nFor more information see:\\nBradley Efron, Trevor Hastie, Iain Johnstone and Robert Tibshirani (2004) \"Least Angle Regression,\" Annals of Statistics (with discussion), 407-499.\\n(https://web.stanford.edu/~hastie/Papers/LARS/LeastAngle_2002.pdf)',\n",
+       " 'feature_names': ['age',\n",
+       "  'sex',\n",
+       "  'bmi',\n",
+       "  'bp',\n",
+       "  's1',\n",
+       "  's2',\n",
+       "  's3',\n",
+       "  's4',\n",
+       "  's5',\n",
+       "  's6'],\n",
+       " 'data_filename': 'diabetes_data.csv.gz',\n",
+       " 'target_filename': 'diabetes_target.csv.gz',\n",
+       " 'data_module': 'sklearn.datasets.data'}"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "\n",
+    "from sklearn.datasets import load_diabetes\n",
+    "\n",
+    "diabetes = load_diabetes()\n",
+    "diabetes"
    ]
   },
   {
@@ -53,11 +142,24 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Your code here:\n"
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "dict_keys(['data', 'target', 'frame', 'DESCR', 'feature_names', 'data_filename', 'target_filename', 'data_module'])"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "\n",
+    "diabetes.keys()"
    ]
   },
   {
@@ -73,13 +175,60 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 6,
    "metadata": {
     "scrolled": false
    },
-   "outputs": [],
-   "source": [
-    "# Your code here:\n"
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      ".. _diabetes_dataset:\n",
+      "\n",
+      "Diabetes dataset\n",
+      "----------------\n",
+      "\n",
+      "Ten baseline variables, age, sex, body mass index, average blood\n",
+      "pressure, and six blood serum measurements were obtained for each of n =\n",
+      "442 diabetes patients, as well as the response of interest, a\n",
+      "quantitative measure of disease progression one year after baseline.\n",
+      "\n",
+      "**Data Set Characteristics:**\n",
+      "\n",
+      "  :Number of Instances: 442\n",
+      "\n",
+      "  :Number of Attributes: First 10 columns are numeric predictive values\n",
+      "\n",
+      "  :Target: Column 11 is a quantitative measure of disease progression one year after baseline\n",
+      "\n",
+      "  :Attribute Information:\n",
+      "      - age     age in years\n",
+      "      - sex\n",
+      "      - bmi     body mass index\n",
+      "      - bp      average blood pressure\n",
+      "      - s1      tc, total serum cholesterol\n",
+      "      - s2      ldl, low-density lipoproteins\n",
+      "      - s3      hdl, high-density lipoproteins\n",
+      "      - s4      tch, total cholesterol / HDL\n",
+      "      - s5      ltg, possibly log of serum triglycerides level\n",
+      "      - s6      glu, blood sugar level\n",
+      "\n",
+      "Note: Each of these 10 feature variables have been mean centered and scaled by the standard deviation times `n_samples` (i.e. the sum of squares of each column totals 1).\n",
+      "\n",
+      "Source URL:\n",
+      "https://www4.stat.ncsu.edu/~boos/var.select/diabetes.html\n",
+      "\n",
+      "For more information see:\n",
+      "Bradley Efron, Trevor Hastie, Iain Johnstone and Robert Tibshirani (2004) \"Least Angle Regression,\" Annals of Statistics (with discussion), 407-499.\n",
+      "(https://web.stanford.edu/~hastie/Papers/LARS/LeastAngle_2002.pdf)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "\n",
+    "print(diabetes.DESCR)"
    ]
   },
   {
@@ -101,7 +250,16 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Enter your answer here:\n"
+    "# Enter your answer here:\n",
+    "\n",
+    "\"\"\" \n",
+    "\n",
+    "1. There are 10 attributes: numeric predictive values in the first 10 columns\n",
+    "2. diabetes['data'] is the set of attributes/features that we will use to predict diabetes['target'], which is the actual\n",
+    "measure that we want to predict/evaluate with the model\n",
+    "3. There are 442 records in the data\n",
+    "\n",
+    "\"\"\""
    ]
   },
   {
@@ -115,11 +273,46 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Your code here:\n"
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(442, 10)"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "\n",
+    "diabetes[\"data\"].shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(442,)"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "diabetes[\"target\"].shape\n",
+    "\n",
+    "## They match"
    ]
   },
   {
@@ -156,11 +349,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "\n",
+    "from sklearn.linear_model import LinearRegression"
    ]
   },
   {
@@ -172,11 +367,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "\n",
+    "diabetes_model = LinearRegression()"
    ]
   },
   {
@@ -190,11 +387,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "\n",
+    "features = diabetes[\"data\"]\n",
+    "target = diabetes[\"target\"]\n",
+    "# Hold out 20% for testing; fixed seed so the split and the scores below are reproducible on re-run\n",
+    "diabetes_data_train, diabetes_data_test, diabetes_target_train, diabetes_target_test = train_test_split(features, target, test_size = 0.20, random_state = 42)"
    ]
   },
   {
@@ -206,11 +408,25 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Your code here:\n"
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "152.55472220850726\n",
+      "[  17.54816161 -276.23079567  508.22184486  301.77211175 -340.77743688\n",
+      "  211.28056101 -179.85591454  -14.29758702  612.03383667  112.81057961]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "\n",
+    "diabetes_model.fit(diabetes_data_train, diabetes_target_train)\n",
+    "print(diabetes_model.intercept_)\n",
+    "print(diabetes_model.coef_)"
    ]
   },
   {
@@ -231,11 +447,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 16,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "\n",
+    "pred = diabetes_model.predict(diabetes_data_test)"
    ]
   },
   {
@@ -247,11 +465,46 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Your code here:\n"
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[ 94.38335042 122.65198523 169.22244428 235.88003275 101.06403208\n",
+      " 144.19926493 189.58328191 166.18711692 166.31744813 257.98404066\n",
+      "  85.63857299 158.71997794 184.00440706 120.50358102 181.21935084\n",
+      " 193.19017561 171.24205217 104.23055219  59.98613892 187.56771049\n",
+      "  87.60721017 127.07332764 119.35812122 138.80008617 148.53422288\n",
+      " 162.60677441  61.56487255 123.86812274  93.48881537 181.11167804\n",
+      " 125.266734   251.93450268 158.41501333 209.23915245 247.30909879\n",
+      " 160.77681226 269.63146777 219.95202458 227.5199142  162.62826983\n",
+      "  88.42219866  65.51787117 247.7462447  105.70179774 123.26993379\n",
+      " 111.79889248 217.89107582  88.84501394 160.6125096  179.80776481\n",
+      " 168.12523594 136.01768364 126.84602099 222.51656033 137.25169426\n",
+      " 219.24454491 147.14584268  92.1163227  128.40451421 153.5556762\n",
+      "  95.64457192  80.10574998 163.84896896 160.81658332  70.97370019\n",
+      " 138.86415525 164.54804226  81.0605143   89.63805287 105.13815998\n",
+      " 105.53622297 128.77503404 225.89249705  71.32855345 131.12927936\n",
+      " 142.89122931 234.15161481 169.97723831  75.93654611 225.61601134\n",
+      " 188.15989464 104.78110262 147.88134061 126.65426    194.38772868\n",
+      "  55.20423087 125.00248632 114.92596064 179.0183498 ]\n",
+      "[125.  59. 151. 259.  97. 172. 129. 120. 206. 132. 114. 118. 229. 150.\n",
+      "  85. 222. 144.  87.  85. 170.  55. 139. 183. 103.  97. 151.  99. 168.\n",
+      " 115. 163.  78. 336. 237. 189. 341. 262. 303. 180. 173. 235.  53.  83.\n",
+      " 215. 102.  68.  61. 202.  54. 184. 140. 121.  59. 178. 275.  83. 279.\n",
+      " 115.  94. 148. 252. 137.  89. 252. 185. 128. 190. 258.  71. 170.  49.\n",
+      "  71. 135. 208.  59.  74. 202. 152. 141.  72. 295.  78. 128.  81. 127.\n",
+      " 164.  78.  63.  53.  66.]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "\n",
+    "print(pred)\n",
+    "print(diabetes_target_test)"
    ]
   },
   {
@@ -267,7 +520,10 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your explanation here:\n"
+    "# Your explanation here:\n",
+    "\n",
+    "## No. The model is not accurate enough to predict individual values precisely, although it already\n",
+    "## partially follows the trend of the target_test values\n"
    ]
   },
   {
@@ -351,11 +607,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 18,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "# Path is relative to this notebook's folder (your-code/) so it does not depend on one user's home directory\n",
+    "auto = pd.read_csv(\"../auto-mpg.csv\")"
    ]
   },
   {
@@ -367,11 +625,125 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Your code here:\n"
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>mpg</th>\n",
+       "      <th>cylinders</th>\n",
+       "      <th>displacement</th>\n",
+       "      <th>horse_power</th>\n",
+       "      <th>weight</th>\n",
+       "      <th>acceleration</th>\n",
+       "      <th>model_year</th>\n",
+       "      <th>car_name</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>18.0</td>\n",
+       "      <td>8</td>\n",
+       "      <td>307.0</td>\n",
+       "      <td>130.0</td>\n",
+       "      <td>3504</td>\n",
+       "      <td>12.0</td>\n",
+       "      <td>70</td>\n",
+       "      <td>\\t\"chevrolet chevelle malibu\"</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>15.0</td>\n",
+       "      <td>8</td>\n",
+       "      <td>350.0</td>\n",
+       "      <td>165.0</td>\n",
+       "      <td>3693</td>\n",
+       "      <td>11.5</td>\n",
+       "      <td>70</td>\n",
+       "      <td>\\t\"buick skylark 320\"</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>18.0</td>\n",
+       "      <td>8</td>\n",
+       "      <td>318.0</td>\n",
+       "      <td>150.0</td>\n",
+       "      <td>3436</td>\n",
+       "      <td>11.0</td>\n",
+       "      <td>70</td>\n",
+       "      <td>\\t\"plymouth satellite\"</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>16.0</td>\n",
+       "      <td>8</td>\n",
+       "      <td>304.0</td>\n",
+       "      <td>150.0</td>\n",
+       "      <td>3433</td>\n",
+       "      <td>12.0</td>\n",
+       "      <td>70</td>\n",
+       "      <td>\\t\"amc rebel sst\"</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>17.0</td>\n",
+       "      <td>8</td>\n",
+       "      <td>302.0</td>\n",
+       "      <td>140.0</td>\n",
+       "      <td>3449</td>\n",
+       "      <td>10.5</td>\n",
+       "      <td>70</td>\n",
+       "      <td>\\t\"ford torino\"</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "    mpg  cylinders  displacement  horse_power  weight  acceleration  \\\n",
+       "0  18.0          8         307.0        130.0    3504          12.0   \n",
+       "1  15.0          8         350.0        165.0    3693          11.5   \n",
+       "2  18.0          8         318.0        150.0    3436          11.0   \n",
+       "3  16.0          8         304.0        150.0    3433          12.0   \n",
+       "4  17.0          8         302.0        140.0    3449          10.5   \n",
+       "\n",
+       "   model_year                       car_name  \n",
+       "0          70  \\t\"chevrolet chevelle malibu\"  \n",
+       "1          70          \\t\"buick skylark 320\"  \n",
+       "2          70         \\t\"plymouth satellite\"  \n",
+       "3          70              \\t\"amc rebel sst\"  \n",
+       "4          70                \\t\"ford torino\"  "
+      ]
+     },
+     "execution_count": 19,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "\n",
+    "auto.head()"
    ]
   },
   {
@@ -383,11 +755,34 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Your code here:\n"
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "mpg             float64\n",
+       "cylinders         int64\n",
+       "displacement    float64\n",
+       "horse_power     float64\n",
+       "weight            int64\n",
+       "acceleration    float64\n",
+       "model_year        int64\n",
+       "car_name         object\n",
+       "dtype: object"
+      ]
+     },
+     "execution_count": 22,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "\n",
+    "auto.dtypes\n",
+    "\n",
+    "## The only object (non-numeric) column is \"car_name\"; every other column is already numeric."
    ]
   },
   {
@@ -399,11 +794,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Your code here:\n"
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Newest model year 82\n",
+      "Oldest model year 70\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "\n",
+    "print(\"Newest model year\", auto[\"model_year\"].max())\n",
+    "print(\"Oldest model year\", auto[\"model_year\"].min())"
    ]
   },
   {
@@ -415,11 +822,61 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Your code here:\n"
+   "execution_count": 27,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "mpg             0\n",
+       "cylinders       0\n",
+       "displacement    0\n",
+       "horse_power     6\n",
+       "weight          0\n",
+       "acceleration    0\n",
+       "model_year      0\n",
+       "car_name        0\n",
+       "dtype: int64"
+      ]
+     },
+     "execution_count": 27,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "\n",
+    "auto.isnull().sum()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "mpg             0\n",
+       "cylinders       0\n",
+       "displacement    0\n",
+       "horse_power     0\n",
+       "weight          0\n",
+       "acceleration    0\n",
+       "model_year      0\n",
+       "car_name        0\n",
+       "dtype: int64"
+      ]
+     },
+     "execution_count": 32,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "auto.dropna(inplace=True)\n",
+    "auto.isnull().sum()"
    ]
   },
   {
@@ -431,11 +888,31 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Your code here:\n"
+   "execution_count": 34,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "4    199\n",
+       "8    103\n",
+       "6     83\n",
+       "3      4\n",
+       "5      3\n",
+       "Name: cylinders, dtype: int64"
+      ]
+     },
+     "execution_count": 34,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "\n",
+    "auto[\"cylinders\"].value_counts()\n",
+    "\n",
+    "## 5 distinct cylinder values: 3, 4, 5, 6 and 8"
    ]
   },
   {
@@ -451,11 +928,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 44,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "\n",
+    "# car_name is a text label; LinearRegression needs numeric inputs, so it is dropped together with the target\n",
+    "\n",
+    "features = auto.drop(columns = [\"mpg\", \"car_name\"])\n",
+    "target = auto[\"mpg\"]\n",
+    "\n",
+    "X_train, X_test, y_train, y_test = train_test_split(features, target, test_size = 0.20, random_state = 42)"
    ]
   },
   {
@@ -469,11 +953,25 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Your code here:\n"
+   "execution_count": 45,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "LinearRegression()"
+      ]
+     },
+     "execution_count": 45,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "\n",
+    "auto_model = LinearRegression()\n",
+    "auto_model.fit(X_train, y_train)"
    ]
   },
   {
@@ -493,11 +991,27 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Your code here:\n"
+   "execution_count": 47,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0.7738352694130818\n",
+      "0.8151047252127355\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "\n",
+    "print(auto_model.score(X_test, y_test))\n",
+    "\n",
+    "from sklearn.metrics import r2_score\n",
+    "\n",
+    "y_pred = auto_model.predict(X_train)\n",
+    "print(r2_score(y_train, y_pred))"
    ]
   },
   {
@@ -513,11 +1027,24 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Your code here:\n"
+   "execution_count": 50,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0.7738352694130818\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "\n",
+    "## Same value as auto_model.score(X_test, y_test) above: score() returns the R^2 of the data it is given\n",
+    "\n",
+    "y_test_pred = auto_model.predict(X_test)\n",
+    "print(r2_score(y_test, y_test_pred))"
    ]
   },
   {
@@ -542,11 +1069,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 51,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "# Your code here:\n",
+    "# 90% train / 10% test; fixed seed so this split and its scores are reproducible on re-run\n",
+    "X_train09, X_test09, y_train09, y_test09 = train_test_split(features, target, test_size = 0.10, random_state = 42)"
    ]
   },
   {
@@ -558,11 +1087,25 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Your code here:\n"
+   "execution_count": 53,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "LinearRegression()"
+      ]
+     },
+     "execution_count": 53,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "\n",
+    "auto_model09 = LinearRegression()\n",
+    "auto_model09.fit(X_train09, y_train09)"
    ]
   },
   {
@@ -574,11 +1117,22 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Your code here:\n"
+   "execution_count": 59,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0.8137446885335688\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "\n",
+    "y_pred09 = auto_model09.predict(X_train09)\n",
+    "print(r2_score(y_train09, y_pred09))"
    ]
   },
   {
@@ -590,11 +1144,24 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Your code here:\n"
+   "execution_count": 60,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0.7452575439654096\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Your code here:\n",
+    "\n",
+    "y_pred_test = auto_model09.predict(X_test09)\n",
+    "print(r2_score(y_test09, y_pred_test))\n",
+    "\n",
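+    "## The test score didn't improve, although I expected it to. The two test sets are different random samples (and the 10% one is small), so the scores may differ by chance. TODO: review the code later on"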
    ]
   },
   {
@@ -703,7 +1270,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -717,7 +1284,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.9"
+   "version": "3.9.13"
   }
  },
  "nbformat": 4,