diff --git a/your-code/main.ipynb b/your-code/main.ipynb
index 0102ef9..96f55a6 100755
--- a/your-code/main.ipynb
+++ b/your-code/main.ipynb
@@ -12,11 +12,12 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
- "# Import your libraries:\n"
+ "# Import your libraries:\n",
+ "import pandas as pd"
]
},
{
@@ -37,11 +38,13 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "from sklearn import datasets\n",
+ "diabetes = datasets.load_diabetes()"
]
},
{
@@ -53,11 +56,23 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 3,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "dict_keys(['data', 'target', 'frame', 'DESCR', 'feature_names', 'data_filename', 'target_filename'])"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "diabetes.keys()"
]
},
{
@@ -73,13 +88,80 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 4,
"metadata": {
"scrolled": false
},
- "outputs": [],
- "source": [
- "# Your code here:\n"
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ ".. _diabetes_dataset:\n",
+ "\n",
+ "Diabetes dataset\n",
+ "----------------\n",
+ "\n",
+ "Ten baseline variables, age, sex, body mass index, average blood\n",
+ "pressure, and six blood serum measurements were obtained for each of n =\n",
+ "442 diabetes patients, as well as the response of interest, a\n",
+ "quantitative measure of disease progression one year after baseline.\n",
+ "\n",
+ "**Data Set Characteristics:**\n",
+ "\n",
+ " :Number of Instances: 442\n",
+ "\n",
+ " :Number of Attributes: First 10 columns are numeric predictive values\n",
+ "\n",
+ " :Target: Column 11 is a quantitative measure of disease progression one year after baseline\n",
+ "\n",
+ " :Attribute Information:\n",
+ " - age age in years\n",
+ " - sex\n",
+ " - bmi body mass index\n",
+ " - bp average blood pressure\n",
+ " - s1 tc, T-Cells (a type of white blood cells)\n",
+ " - s2 ldl, low-density lipoproteins\n",
+ " - s3 hdl, high-density lipoproteins\n",
+ " - s4 tch, thyroid stimulating hormone\n",
+ " - s5 ltg, lamotrigine\n",
+ " - s6 glu, blood sugar level\n",
+ "\n",
+ "Note: Each of these 10 feature variables have been mean centered and scaled by the standard deviation times `n_samples` (i.e. the sum of squares of each column totals 1).\n",
+ "\n",
+ "Source URL:\n",
+ "https://www4.stat.ncsu.edu/~boos/var.select/diabetes.html\n",
+ "\n",
+ "For more information see:\n",
+ "Bradley Efron, Trevor Hastie, Iain Johnstone and Robert Tibshirani (2004) \"Least Angle Regression,\" Annals of Statistics (with discussion), 407-499.\n",
+ "(https://web.stanford.edu/~hastie/Papers/LARS/LeastAngle_2002.pdf)\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "(442, 10)"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ "(442,)"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Your code here:\n",
+ "print(diabetes['DESCR'])\n",
+ "display(diabetes['data'].shape)\n",
+ "diabetes['target'].shape"
]
},
{
@@ -97,11 +179,14 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
- "# Enter your answer here:\n"
+ "# Enter your answer here:\n",
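+ "# The data has 10 attributes: age, sex, bmi, bp and six blood serum measurements (s1-s6)\n",
+ "# The data holds the baseline measurements of each patient and the target is a quantitative measure of disease progression one year after baseline\n",
+ "# Both have 442 records: data has shape (442, 10) and target has shape (442,)"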
]
},
{
@@ -115,11 +200,33 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your code here:\n"
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(442, 10)"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ "(442,)"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Your code here:\n",
+ "display(diabetes['data'].shape)\n",
+ "diabetes['target'].shape"
]
},
{
@@ -156,11 +263,12 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "from sklearn import linear_model"
]
},
{
@@ -172,11 +280,12 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "diabetes_model=linear_model.LinearRegression()"
]
},
{
@@ -190,11 +299,14 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "X_train,X_test,y_train,y_test=train_test_split(diabetes['data'],diabetes['target'],test_size=0.2,\n",
+ " random_state=42)"
]
},
{
@@ -206,11 +318,36 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your code here:\n"
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "151.3456553477407"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ "array([ 37.90031426, -241.96624835, 542.42575342, 347.70830529,\n",
+ " -931.46126093, 518.04405547, 163.40353476, 275.31003837,\n",
+ " 736.18909839, 48.67112488])"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Your code here:\n",
+ "diabetes_model.fit(X_train,y_train)\n",
+ "display(diabetes_model.intercept_)\n",
+ "diabetes_model.coef_"
]
},
{
@@ -231,11 +368,46 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your code here:\n"
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([139.5483133 , 179.52030578, 134.04133298, 291.41193598,\n",
+ " 123.78723656, 92.17357677, 258.23409704, 181.33895238,\n",
+ " 90.22217862, 108.63143298, 94.13938654, 168.43379636,\n",
+ " 53.50669663, 206.63040068, 100.13238561, 130.66881649,\n",
+ " 219.53270758, 250.78291772, 196.36682356, 218.57497401,\n",
+ " 207.35002447, 88.48361667, 70.43428801, 188.95725301,\n",
+ " 154.88720039, 159.35957695, 188.31587948, 180.38835506,\n",
+ " 47.98988446, 108.97514644, 174.78080029, 86.36598906,\n",
+ " 132.95890535, 184.5410226 , 173.83298051, 190.35863287,\n",
+ " 124.41740796, 119.65426903, 147.95402494, 59.05311211,\n",
+ " 71.62636914, 107.68722902, 165.45544477, 155.00784964,\n",
+ " 171.04558668, 61.45763075, 71.66975626, 114.96330486,\n",
+ " 51.57808027, 167.57781958, 152.52505798, 62.95827693,\n",
+ " 103.49862017, 109.20495627, 175.63844013, 154.60247734,\n",
+ " 94.41476124, 210.74244148, 120.25601864, 77.61590087,\n",
+ " 187.93503183, 206.49543321, 140.63018684, 105.59463059,\n",
+ " 130.704246 , 202.18650868, 171.1330116 , 164.91246096,\n",
+ " 124.72637597, 144.81210187, 181.99631481, 199.41234515,\n",
+ " 234.21402489, 145.96053305, 79.86349114, 157.36828831,\n",
+ " 192.74737754, 208.8980067 , 158.58505486, 206.0226849 ,\n",
+ " 107.47978402, 140.93428553, 54.81856678, 55.92807758,\n",
+ " 115.00974554, 78.95886675, 81.55731377, 54.3774778 ,\n",
+ " 166.25477778])"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Your code here:\n",
+ "y_pred=diabetes_model.predict(X_test)\n",
+ "y_pred"
]
},
{
@@ -247,11 +419,42 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your code here:\n"
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([219., 70., 202., 230., 111., 84., 242., 272., 94., 96., 94.,\n",
+ " 252., 99., 297., 135., 67., 295., 264., 170., 275., 310., 64.,\n",
+ " 128., 232., 129., 118., 263., 77., 48., 107., 140., 113., 90.,\n",
+ " 164., 180., 233., 42., 84., 172., 63., 48., 108., 156., 168.,\n",
+ " 90., 52., 200., 87., 90., 258., 136., 158., 69., 72., 171.,\n",
+ " 95., 72., 151., 168., 60., 122., 52., 187., 102., 214., 248.,\n",
+ " 181., 110., 140., 202., 101., 222., 281., 61., 89., 91., 186.,\n",
+ " 220., 237., 233., 68., 190., 96., 72., 153., 98., 37., 63.,\n",
+ " 184.])"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ "0.45260660216173787"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Your code here:\n",
+ "display(y_test)\n",
+ "from sklearn.metrics import r2_score\n",
+ "r2_score(y_test,y_pred)"
]
},
{
@@ -263,11 +466,12 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
- "# Your explanation here:\n"
+ "# Your explanation here:\n",
+ "# No, y_pred is not the same as y_test: the R^2 score of about 0.45 means the model explains less than half of the variance of the test target"
]
},
{
@@ -302,11 +506,147 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your code here:\n"
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "OLS Regression Results\n",
+ "\n",
+ " | Dep. Variable: | y | R-squared: | 0.528 | \n",
+ "
\n",
+ "\n",
+ " | Model: | OLS | Adj. R-squared: | 0.514 | \n",
+ "
\n",
+ "\n",
+ " | Method: | Least Squares | F-statistic: | 38.25 | \n",
+ "
\n",
+ "\n",
+ " | Date: | Mon, 11 Oct 2021 | Prob (F-statistic): | 5.41e-50 | \n",
+ "
\n",
+ "\n",
+ " | Time: | 16:17:23 | Log-Likelihood: | -1906.1 | \n",
+ "
\n",
+ "\n",
+ " | No. Observations: | 353 | AIC: | 3834. | \n",
+ "
\n",
+ "\n",
+ " | Df Residuals: | 342 | BIC: | 3877. | \n",
+ "
\n",
+ "\n",
+ " | Df Model: | 10 | | | \n",
+ "
\n",
+ "\n",
+ " | Covariance Type: | nonrobust | | | \n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ " | coef | std err | t | P>|t| | [0.025 | 0.975] | \n",
+ "
\n",
+ "\n",
+ " | const | 151.3457 | 2.902 | 52.155 | 0.000 | 145.638 | 157.053 | \n",
+ "
\n",
+ "\n",
+ " | x1 | 37.9003 | 69.056 | 0.549 | 0.583 | -97.927 | 173.728 | \n",
+ "
\n",
+ "\n",
+ " | x2 | -241.9662 | 68.570 | -3.529 | 0.000 | -376.837 | -107.095 | \n",
+ "
\n",
+ "\n",
+ " | x3 | 542.4258 | 76.956 | 7.049 | 0.000 | 391.059 | 693.792 | \n",
+ "
\n",
+ "\n",
+ " | x4 | 347.7083 | 71.357 | 4.873 | 0.000 | 207.355 | 488.062 | \n",
+ "
\n",
+ "\n",
+ " | x5 | -931.4613 | 451.142 | -2.065 | 0.040 | -1818.823 | -44.099 | \n",
+ "
\n",
+ "\n",
+ " | x6 | 518.0441 | 364.118 | 1.423 | 0.156 | -198.150 | 1234.238 | \n",
+ "
\n",
+ "\n",
+ " | x7 | 163.4035 | 233.015 | 0.701 | 0.484 | -294.919 | 621.726 | \n",
+ "
\n",
+ "\n",
+ " | x8 | 275.3100 | 185.399 | 1.485 | 0.138 | -89.357 | 639.977 | \n",
+ "
\n",
+ "\n",
+ " | x9 | 736.1891 | 192.440 | 3.826 | 0.000 | 357.675 | 1114.704 | \n",
+ "
\n",
+ "\n",
+ " | x10 | 48.6711 | 73.435 | 0.663 | 0.508 | -95.771 | 193.113 | \n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ " | Omnibus: | 1.457 | Durbin-Watson: | 1.794 | \n",
+ "
\n",
+ "\n",
+ " | Prob(Omnibus): | 0.483 | Jarque-Bera (JB): | 1.412 | \n",
+ "
\n",
+ "\n",
+ " | Skew: | 0.064 | Prob(JB): | 0.494 | \n",
+ "
\n",
+ "\n",
+ " | Kurtosis: | 2.718 | Cond. No. | 219. | \n",
+ "
\n",
+ "
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified."
+ ],
+ "text/plain": [
+ "\n",
+ "\"\"\"\n",
+ " OLS Regression Results \n",
+ "==============================================================================\n",
+ "Dep. Variable: y R-squared: 0.528\n",
+ "Model: OLS Adj. R-squared: 0.514\n",
+ "Method: Least Squares F-statistic: 38.25\n",
+ "Date: Mon, 11 Oct 2021 Prob (F-statistic): 5.41e-50\n",
+ "Time: 16:17:23 Log-Likelihood: -1906.1\n",
+ "No. Observations: 353 AIC: 3834.\n",
+ "Df Residuals: 342 BIC: 3877.\n",
+ "Df Model: 10 \n",
+ "Covariance Type: nonrobust \n",
+ "==============================================================================\n",
+ " coef std err t P>|t| [0.025 0.975]\n",
+ "------------------------------------------------------------------------------\n",
+ "const 151.3457 2.902 52.155 0.000 145.638 157.053\n",
+ "x1 37.9003 69.056 0.549 0.583 -97.927 173.728\n",
+ "x2 -241.9662 68.570 -3.529 0.000 -376.837 -107.095\n",
+ "x3 542.4258 76.956 7.049 0.000 391.059 693.792\n",
+ "x4 347.7083 71.357 4.873 0.000 207.355 488.062\n",
+ "x5 -931.4613 451.142 -2.065 0.040 -1818.823 -44.099\n",
+ "x6 518.0441 364.118 1.423 0.156 -198.150 1234.238\n",
+ "x7 163.4035 233.015 0.701 0.484 -294.919 621.726\n",
+ "x8 275.3100 185.399 1.485 0.138 -89.357 639.977\n",
+ "x9 736.1891 192.440 3.826 0.000 357.675 1114.704\n",
+ "x10 48.6711 73.435 0.663 0.508 -95.771 193.113\n",
+ "==============================================================================\n",
+ "Omnibus: 1.457 Durbin-Watson: 1.794\n",
+ "Prob(Omnibus): 0.483 Jarque-Bera (JB): 1.412\n",
+ "Skew: 0.064 Prob(JB): 0.494\n",
+ "Kurtosis: 2.718 Cond. No. 219.\n",
+ "==============================================================================\n",
+ "\n",
+ "Notes:\n",
+ "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
+ "\"\"\""
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Your code here:\n",
+ "import statsmodels.api as sm\n",
+ "data_train=sm.add_constant(X_train)\n",
+ "model = sm.OLS(y_train,data_train)\n",
+ "res = model.fit()\n",
+ "res.summary()"
]
},
{
@@ -326,11 +666,14 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
- "# Your answers here:"
+ "# Your answers here:\n",
+ "# The F-statistic is 38.25 with a p-value (Prob (F-statistic)) of 5.41e-50, therefore the null hypothesis is rejected\n",
+ "# Yes, x1, x6, x7, x8 and x10 have confidence intervals that contain zero (p-values above 0.05), so they have no significance in the model\n",
+ "# I would remove some of those variables, maybe even all of them, and refit the model"
]
},
{
@@ -351,11 +694,12 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "auto=pd.read_csv('../auto-mpg.csv')"
]
},
{
@@ -367,11 +711,124 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your code here:\n"
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " mpg | \n",
+ " cylinders | \n",
+ " displacement | \n",
+ " horse_power | \n",
+ " weight | \n",
+ " acceleration | \n",
+ " model_year | \n",
+ " car_name | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 18.0 | \n",
+ " 8 | \n",
+ " 307.0 | \n",
+ " 130.0 | \n",
+ " 3504 | \n",
+ " 12.0 | \n",
+ " 70 | \n",
+ " \\t\"chevrolet chevelle malibu\" | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 15.0 | \n",
+ " 8 | \n",
+ " 350.0 | \n",
+ " 165.0 | \n",
+ " 3693 | \n",
+ " 11.5 | \n",
+ " 70 | \n",
+ " \\t\"buick skylark 320\" | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 18.0 | \n",
+ " 8 | \n",
+ " 318.0 | \n",
+ " 150.0 | \n",
+ " 3436 | \n",
+ " 11.0 | \n",
+ " 70 | \n",
+ " \\t\"plymouth satellite\" | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 16.0 | \n",
+ " 8 | \n",
+ " 304.0 | \n",
+ " 150.0 | \n",
+ " 3433 | \n",
+ " 12.0 | \n",
+ " 70 | \n",
+ " \\t\"amc rebel sst\" | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 17.0 | \n",
+ " 8 | \n",
+ " 302.0 | \n",
+ " 140.0 | \n",
+ " 3449 | \n",
+ " 10.5 | \n",
+ " 70 | \n",
+ " \\t\"ford torino\" | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " mpg cylinders displacement horse_power weight acceleration \\\n",
+ "0 18.0 8 307.0 130.0 3504 12.0 \n",
+ "1 15.0 8 350.0 165.0 3693 11.5 \n",
+ "2 18.0 8 318.0 150.0 3436 11.0 \n",
+ "3 16.0 8 304.0 150.0 3433 12.0 \n",
+ "4 17.0 8 302.0 140.0 3449 10.5 \n",
+ "\n",
+ " model_year car_name \n",
+ "0 70 \\t\"chevrolet chevelle malibu\" \n",
+ "1 70 \\t\"buick skylark 320\" \n",
+ "2 70 \\t\"plymouth satellite\" \n",
+ "3 70 \\t\"amc rebel sst\" \n",
+ "4 70 \\t\"ford torino\" "
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Your code here:\n",
+ "auto.head(5)"
]
},
{
@@ -383,11 +840,34 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your code here:\n"
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "RangeIndex: 398 entries, 0 to 397\n",
+ "Data columns (total 8 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 mpg 398 non-null float64\n",
+ " 1 cylinders 398 non-null int64 \n",
+ " 2 displacement 398 non-null float64\n",
+ " 3 horse_power 392 non-null float64\n",
+ " 4 weight 398 non-null int64 \n",
+ " 5 acceleration 398 non-null float64\n",
+ " 6 model_year 398 non-null int64 \n",
+ " 7 car_name 398 non-null object \n",
+ "dtypes: float64(4), int64(3), object(1)\n",
+ "memory usage: 25.0+ KB\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Your code here:\n",
+ "auto.info()"
]
},
{
@@ -399,11 +879,33 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your code here:\n"
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "82"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ "70"
+ ]
+ },
+ "execution_count": 19,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Your code here:\n",
+ "display(auto.model_year.max())\n",
+ "auto.model_year.min()"
]
},
{
@@ -415,11 +917,31 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your code here:\n"
+ "execution_count": 20,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "mpg 0\n",
+ "cylinders 0\n",
+ "displacement 0\n",
+ "horse_power 6\n",
+ "weight 0\n",
+ "acceleration 0\n",
+ "model_year 0\n",
+ "car_name 0\n",
+ "dtype: int64"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Your code here:\n",
+ "display(auto.isna().sum())\n",
+ "auto=auto.dropna()"
]
},
{
@@ -431,11 +953,28 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your code here:\n"
+ "execution_count": 21,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "4 199\n",
+ "8 103\n",
+ "6 83\n",
+ "3 4\n",
+ "5 3\n",
+ "Name: cylinders, dtype: int64"
+ ]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Your code here:\n",
+ "auto.cylinders.value_counts()"
]
},
{
@@ -451,11 +990,13 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "X_train,X_test,y_train,y_test=train_test_split(auto.drop(['car_name','mpg'],axis=1),\n",
+ " auto['mpg'],test_size=0.2,random_state=42)"
]
},
{
@@ -469,11 +1010,24 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 23,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "LinearRegression()"
+ ]
+ },
+ "execution_count": 23,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "auto_model=linear_model.LinearRegression()\n",
+ "auto_model.fit(X_train,y_train)"
]
},
{
@@ -502,11 +1056,24 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 24,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.8107227953093896"
+ ]
+ },
+ "execution_count": 24,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "y_pred=auto_model.predict(X_train)\n",
+ "r2_score(y_train,y_pred)"
]
},
{
@@ -522,11 +1089,24 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 25,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.7942349075428593"
+ ]
+ },
+ "execution_count": 25,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "y_test_pred=auto_model.predict(X_test)\n",
+ "r2_score(y_test,y_test_pred)"
]
},
{
@@ -551,11 +1131,13 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "X_train09,X_test09,y_train09,y_test09=train_test_split(auto.drop(['car_name','mpg'],axis=1),\n",
+ " auto['mpg'],test_size=0.1,random_state=42)"
]
},
{
@@ -567,11 +1149,24 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 27,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "LinearRegression()"
+ ]
+ },
+ "execution_count": 27,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "auto_model09=linear_model.LinearRegression()\n",
+ "auto_model09.fit(X_train09,y_train09)"
]
},
{
@@ -583,11 +1178,24 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 28,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.8468911998183242"
+ ]
+ },
+ "execution_count": 28,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "y_pred09=auto_model09.predict(X_test09)\n",
+ "r2_score(y_test09,y_pred09)"
]
},
{
@@ -599,11 +1207,24 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 29,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.8047940166959004"
+ ]
+ },
+ "execution_count": 29,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "y_test_pred09=auto_model09.predict(X_train09)\n",
+ "r2_score(y_train09,y_test_pred09)"
]
},
{
@@ -619,7 +1240,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 30,
"metadata": {},
"outputs": [],
"source": [
@@ -635,11 +1256,12 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "selector=RFE(auto_model,n_features_to_select=3)"
]
},
{
@@ -651,11 +1273,24 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 32,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([1, 4, 3, 2, 1, 1])"
+ ]
+ },
+ "execution_count": 32,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "selector.fit(X_train,y_train)\n",
+ "selector.ranking_"
]
},
{
@@ -669,11 +1304,15 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "X_train_reduced,X_test_reduced,y_train_reduced,y_test_reduced=train_test_split(auto[['cylinders',\n",
+ " 'acceleration',\n",
+ " 'model_year']],\n",
+ " auto['mpg'],test_size=0.2,random_state=42)"
]
},
{
@@ -685,11 +1324,26 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 34,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.7144839092209849"
+ ]
+ },
+ "execution_count": 34,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here: \n"
+ "# Your code here: \n",
+ "auto_model_reduced=linear_model.LinearRegression()\n",
+ "auto_model_reduced.fit(X_train_reduced,y_train_reduced)\n",
+ "y_pred_reduced=auto_model_reduced.predict(X_train_reduced)\n",
+ "r2_score(y_train_reduced,y_pred_reduced)"
]
},
{
@@ -726,7 +1380,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.6.6"
+ "version": "3.8.8"
}
},
"nbformat": 4,