diff --git a/your-code/main.ipynb b/your-code/main.ipynb index 0102ef9..96f55a6 100755 --- a/your-code/main.ipynb +++ b/your-code/main.ipynb @@ -12,11 +12,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ - "# Import your libraries:\n" + "# Import your libraries:\n", + "import pandas as pd" ] }, { @@ -37,11 +38,13 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "from sklearn import datasets\n", + "diabetes = datasets.load_diabetes()" ] }, { @@ -53,11 +56,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['data', 'target', 'frame', 'DESCR', 'feature_names', 'data_filename', 'target_filename'])" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "diabetes.keys()" ] }, { @@ -73,13 +88,80 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": { "scrolled": false }, - "outputs": [], - "source": [ - "# Your code here:\n" + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + ".. 
_diabetes_dataset:\n", + "\n", + "Diabetes dataset\n", + "----------------\n", + "\n", + "Ten baseline variables, age, sex, body mass index, average blood\n", + "pressure, and six blood serum measurements were obtained for each of n =\n", + "442 diabetes patients, as well as the response of interest, a\n", + "quantitative measure of disease progression one year after baseline.\n", + "\n", + "**Data Set Characteristics:**\n", + "\n", + " :Number of Instances: 442\n", + "\n", + " :Number of Attributes: First 10 columns are numeric predictive values\n", + "\n", + " :Target: Column 11 is a quantitative measure of disease progression one year after baseline\n", + "\n", + " :Attribute Information:\n", + " - age age in years\n", + " - sex\n", + " - bmi body mass index\n", + " - bp average blood pressure\n", + " - s1 tc, T-Cells (a type of white blood cells)\n", + " - s2 ldl, low-density lipoproteins\n", + " - s3 hdl, high-density lipoproteins\n", + " - s4 tch, thyroid stimulating hormone\n", + " - s5 ltg, lamotrigine\n", + " - s6 glu, blood sugar level\n", + "\n", + "Note: Each of these 10 feature variables have been mean centered and scaled by the standard deviation times `n_samples` (i.e. 
the sum of squares of each column totals 1).\n", + "\n", + "Source URL:\n", + "https://www4.stat.ncsu.edu/~boos/var.select/diabetes.html\n", + "\n", + "For more information see:\n", + "Bradley Efron, Trevor Hastie, Iain Johnstone and Robert Tibshirani (2004) \"Least Angle Regression,\" Annals of Statistics (with discussion), 407-499.\n", + "(https://web.stanford.edu/~hastie/Papers/LARS/LeastAngle_2002.pdf)\n" + ] + }, + { + "data": { + "text/plain": [ + "(442, 10)" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "(442,)" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Your code here:\n", + "print(diabetes['DESCR'])\n", + "display(diabetes['data'].shape)\n", + "diabetes['target'].shape" ] }, { @@ -97,11 +179,14 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ - "# Enter your answer here:\n" + "# Enter your answer here:\n", + "# There are 10 attributes in the data\n", + "# The data seems to be the conditions for the patient and the target the measure of the disease\n", + "# They both have 442 rows / values" ] }, { @@ -115,11 +200,33 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(442, 10)" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "(442,)" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Your code here:\n", + "display(diabetes['data'].shape)\n", + "diabetes['target'].shape" ] }, { @@ -156,11 +263,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "from sklearn 
import linear_model" ] }, { @@ -172,11 +280,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "diabetes_model=linear_model.LinearRegression()" ] }, { @@ -190,11 +299,14 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "from sklearn.model_selection import train_test_split\n", + "X_train,X_test,y_train,y_test=train_test_split(diabetes['data'],diabetes['target'],test_size=0.2,\n", + " random_state=42)" ] }, { @@ -206,11 +318,36 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "151.3456553477407" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "array([ 37.90031426, -241.96624835, 542.42575342, 347.70830529,\n", + " -931.46126093, 518.04405547, 163.40353476, 275.31003837,\n", + " 736.18909839, 48.67112488])" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Your code here:\n", + "diabetes_model.fit(X_train,y_train)\n", + "display(diabetes_model.intercept_)\n", + "diabetes_model.coef_" ] }, { @@ -231,11 +368,46 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([139.5483133 , 179.52030578, 134.04133298, 291.41193598,\n", + " 123.78723656, 92.17357677, 258.23409704, 181.33895238,\n", + " 90.22217862, 108.63143298, 94.13938654, 168.43379636,\n", + " 53.50669663, 206.63040068, 100.13238561, 130.66881649,\n", + " 219.53270758, 250.78291772, 196.36682356, 
218.57497401,\n", + " 207.35002447, 88.48361667, 70.43428801, 188.95725301,\n", + " 154.88720039, 159.35957695, 188.31587948, 180.38835506,\n", + " 47.98988446, 108.97514644, 174.78080029, 86.36598906,\n", + " 132.95890535, 184.5410226 , 173.83298051, 190.35863287,\n", + " 124.41740796, 119.65426903, 147.95402494, 59.05311211,\n", + " 71.62636914, 107.68722902, 165.45544477, 155.00784964,\n", + " 171.04558668, 61.45763075, 71.66975626, 114.96330486,\n", + " 51.57808027, 167.57781958, 152.52505798, 62.95827693,\n", + " 103.49862017, 109.20495627, 175.63844013, 154.60247734,\n", + " 94.41476124, 210.74244148, 120.25601864, 77.61590087,\n", + " 187.93503183, 206.49543321, 140.63018684, 105.59463059,\n", + " 130.704246 , 202.18650868, 171.1330116 , 164.91246096,\n", + " 124.72637597, 144.81210187, 181.99631481, 199.41234515,\n", + " 234.21402489, 145.96053305, 79.86349114, 157.36828831,\n", + " 192.74737754, 208.8980067 , 158.58505486, 206.0226849 ,\n", + " 107.47978402, 140.93428553, 54.81856678, 55.92807758,\n", + " 115.00974554, 78.95886675, 81.55731377, 54.3774778 ,\n", + " 166.25477778])" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Your code here:\n", + "y_pred=diabetes_model.predict(X_test)\n", + "y_pred" ] }, { @@ -247,11 +419,42 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([219., 70., 202., 230., 111., 84., 242., 272., 94., 96., 94.,\n", + " 252., 99., 297., 135., 67., 295., 264., 170., 275., 310., 64.,\n", + " 128., 232., 129., 118., 263., 77., 48., 107., 140., 113., 90.,\n", + " 164., 180., 233., 42., 84., 172., 63., 48., 108., 156., 168.,\n", + " 90., 52., 200., 87., 90., 258., 136., 158., 69., 72., 171.,\n", + " 95., 72., 151., 168., 60., 122., 52., 187., 102., 214., 248.,\n", + " 181., 110., 140., 202., 
101., 222., 281., 61., 89., 91., 186.,\n", + " 220., 237., 233., 68., 190., 96., 72., 153., 98., 37., 63.,\n", + " 184.])" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "0.45260660216173787" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Your code here:\n", + "display(y_test)\n", + "from sklearn.metrics import r2_score\n", + "r2_score(y_test,y_pred)" ] }, { @@ -263,11 +466,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ - "# Your explanation here:\n" + "# Your explanation here:\n", + "# It is not exactly the same, actually they are far apart according to the R^2 test" ] }, { @@ -302,11 +506,147 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "
OLS Regression Results
Dep. Variable: y R-squared: 0.528
Model: OLS Adj. R-squared: 0.514
Method: Least Squares F-statistic: 38.25
Date: Mon, 11 Oct 2021 Prob (F-statistic): 5.41e-50
Time: 16:17:23 Log-Likelihood: -1906.1
No. Observations: 353 AIC: 3834.
Df Residuals: 342 BIC: 3877.
Df Model: 10
Covariance Type: nonrobust
\n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "
coef std err t P>|t| [0.025 0.975]
const 151.3457 2.902 52.155 0.000 145.638 157.053
x1 37.9003 69.056 0.549 0.583 -97.927 173.728
x2 -241.9662 68.570 -3.529 0.000 -376.837 -107.095
x3 542.4258 76.956 7.049 0.000 391.059 693.792
x4 347.7083 71.357 4.873 0.000 207.355 488.062
x5 -931.4613 451.142 -2.065 0.040 -1818.823 -44.099
x6 518.0441 364.118 1.423 0.156 -198.150 1234.238
x7 163.4035 233.015 0.701 0.484 -294.919 621.726
x8 275.3100 185.399 1.485 0.138 -89.357 639.977
x9 736.1891 192.440 3.826 0.000 357.675 1114.704
x10 48.6711 73.435 0.663 0.508 -95.771 193.113
\n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "
Omnibus: 1.457 Durbin-Watson: 1.794
Prob(Omnibus): 0.483 Jarque-Bera (JB): 1.412
Skew: 0.064 Prob(JB): 0.494
Kurtosis: 2.718 Cond. No. 219.


Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified." + ], + "text/plain": [ + "\n", + "\"\"\"\n", + " OLS Regression Results \n", + "==============================================================================\n", + "Dep. Variable: y R-squared: 0.528\n", + "Model: OLS Adj. R-squared: 0.514\n", + "Method: Least Squares F-statistic: 38.25\n", + "Date: Mon, 11 Oct 2021 Prob (F-statistic): 5.41e-50\n", + "Time: 16:17:23 Log-Likelihood: -1906.1\n", + "No. Observations: 353 AIC: 3834.\n", + "Df Residuals: 342 BIC: 3877.\n", + "Df Model: 10 \n", + "Covariance Type: nonrobust \n", + "==============================================================================\n", + " coef std err t P>|t| [0.025 0.975]\n", + "------------------------------------------------------------------------------\n", + "const 151.3457 2.902 52.155 0.000 145.638 157.053\n", + "x1 37.9003 69.056 0.549 0.583 -97.927 173.728\n", + "x2 -241.9662 68.570 -3.529 0.000 -376.837 -107.095\n", + "x3 542.4258 76.956 7.049 0.000 391.059 693.792\n", + "x4 347.7083 71.357 4.873 0.000 207.355 488.062\n", + "x5 -931.4613 451.142 -2.065 0.040 -1818.823 -44.099\n", + "x6 518.0441 364.118 1.423 0.156 -198.150 1234.238\n", + "x7 163.4035 233.015 0.701 0.484 -294.919 621.726\n", + "x8 275.3100 185.399 1.485 0.138 -89.357 639.977\n", + "x9 736.1891 192.440 3.826 0.000 357.675 1114.704\n", + "x10 48.6711 73.435 0.663 0.508 -95.771 193.113\n", + "==============================================================================\n", + "Omnibus: 1.457 Durbin-Watson: 1.794\n", + "Prob(Omnibus): 0.483 Jarque-Bera (JB): 1.412\n", + "Skew: 0.064 Prob(JB): 0.494\n", + "Kurtosis: 2.718 Cond. No. 
219.\n", + "==============================================================================\n", + "\n", + "Notes:\n", + "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n", + "\"\"\"" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Your code here:\n", + "import statsmodels.api as sm\n", + "data_train=sm.add_constant(X_train)\n", + "model = sm.OLS(y_train,data_train)\n", + "res = model.fit()\n", + "res.summary()" ] }, { @@ -326,11 +666,14 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ - "# Your answers here:" + "# Your answers here:\n", + "# The F-statistic was 38.25 with Prob (F-statistic) = 5.41e-50, therefore the null hypothesis was rejected\n", + "# Yes, x1, x6, x7, x8 and x10: their p-values are above 0.05 and their 95% confidence intervals contain zero, so they have no significance in the model, probably because the predictors are not independent (x2, x3, x4, x5 and x9 are significant)\n", + "# I would remove some of those variables, maybe even all of them" ] }, { @@ -351,11 +694,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "auto=pd.read_csv('../auto-mpg.csv')" ] }, { @@ -367,11 +711,124 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
mpgcylindersdisplacementhorse_powerweightaccelerationmodel_yearcar_name
018.08307.0130.0350412.070\\t\"chevrolet chevelle malibu\"
115.08350.0165.0369311.570\\t\"buick skylark 320\"
218.08318.0150.0343611.070\\t\"plymouth satellite\"
316.08304.0150.0343312.070\\t\"amc rebel sst\"
417.08302.0140.0344910.570\\t\"ford torino\"
\n", + "
" + ], + "text/plain": [ + " mpg cylinders displacement horse_power weight acceleration \\\n", + "0 18.0 8 307.0 130.0 3504 12.0 \n", + "1 15.0 8 350.0 165.0 3693 11.5 \n", + "2 18.0 8 318.0 150.0 3436 11.0 \n", + "3 16.0 8 304.0 150.0 3433 12.0 \n", + "4 17.0 8 302.0 140.0 3449 10.5 \n", + "\n", + " model_year car_name \n", + "0 70 \\t\"chevrolet chevelle malibu\" \n", + "1 70 \\t\"buick skylark 320\" \n", + "2 70 \\t\"plymouth satellite\" \n", + "3 70 \\t\"amc rebel sst\" \n", + "4 70 \\t\"ford torino\" " + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Your code here:\n", + "auto.head(5)" ] }, { @@ -383,11 +840,34 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 398 entries, 0 to 397\n", + "Data columns (total 8 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 mpg 398 non-null float64\n", + " 1 cylinders 398 non-null int64 \n", + " 2 displacement 398 non-null float64\n", + " 3 horse_power 392 non-null float64\n", + " 4 weight 398 non-null int64 \n", + " 5 acceleration 398 non-null float64\n", + " 6 model_year 398 non-null int64 \n", + " 7 car_name 398 non-null object \n", + "dtypes: float64(4), int64(3), object(1)\n", + "memory usage: 25.0+ KB\n" + ] + } + ], + "source": [ + "# Your code here:\n", + "auto.info()" ] }, { @@ -399,11 +879,33 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "82" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "70" + ] + }, + "execution_count": 19, + "metadata": {}, + 
"output_type": "execute_result" + } + ], + "source": [ + "# Your code here:\n", + "display(auto.model_year.max())\n", + "auto.model_year.min()" ] }, { @@ -415,11 +917,31 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "mpg 0\n", + "cylinders 0\n", + "displacement 0\n", + "horse_power 6\n", + "weight 0\n", + "acceleration 0\n", + "model_year 0\n", + "car_name 0\n", + "dtype: int64" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Your code here:\n", + "display(auto.isna().sum())\n", + "auto=auto.dropna()" ] }, { @@ -431,11 +953,28 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Your code here:\n" + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "4 199\n", + "8 103\n", + "6 83\n", + "3 4\n", + "5 3\n", + "Name: cylinders, dtype: int64" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Your code here:\n", + "auto.cylinders.value_counts()" ] }, { @@ -451,11 +990,13 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "X_train,X_test,y_train,y_test=train_test_split(auto.drop(['car_name','mpg'],axis=1),\n", + " auto['mpg'],test_size=0.2,random_state=42)" ] }, { @@ -469,11 +1010,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "LinearRegression()" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "auto_model=linear_model.LinearRegression()\n", + 
"auto_model.fit(X_train,y_train)" ] }, { @@ -502,11 +1056,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0.8107227953093896" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "y_pred=auto_model.predict(X_train)\n", + "r2_score(y_train,y_pred)" ] }, { @@ -522,11 +1089,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0.7942349075428593" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "y_test_pred=auto_model.predict(X_test)\n", + "r2_score(y_test,y_test_pred)" ] }, { @@ -551,11 +1131,13 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 26, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "X_train09,X_test09,y_train09,y_test09=train_test_split(auto.drop(['car_name','mpg'],axis=1),\n", + " auto['mpg'],test_size=0.1,random_state=42)" ] }, { @@ -567,11 +1149,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 27, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "LinearRegression()" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "auto_model09=linear_model.LinearRegression()\n", + "auto_model09.fit(X_train09,y_train09)" ] }, { @@ -583,11 +1178,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 28, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0.8468911998183242" + ] + }, + "execution_count": 28, + "metadata": {}, + 
"output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "y_pred09=auto_model09.predict(X_test09)\n", + "r2_score(y_test09,y_pred09)" ] }, { @@ -599,11 +1207,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 29, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0.8047940166959004" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "y_test_pred09=auto_model09.predict(X_train09)\n", + "r2_score(y_train09,y_test_pred09)" ] }, { @@ -619,7 +1240,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 30, "metadata": {}, "outputs": [], "source": [ @@ -635,11 +1256,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 31, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "selector=RFE(auto_model,n_features_to_select=3)" ] }, { @@ -651,11 +1273,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 32, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "array([1, 4, 3, 2, 1, 1])" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "selector.fit(X_train,y_train)\n", + "selector.ranking_" ] }, { @@ -669,11 +1304,15 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 33, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "X_train_reduced,X_test_reduced,y_train_reduced,y_test_reduced=train_test_split(auto[['cylinders',\n", + " 'acceleration',\n", + " 'model_year']],\n", + " auto['mpg'],test_size=0.2,random_state=42)" ] }, { @@ -685,11 +1324,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 34, "metadata": {}, - 
"outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0.7144839092209849" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here: \n" + "# Your code here: \n", + "auto_model_reduced=linear_model.LinearRegression()\n", + "auto_model_reduced.fit(X_train_reduced,y_train_reduced)\n", + "y_pred_reduced=auto_model_reduced.predict(X_train_reduced)\n", + "r2_score(y_train_reduced,y_pred_reduced)" ] }, { @@ -726,7 +1380,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.6" + "version": "3.8.8" } }, "nbformat": 4,