From e60c7fa5a738a8c1e781bf9f5a37966df7057afa Mon Sep 17 00:00:00 2001 From: avinash Date: Thu, 24 Feb 2022 12:29:40 +0530 Subject: [PATCH] Updated the solution --- Q2.ipynb | 822 +++++++++++++++++++++++++++++++++ Student assignment updates.txt | 3 +- 2 files changed, 824 insertions(+), 1 deletion(-) create mode 100644 Q2.ipynb diff --git a/Q2.ipynb b/Q2.ipynb new file mode 100644 index 0000000..835c992 --- /dev/null +++ b/Q2.ipynb @@ -0,0 +1,822 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "A3F2afIDn2Xy" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "from sklearn.linear_model import LinearRegression\n", + "import matplotlib.pyplot as plt\n", + "from sklearn.model_selection import train_test_split\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uUfOgqI4osh6" + }, + "source": [ + "# Assignment No.2 \n", + "# Linear Regression\n", + "PRN 2019BTECS00077 Avinash Biradar" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 704 + }, + "id": "oa4l2FWRsHVX", + "outputId": "e3b727f9-cb44-466e-d642-cc3fcd000920" + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
YearStartYearEndLocationAbbrLocationDescDatasourceClassTopicQuestionData_Value_UnitData_Value_TypeData_ValueData_Value_AltData_Value_Footnote_SymbolData_Value_FootnoteLow_Confidence_LimitHigh_Confidence_LimitSample_SizeTotalAge(years)EducationGenderIncomeRace/EthnicityGeoLocationClassIDTopicIDQuestionIDDataValueTypeIDLocationIDStratificationCategory1Stratification1StratificationCategoryId1StratificationID1
020112011ALAlabamaBehavioral Risk Factor Surveillance SystemObesity / Weight StatusObesity / Weight StatusPercent of adults aged 18 years and older who ...NaNValue32.032.0NaNNaN30.533.57304.0TotalNaNNaNNaNNaNNaN(32.84057112200048, -86.63186076199969)OWSOWS1Q036VALUE1.0TotalTotalOVROVERALL
120112011ALAlabamaBehavioral Risk Factor Surveillance SystemObesity / Weight StatusObesity / Weight StatusPercent of adults aged 18 years and older who ...NaNValue32.332.3NaNNaN29.934.72581.0NaNNaNNaNMaleNaNNaN(32.84057112200048, -86.63186076199969)OWSOWS1Q036VALUE1.0GenderMaleGENMALE
220112011ALAlabamaBehavioral Risk Factor Surveillance SystemObesity / Weight StatusObesity / Weight StatusPercent of adults aged 18 years and older who ...NaNValue31.831.8NaNNaN30.033.64723.0NaNNaNNaNFemaleNaNNaN(32.84057112200048, -86.63186076199969)OWSOWS1Q036VALUE1.0GenderFemaleGENFEMALE
320112011ALAlabamaBehavioral Risk Factor Surveillance SystemObesity / Weight StatusObesity / Weight StatusPercent of adults aged 18 years and older who ...NaNValue33.633.6NaNNaN29.937.61153.0NaNNaNLess than high schoolNaNNaNNaN(32.84057112200048, -86.63186076199969)OWSOWS1Q036VALUE1.0EducationLess than high schoolEDUEDUHS
420112011ALAlabamaBehavioral Risk Factor Surveillance SystemObesity / Weight StatusObesity / Weight StatusPercent of adults aged 18 years and older who ...NaNValue32.832.8NaNNaN30.235.62402.0NaNNaNHigh school graduateNaNNaNNaN(32.84057112200048, -86.63186076199969)OWSOWS1Q036VALUE1.0EducationHigh school graduateEDUEDUHSGRAD
\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ], + "text/plain": [ + " YearStart YearEnd ... StratificationCategoryId1 StratificationID1\n", + "0 2011 2011 ... OVR OVERALL\n", + "1 2011 2011 ... GEN MALE\n", + "2 2011 2011 ... GEN FEMALE\n", + "3 2011 2011 ... EDU EDUHS\n", + "4 2011 2011 ... EDU EDUHSGRAD\n", + "\n", + "[5 rows x 33 columns]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv(\"/content/dataset.csv\")\n", + "df.size\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ZZiolEh9sKS5", + "outputId": "7c41dc0c-85c5-41f8-abde-c7004108aaef" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 1082 entries, 0 to 1081\n", + "Data columns (total 33 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 YearStart 1082 non-null int64 \n", + " 1 YearEnd 1082 non-null int64 \n", + " 2 LocationAbbr 1082 non-null object \n", + " 3 LocationDesc 1082 non-null object \n", + " 4 Datasource 1082 non-null object \n", + " 5 Class 1082 non-null object \n", + " 6 Topic 1082 non-null object \n", + " 7 Question 1082 non-null object \n", + " 8 Data_Value_Unit 0 non-null float64\n", + " 9 Data_Value_Type 1082 non-null object \n", + " 10 Data_Value 967 non-null float64\n", + " 11 Data_Value_Alt 967 non-null float64\n", + " 12 Data_Value_Footnote_Symbol 114 non-null object \n", + " 13 Data_Value_Footnote 114 non-null object \n", + " 14 Low_Confidence_Limit 967 non-null float64\n", + " 15 High_Confidence_Limit 967 non-null float64\n", + " 16 Sample_Size 967 non-null float64\n", + " 17 Total 39 non-null object \n", + " 18 Age(years) 235 non-null object \n", + " 19 Education 153 non-null object \n", + " 20 Gender 81 non-null object \n", + " 21 Income 266 non-null object \n", + " 22 Race/Ethnicity 307 non-null object \n", + " 23 GeoLocation 1072 non-null object \n", + " 24 ClassID 1081 non-null object \n", + " 25 TopicID 1081 non-null object \n", + " 26 QuestionID 1081 non-null object \n", + " 27 DataValueTypeID 1081 non-null object \n", + " 28 LocationID 1081 non-null float64\n", + " 29 StratificationCategory1 1081 non-null object \n", + " 30 Stratification1 1081 non-null object \n", + " 31 StratificationCategoryId1 1081 non-null object \n", + " 32 StratificationID1 1081 non-null object \n", + "dtypes: float64(7), int64(2), object(24)\n", + "memory usage: 279.1+ KB\n" + ] + } + ], + "source": [ + "df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 364 + }, + "id": "aDX5APBztnHb", + "outputId": "41b17a4c-fea8-4f11-aca8-0a2c950cef51" + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
YearStartYearEndData_Value_UnitData_ValueData_Value_AltLow_Confidence_LimitHigh_Confidence_LimitSample_SizeLocationID
count1082.0000001082.0000000.0967.000000967.000000967.000000967.000000967.0000001081.000000
mean2011.9953792011.995379NaN31.45232731.45232726.70289636.8706313008.4477771.852914
std1.0505171.050517NaN9.9624259.9624259.77531511.05974321136.2310645.260757
min2011.0000002011.000000NaN2.3000002.3000000.9000005.70000050.0000001.000000
25%2011.0000002011.000000NaN24.50000024.50000019.60000029.500000455.5000001.000000
50%2012.0000002012.000000NaN31.50000031.50000026.20000036.600000971.0000001.000000
75%2013.0000002013.000000NaN37.90000037.90000033.05000043.7000001808.0000002.000000
max2015.0000002015.000000NaN67.20000067.20000063.30000070.900000398316.00000059.000000
\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ], + "text/plain": [ + " YearStart YearEnd ... Sample_Size LocationID\n", + "count 1082.000000 1082.000000 ... 967.000000 1081.000000\n", + "mean 2011.995379 2011.995379 ... 3008.447777 1.852914\n", + "std 1.050517 1.050517 ... 21136.231064 5.260757\n", + "min 2011.000000 2011.000000 ... 50.000000 1.000000\n", + "25% 2011.000000 2011.000000 ... 455.500000 1.000000\n", + "50% 2012.000000 2012.000000 ... 971.000000 1.000000\n", + "75% 2013.000000 2013.000000 ... 1808.000000 2.000000\n", + "max 2015.000000 2015.000000 ... 398316.000000 59.000000\n", + "\n", + "[8 rows x 9 columns]" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "background_save": true, + "base_uri": "https://localhost:8080/" + }, + "id": "Hm4qgdYUsY0_", + "outputId": "03edf47c-5cf9-46f5-fc8e-535eb809ffcb" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([], shape=(0, 1), dtype=float64)" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "wrangled=df[df['StratificationID1'] == 'OVERALL'][['LocationDesc', 'Data_Value', 'Question', \"YearStart\" ]] \n", + "question=wrangled[wrangled['Question'] == 'Percent of adults who engage in no leisure-time physical activity '][['LocationDesc', 'Data_Value', 'Question', \"YearStart\" ]]\n", + "x_all=question[question['YearStart']==2014][['LocationDesc', 'Data_Value' ]] \n", + "x=question[question['YearStart']==2014][['Data_Value' ]].values\n", + "x" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "OWXpMKeouR3Z", + "outputId": "4a6dec29-8ace-47ef-d2fe-f2a4f4813e84" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([], shape=(0, 1), dtype=float64)" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "wrangled=df[df['StratificationID1'] == 'OVERALL'][['LocationDesc', 'Data_Value', 'Question', \"YearStart\" ]] \n", + "year=wrangled[wrangled['Question'] == 'Percent of adults aged 18 years and older who have obesity '][['LocationDesc', 'Data_Value', 'Question', \"YearStart\" ]]\n", + "y_all=year[year['YearStart']==2014][['LocationDesc', 'Data_Value' ]] \n", + "y=year[year['YearStart']==2014][['Data_Value' ]].values\n", + "y" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "66GmCtvTwZbb" + }, + "outputs": [], + "source": [ + "x=x.reshape(-1,1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dyOOx_k9wcYF" + }, + "outputs": [], + "source": [ + "x=x.reshape(-1,1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 346 + }, + "id": "wDVswbihweia", + "outputId": "58b7a5ab-1282-4b37-be36-99073dc4d5c1" + }, + "outputs": [ + { + "ename": "ValueError", + "evalue": "ignored", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mx_train\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mx_test\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0my_train\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0my_test\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtrain_test_split\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mtrain_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m.8\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mtest_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m.2\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mrandom_state\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m100\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_split.py\u001b[0m in \u001b[0;36mtrain_test_split\u001b[0;34m(test_size, train_size, random_state, shuffle, stratify, *arrays)\u001b[0m\n\u001b[1;32m 2419\u001b[0m \u001b[0mn_samples\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_num_samples\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marrays\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2420\u001b[0m n_train, n_test = _validate_shuffle_split(\n\u001b[0;32m-> 2421\u001b[0;31m \u001b[0mn_samples\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtest_size\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtrain_size\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdefault_test_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0.25\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2422\u001b[0m )\n\u001b[1;32m 2423\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_split.py\u001b[0m in \u001b[0;36m_validate_shuffle_split\u001b[0;34m(n_samples, test_size, train_size, default_test_size)\u001b[0m\n\u001b[1;32m 2099\u001b[0m \u001b[0;34m\"With n_samples={}, test_size={} and train_size={}, the \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2100\u001b[0m \u001b[0;34m\"resulting train set will be empty. Adjust any of the \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2101\u001b[0;31m \u001b[0;34m\"aforementioned parameters.\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn_samples\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtest_size\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtrain_size\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2102\u001b[0m )\n\u001b[1;32m 2103\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mValueError\u001b[0m: With n_samples=0, test_size=0.2 and train_size=0.8, the resulting train set will be empty. Adjust any of the aforementioned parameters." + ] + } + ], + "source": [ + "x_train,x_test,y_train,y_test=train_test_split(x,y,train_size=.8,test_size=.2,random_state=100)" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "Q2.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/Student assignment updates.txt b/Student assignment updates.txt index 9979d7c..d06b6ee 100644 --- a/Student assignment updates.txt +++ b/Student assignment updates.txt @@ -1,2 +1,3 @@ -Write your name and PRN no +Name : Avinash Biradar +PRN:2019BTECS00077 Hello Updated