diff --git a/2019BTECS00061-asign2.pdf b/2019BTECS00061-asign2.pdf
new file mode 100644
index 0000000..893c691
Binary files /dev/null and b/2019BTECS00061-asign2.pdf differ
diff --git a/SET2_Linear_Regression.ipynb b/SET2_Linear_Regression.ipynb
new file mode 100644
index 0000000..1f8ec32
--- /dev/null
+++ b/SET2_Linear_Regression.ipynb
@@ -0,0 +1,724 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "name": "SET2 - Linear Regression.ipynb",
+ "provenance": [],
+ "collapsed_sections": []
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "source": [
+ "We implement Linear Regression with scikit-learn on the Air Quality dataset, and then evaluate the model's predictions on a held-out test set."
+ ],
+ "metadata": {
+ "id": "Jh--3_JsS4XO"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Let's import all required libraries\n",
+ "\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "import seaborn as sns\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "from sklearn.linear_model import LinearRegression\n",
+ "from sklearn.metrics import accuracy_score\n",
+ "from sklearn.metrics import mean_absolute_percentage_error\n",
+ "from sklearn.metrics import precision_score\n",
+ "import matplotlib.pyplot as plt"
+ ],
+ "metadata": {
+ "id": "aa2VtFQ_TUlA"
+ },
+ "execution_count": 58,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Load the Air Quality (UCI) CSV from GitHub and preview the first rows\n",
+ "\n",
+ "air_data = pd.read_csv(\n",
+ "    'https://github.com/rising-entropy/datasets/raw/main/AirQualityUCI.csv'\n",
+ ")\n",
+ "air_data.head()"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 206
+ },
+ "id": "bqzocoBuUDAw",
+ "outputId": "816399f7-3acc-487a-a8af-5d15e5d9beaa"
+ },
+ "execution_count": 49,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Date | \n",
+ " Time | \n",
+ " CO(GT) | \n",
+ " PT08.S1(CO) | \n",
+ " NMHC(GT) | \n",
+ " C6H6(GT) | \n",
+ " PT08.S2(NMHC) | \n",
+ " NOx(GT) | \n",
+ " PT08.S3(NOx) | \n",
+ " NO2(GT) | \n",
+ " PT08.S4(NO2) | \n",
+ " PT08.S5(O3) | \n",
+ " T | \n",
+ " RH | \n",
+ " AH | \n",
+ " Unnamed: 15 | \n",
+ " Unnamed: 16 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 10-03-2004 | \n",
+ " 18:00:00 | \n",
+ " 2.6 | \n",
+ " 1360.0 | \n",
+ " 150.0 | \n",
+ " 11.9 | \n",
+ " 1046.0 | \n",
+ " 166.0 | \n",
+ " 1056.0 | \n",
+ " 113.0 | \n",
+ " 1692.0 | \n",
+ " 1268.0 | \n",
+ " 13.6 | \n",
+ " 48.9 | \n",
+ " 0.7578 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 10-03-2004 | \n",
+ " 19:00:00 | \n",
+ " 2.0 | \n",
+ " 1292.0 | \n",
+ " 112.0 | \n",
+ " 9.4 | \n",
+ " 955.0 | \n",
+ " 103.0 | \n",
+ " 1174.0 | \n",
+ " 92.0 | \n",
+ " 1559.0 | \n",
+ " 972.0 | \n",
+ " 13.3 | \n",
+ " 47.7 | \n",
+ " 0.7255 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 10-03-2004 | \n",
+ " 20:00:00 | \n",
+ " 2.2 | \n",
+ " 1402.0 | \n",
+ " 88.0 | \n",
+ " 9.0 | \n",
+ " 939.0 | \n",
+ " 131.0 | \n",
+ " 1140.0 | \n",
+ " 114.0 | \n",
+ " 1555.0 | \n",
+ " 1074.0 | \n",
+ " 11.9 | \n",
+ " 54.0 | \n",
+ " 0.7502 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 10-03-2004 | \n",
+ " 21:00:00 | \n",
+ " 2.2 | \n",
+ " 1376.0 | \n",
+ " 80.0 | \n",
+ " 9.2 | \n",
+ " 948.0 | \n",
+ " 172.0 | \n",
+ " 1092.0 | \n",
+ " 122.0 | \n",
+ " 1584.0 | \n",
+ " 1203.0 | \n",
+ " 11.0 | \n",
+ " 60.0 | \n",
+ " 0.7867 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 10-03-2004 | \n",
+ " 22:00:00 | \n",
+ " 1.6 | \n",
+ " 1272.0 | \n",
+ " 51.0 | \n",
+ " 6.5 | \n",
+ " 836.0 | \n",
+ " 131.0 | \n",
+ " 1205.0 | \n",
+ " 116.0 | \n",
+ " 1490.0 | \n",
+ " 1110.0 | \n",
+ " 11.2 | \n",
+ " 59.6 | \n",
+ " 0.7888 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ " "
+ ],
+ "text/plain": [
+ " Date Time CO(GT) ... AH Unnamed: 15 Unnamed: 16\n",
+ "0 10-03-2004 18:00:00 2.6 ... 0.7578 NaN NaN\n",
+ "1 10-03-2004 19:00:00 2.0 ... 0.7255 NaN NaN\n",
+ "2 10-03-2004 20:00:00 2.2 ... 0.7502 NaN NaN\n",
+ "3 10-03-2004 21:00:00 2.2 ... 0.7867 NaN NaN\n",
+ "4 10-03-2004 22:00:00 1.6 ... 0.7888 NaN NaN\n",
+ "\n",
+ "[5 rows x 17 columns]"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 49
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Count the missing (NaN) entries in every column.\n",
+ "air_data.isna().sum()"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "MfZaBLQTa0ar",
+ "outputId": "28b64c3d-ee39-4c1c-880d-8dabafcc00b5"
+ },
+ "execution_count": 50,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "Date 114\n",
+ "Time 114\n",
+ "CO(GT) 114\n",
+ "PT08.S1(CO) 114\n",
+ "NMHC(GT) 114\n",
+ "C6H6(GT) 114\n",
+ "PT08.S2(NMHC) 114\n",
+ "NOx(GT) 114\n",
+ "PT08.S3(NOx) 114\n",
+ "NO2(GT) 114\n",
+ "PT08.S4(NO2) 114\n",
+ "PT08.S5(O3) 114\n",
+ "T 114\n",
+ "RH 114\n",
+ "AH 114\n",
+ "Unnamed: 15 9471\n",
+ "Unnamed: 16 9471\n",
+ "dtype: int64"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 50
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "\n"
+ ],
+ "metadata": {
+ "id": "1npwHEKLSyQo"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Handling the missing values\n",
+ "\n",
+ "# Drop the timestamp columns (Date, Time), the two unnamed trailing columns\n",
+ "# and the weather readings (RH, AH, T); only the ten pollutant/sensor columns\n",
+ "# remain. errors='ignore' keeps this cell re-runnable once they are gone.\n",
+ "COLUMNS_TO_DROP = ['Date', 'Time', 'Unnamed: 15', 'Unnamed: 16', 'RH', 'AH', 'T']\n",
+ "air_data = air_data.drop(columns=COLUMNS_TO_DROP, errors='ignore')\n",
+ "\n",
+ "# Replace missing values with the mean of their column. The result is assigned\n",
+ "# back instead of calling air_data[col].fillna(..., inplace=True) per column:\n",
+ "# that chained in-place form is deprecated by pandas and no longer updates\n",
+ "# the DataFrame when Copy-on-Write is enabled.\n",
+ "# NOTE(review): the UCI description of this dataset reportedly marks missing\n",
+ "# sensor readings with -200 rather than NaN - confirm whether those values\n",
+ "# should be converted to NaN before imputing.\n",
+ "air_data = air_data.fillna(air_data.mean(numeric_only=True))\n",
+ "\n",
+ "air_data.isnull().sum()"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "hifjH48hazHK",
+ "outputId": "de27aded-3fe7-491e-c8ef-5dc67c3762dd"
+ },
+ "execution_count": 51,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "CO(GT) 0\n",
+ "PT08.S1(CO) 0\n",
+ "NMHC(GT) 0\n",
+ "C6H6(GT) 0\n",
+ "PT08.S2(NMHC) 0\n",
+ "NOx(GT) 0\n",
+ "PT08.S3(NOx) 0\n",
+ "NO2(GT) 0\n",
+ "PT08.S4(NO2) 0\n",
+ "PT08.S5(O3) 0\n",
+ "dtype: int64"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 51
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Preview the first five rows of the cleaned dataset\n",
+ "\n",
+ "air_data.head(n=5)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 206
+ },
+ "id": "Z59uiNOg44w-",
+ "outputId": "080cad8e-a316-4d75-a1df-36f2d146ad08"
+ },
+ "execution_count": 52,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " CO(GT) | \n",
+ " PT08.S1(CO) | \n",
+ " NMHC(GT) | \n",
+ " C6H6(GT) | \n",
+ " PT08.S2(NMHC) | \n",
+ " NOx(GT) | \n",
+ " PT08.S3(NOx) | \n",
+ " NO2(GT) | \n",
+ " PT08.S4(NO2) | \n",
+ " PT08.S5(O3) | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 2.6 | \n",
+ " 1360.0 | \n",
+ " 150.0 | \n",
+ " 11.9 | \n",
+ " 1046.0 | \n",
+ " 166.0 | \n",
+ " 1056.0 | \n",
+ " 113.0 | \n",
+ " 1692.0 | \n",
+ " 1268.0 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2.0 | \n",
+ " 1292.0 | \n",
+ " 112.0 | \n",
+ " 9.4 | \n",
+ " 955.0 | \n",
+ " 103.0 | \n",
+ " 1174.0 | \n",
+ " 92.0 | \n",
+ " 1559.0 | \n",
+ " 972.0 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 2.2 | \n",
+ " 1402.0 | \n",
+ " 88.0 | \n",
+ " 9.0 | \n",
+ " 939.0 | \n",
+ " 131.0 | \n",
+ " 1140.0 | \n",
+ " 114.0 | \n",
+ " 1555.0 | \n",
+ " 1074.0 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 2.2 | \n",
+ " 1376.0 | \n",
+ " 80.0 | \n",
+ " 9.2 | \n",
+ " 948.0 | \n",
+ " 172.0 | \n",
+ " 1092.0 | \n",
+ " 122.0 | \n",
+ " 1584.0 | \n",
+ " 1203.0 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 1.6 | \n",
+ " 1272.0 | \n",
+ " 51.0 | \n",
+ " 6.5 | \n",
+ " 836.0 | \n",
+ " 131.0 | \n",
+ " 1205.0 | \n",
+ " 116.0 | \n",
+ " 1490.0 | \n",
+ " 1110.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ " "
+ ],
+ "text/plain": [
+ " CO(GT) PT08.S1(CO) NMHC(GT) ... NO2(GT) PT08.S4(NO2) PT08.S5(O3)\n",
+ "0 2.6 1360.0 150.0 ... 113.0 1692.0 1268.0\n",
+ "1 2.0 1292.0 112.0 ... 92.0 1559.0 972.0\n",
+ "2 2.2 1402.0 88.0 ... 114.0 1555.0 1074.0\n",
+ "3 2.2 1376.0 80.0 ... 122.0 1584.0 1203.0\n",
+ "4 1.6 1272.0 51.0 ... 116.0 1490.0 1110.0\n",
+ "\n",
+ "[5 rows x 10 columns]"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 52
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Features are every column except the last; the target is the last column\n",
+ "X = air_data.iloc[:, :-1].to_numpy()\n",
+ "Y = air_data.iloc[:, -1:].to_numpy()  # slice keeps the target 2-D: (n_samples, 1)\n",
+ "\n",
+ "# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable\n",
+ "X_train, X_test, Y_train, Y_test = train_test_split(\n",
+ "    X, Y, test_size=0.2, random_state=2\n",
+ ")"
+ ],
+ "metadata": {
+ "id": "wwfjU7eM8BZr"
+ },
+ "execution_count": 53,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Fit a linear regression model on the training split (fit() returns the estimator)\n",
+ "model = LinearRegression().fit(X_train, Y_train)\n",
+ "model"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "25daJepp8SIN",
+ "outputId": "a3364493-2828-4408-894f-1ed11875b58d"
+ },
+ "execution_count": 54,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "LinearRegression()"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 54
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Evaluate the model on the held-out test set using the mean absolute\n",
+ "# percentage error (MAPE): the average of |actual - predicted| / |actual|.\n",
+ "\n",
+ "X_test_prediction = model.predict(X_test)\n",
+ "\n",
+ "# A hand-written loop that divides by the raw target lets a negative target\n",
+ "# subtract from the total and fails on a zero target. scikit-learn's metric\n",
+ "# (imported in the first code cell) divides by max(|actual|, eps) instead.\n",
+ "theValue = mean_absolute_percentage_error(Y_test, X_test_prediction)\n",
+ "\n",
+ "print(\"The Error is\", theValue, \"fractions average.\")\n"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "MA9tylL0-CTf",
+ "outputId": "f336f822-a195-4e0d-fef4-59790eea488b"
+ },
+ "execution_count": 62,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "The Error is 0.10333011859351106 fractions average.\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Thus, we performed pre-processing, training and testing of a Linear Regression model on the Air Quality dataset."
+ ],
+ "metadata": {
+ "id": "2EtlPWp2Sku8"
+ }
+ }
+ ]
+}
\ No newline at end of file
diff --git a/Student assignment updates.txt b/Student assignment updates.txt
index 9979d7c..1dd96aa 100644
--- a/Student assignment updates.txt
+++ b/Student assignment updates.txt
@@ -1,2 +1,2 @@
-Write your name and PRN no
-Hello Updated
+ROHIT BIRADAR
+2019BTECS00061