diff --git a/Assignment_2_Set.ipynb b/Assignment_2_Set.ipynb
new file mode 100644
index 0000000..c2a113c
--- /dev/null
+++ b/Assignment_2_Set.ipynb
@@ -0,0 +1,1530 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "name": "Assignment_2_Set.ipynb",
+ "provenance": []
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Software Engineering Tools Lab \n",
+ "Assignment 2  \n",
+ "PRN-2019BTECS00094 \n",
+ "\n"
+ ],
+ "metadata": {
+ "id": "jamncb6P7jRm"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "**Dataset Information**\n",
+ "\n",
+ "The dataset contains 9358 instances of hourly averaged responses from an array of 5 metal oxide chemical sensors embedded in an Air Quality Chemical Multisensor Device. The device was located in the field in a significantly polluted area, at road level, within an Italian city. Data were recorded from March 2004 to February 2005 (one year), representing the longest freely available recordings of on-field deployed air quality chemical sensor device responses. Ground-truth hourly averaged concentrations for CO, Non-Methane Hydrocarbons, Benzene, Total Nitrogen Oxides (NOx) and Nitrogen Dioxide (NO2) were provided by a co-located reference certified analyzer. Evidence of cross-sensitivities as well as both concept and sensor drifts is present, as described in De Vito et al., Sens. And Act. B, Vol. 129,2,2008 (citation required), eventually affecting the sensors' concentration estimation capabilities. **Missing values are tagged with the value -200.**\n",
+ "This dataset can be used exclusively for research purposes. Commercial purposes are fully excluded. \n",
+ "\n",
+ "\n",
+ "**Attribute Information:**\n",
+ "\n",
+ "0 Date (DD/MM/YYYY)\n",
+ "\n",
+ "1 Time (HH.MM.SS)\n",
+ "\n",
+ "2 True hourly averaged concentration CO in mg/m^3 (reference analyzer)\n",
+ "\n",
+ "3 PT08.S1 (tin oxide) hourly averaged sensor response (nominally CO targeted)\n",
+ "\n",
+ "4 True hourly averaged overall Non-Methane Hydrocarbons concentration in microg/m^3 (reference analyzer)\n",
+ "\n",
+ "5 True hourly averaged Benzene concentration in microg/m^3 (reference analyzer)\n",
+ "\n",
+ "6 PT08.S2 (titania) hourly averaged sensor response (nominally NMHC targeted)\n",
+ "\n",
+ "7 True hourly averaged NOx concentration in ppb (reference analyzer)\n",
+ "\n",
+ "8 PT08.S3 (tungsten oxide) hourly averaged sensor response (nominally NOx targeted)\n",
+ "\n",
+ "9 True hourly averaged NO2 concentration in microg/m^3 (reference analyzer)\n",
+ "\n",
+ "10 PT08.S4 (tungsten oxide) hourly averaged sensor response (nominally NO2 targeted)\n",
+ "\n",
+ "11 PT08.S5 (indium oxide) hourly averaged sensor response (nominally O3 targeted)\n",
+ "\n",
+ "12 Temperature in °C\n",
+ "\n",
+ "13 Relative Humidity (%)\n",
+ "\n",
+ "14 AH Absolute Humidity"
+ ],
+ "metadata": {
+ "id": "LkHgD56FGmIG"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# importing libraries\n",
+ "import matplotlib.pyplot as plt\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "from sklearn.linear_model import LinearRegression\n",
+ "from sklearn.metrics import mean_squared_error, r2_score\n",
+ "from sklearn.model_selection import train_test_split"
+ ],
+ "metadata": {
+ "id": "lEXvZ6Xx73hc"
+ },
+ "execution_count": 19,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# dataset\n",
+ "# link https://archive.ics.uci.edu/ml/machine-learning-databases/00360/\n",
+ "Data = pd.read_excel('AirQualityUCI.xlsx')\n",
+ "Data\n"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 661
+ },
+ "id": "zFsVZvJ1BmGR",
+ "outputId": "24ba4c5e-7b93-4ccf-cb3a-6da0b67047fa"
+ },
+ "execution_count": 20,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Date | \n",
+ " Time | \n",
+ " CO(GT) | \n",
+ " PT08.S1(CO) | \n",
+ " NMHC(GT) | \n",
+ " C6H6(GT) | \n",
+ " PT08.S2(NMHC) | \n",
+ " NOx(GT) | \n",
+ " PT08.S3(NOx) | \n",
+ " NO2(GT) | \n",
+ " PT08.S4(NO2) | \n",
+ " PT08.S5(O3) | \n",
+ " T | \n",
+ " RH | \n",
+ " AH | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 2004-03-10 | \n",
+ " 18:00:00 | \n",
+ " 2.6 | \n",
+ " 1360.00 | \n",
+ " 150 | \n",
+ " 11.881723 | \n",
+ " 1045.50 | \n",
+ " 166.0 | \n",
+ " 1056.25 | \n",
+ " 113.0 | \n",
+ " 1692.00 | \n",
+ " 1267.50 | \n",
+ " 13.600 | \n",
+ " 48.875001 | \n",
+ " 0.757754 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2004-03-10 | \n",
+ " 19:00:00 | \n",
+ " 2.0 | \n",
+ " 1292.25 | \n",
+ " 112 | \n",
+ " 9.397165 | \n",
+ " 954.75 | \n",
+ " 103.0 | \n",
+ " 1173.75 | \n",
+ " 92.0 | \n",
+ " 1558.75 | \n",
+ " 972.25 | \n",
+ " 13.300 | \n",
+ " 47.700000 | \n",
+ " 0.725487 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 2004-03-10 | \n",
+ " 20:00:00 | \n",
+ " 2.2 | \n",
+ " 1402.00 | \n",
+ " 88 | \n",
+ " 8.997817 | \n",
+ " 939.25 | \n",
+ " 131.0 | \n",
+ " 1140.00 | \n",
+ " 114.0 | \n",
+ " 1554.50 | \n",
+ " 1074.00 | \n",
+ " 11.900 | \n",
+ " 53.975000 | \n",
+ " 0.750239 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 2004-03-10 | \n",
+ " 21:00:00 | \n",
+ " 2.2 | \n",
+ " 1375.50 | \n",
+ " 80 | \n",
+ " 9.228796 | \n",
+ " 948.25 | \n",
+ " 172.0 | \n",
+ " 1092.00 | \n",
+ " 122.0 | \n",
+ " 1583.75 | \n",
+ " 1203.25 | \n",
+ " 11.000 | \n",
+ " 60.000000 | \n",
+ " 0.786713 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 2004-03-10 | \n",
+ " 22:00:00 | \n",
+ " 1.6 | \n",
+ " 1272.25 | \n",
+ " 51 | \n",
+ " 6.518224 | \n",
+ " 835.50 | \n",
+ " 131.0 | \n",
+ " 1205.00 | \n",
+ " 116.0 | \n",
+ " 1490.00 | \n",
+ " 1110.00 | \n",
+ " 11.150 | \n",
+ " 59.575001 | \n",
+ " 0.788794 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 9352 | \n",
+ " 2005-04-04 | \n",
+ " 10:00:00 | \n",
+ " 3.1 | \n",
+ " 1314.25 | \n",
+ " -200 | \n",
+ " 13.529605 | \n",
+ " 1101.25 | \n",
+ " 471.7 | \n",
+ " 538.50 | \n",
+ " 189.8 | \n",
+ " 1374.25 | \n",
+ " 1728.50 | \n",
+ " 21.850 | \n",
+ " 29.250000 | \n",
+ " 0.756824 | \n",
+ "
\n",
+ " \n",
+ " | 9353 | \n",
+ " 2005-04-04 | \n",
+ " 11:00:00 | \n",
+ " 2.4 | \n",
+ " 1162.50 | \n",
+ " -200 | \n",
+ " 11.355157 | \n",
+ " 1027.00 | \n",
+ " 353.3 | \n",
+ " 603.75 | \n",
+ " 179.2 | \n",
+ " 1263.50 | \n",
+ " 1269.00 | \n",
+ " 24.325 | \n",
+ " 23.725000 | \n",
+ " 0.711864 | \n",
+ "
\n",
+ " \n",
+ " | 9354 | \n",
+ " 2005-04-04 | \n",
+ " 12:00:00 | \n",
+ " 2.4 | \n",
+ " 1142.00 | \n",
+ " -200 | \n",
+ " 12.374538 | \n",
+ " 1062.50 | \n",
+ " 293.0 | \n",
+ " 603.25 | \n",
+ " 174.7 | \n",
+ " 1240.75 | \n",
+ " 1092.00 | \n",
+ " 26.900 | \n",
+ " 18.350000 | \n",
+ " 0.640649 | \n",
+ "
\n",
+ " \n",
+ " | 9355 | \n",
+ " 2005-04-04 | \n",
+ " 13:00:00 | \n",
+ " 2.1 | \n",
+ " 1002.50 | \n",
+ " -200 | \n",
+ " 9.547187 | \n",
+ " 960.50 | \n",
+ " 234.5 | \n",
+ " 701.50 | \n",
+ " 155.7 | \n",
+ " 1041.00 | \n",
+ " 769.75 | \n",
+ " 28.325 | \n",
+ " 13.550000 | \n",
+ " 0.513866 | \n",
+ "
\n",
+ " \n",
+ " | 9356 | \n",
+ " 2005-04-04 | \n",
+ " 14:00:00 | \n",
+ " 2.2 | \n",
+ " 1070.75 | \n",
+ " -200 | \n",
+ " 11.932060 | \n",
+ " 1047.25 | \n",
+ " 265.2 | \n",
+ " 654.00 | \n",
+ " 167.7 | \n",
+ " 1128.50 | \n",
+ " 816.00 | \n",
+ " 28.500 | \n",
+ " 13.125000 | \n",
+ " 0.502804 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
9357 rows × 15 columns
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ " "
+ ],
+ "text/plain": [
+ " Date Time CO(GT) ... T RH AH\n",
+ "0 2004-03-10 18:00:00 2.6 ... 13.600 48.875001 0.757754\n",
+ "1 2004-03-10 19:00:00 2.0 ... 13.300 47.700000 0.725487\n",
+ "2 2004-03-10 20:00:00 2.2 ... 11.900 53.975000 0.750239\n",
+ "3 2004-03-10 21:00:00 2.2 ... 11.000 60.000000 0.786713\n",
+ "4 2004-03-10 22:00:00 1.6 ... 11.150 59.575001 0.788794\n",
+ "... ... ... ... ... ... ... ...\n",
+ "9352 2005-04-04 10:00:00 3.1 ... 21.850 29.250000 0.756824\n",
+ "9353 2005-04-04 11:00:00 2.4 ... 24.325 23.725000 0.711864\n",
+ "9354 2005-04-04 12:00:00 2.4 ... 26.900 18.350000 0.640649\n",
+ "9355 2005-04-04 13:00:00 2.1 ... 28.325 13.550000 0.513866\n",
+ "9356 2005-04-04 14:00:00 2.2 ... 28.500 13.125000 0.502804\n",
+ "\n",
+ "[9357 rows x 15 columns]"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 20
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "Data.shape\n",
+ "#shows number of rows and columns in the dataset"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "vV9NRO47CoC6",
+ "outputId": "7ab09972-1c75-4e48-b4ef-eb0d7a832110"
+ },
+ "execution_count": 21,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "(9357, 15)"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 21
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Data Pre-processing\n",
+ "\n",
+ "The dataset information states that missing values are tagged with -200, so we will replace them with 0."
+ ],
+ "metadata": {
+ "id": "5ALrcMzEI4ne"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "Data['CO(GT)'].value_counts()\n",
+ "# i.e all -200 are replaced with 0"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "ROSeBaKuIwxH",
+ "outputId": "a21634d7-2d2e-404c-b13e-6affeae8e0b6"
+ },
+ "execution_count": 22,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "-200.0 1683\n",
+ " 1.0 305\n",
+ " 1.4 279\n",
+ " 1.6 275\n",
+ " 1.5 273\n",
+ " ... \n",
+ " 9.9 1\n",
+ " 7.6 1\n",
+ " 9.3 1\n",
+ " 7.0 1\n",
+ " 8.5 1\n",
+ "Name: CO(GT), Length: 97, dtype: int64"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 22
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Count the -200 missing-value markers in every column with one vectorised\n",
+ "# comparison instead of nested Python loops over each cell.\n",
+ "missing_counts = Data.eq(-200).sum()\n",
+ "# Keep the (column, count) pairs available under the original name `l`.\n",
+ "l = [(column, int(n_missing)) for column, n_missing in missing_counts.items()]\n",
+ "# The notebook replaces the markers with 0 (not with an average), so the message says so.\n",
+ "print(\"Values from each column that needs to be replaced with 0 \\n \",l)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "oMzr28s0Efck",
+ "outputId": "a9a84fae-2e77-4165-debc-142f60611d69"
+ },
+ "execution_count": 23,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Values from each column that needs to be replaced with avg \n",
+ " [('Date', 0), ('Time', 0), ('CO(GT)', 1683), ('PT08.S1(CO)', 366), ('NMHC(GT)', 8443), ('C6H6(GT)', 366), ('PT08.S2(NMHC)', 366), ('NOx(GT)', 1639), ('PT08.S3(NOx)', 366), ('NO2(GT)', 1642), ('PT08.S4(NO2)', 366), ('PT08.S5(O3)', 366), ('T', 366), ('RH', 366), ('AH', 366)]\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Replace only the -200 missing-value marker with 0. Other negative numbers\n",
+ "# (for example a below-zero temperature in `T`) may be genuine readings and are\n",
+ "# left untouched. `DataFrame.replace` returns a new frame, so re-running this\n",
+ "# cell gives the same result.\n",
+ "Data = Data.replace(-200, 0)\n",
+ "Data"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 661
+ },
+ "id": "AC1dRYWbIiXL",
+ "outputId": "1a7bddd8-7a5d-46fe-f08c-ab4a026a1855"
+ },
+ "execution_count": 24,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Date | \n",
+ " Time | \n",
+ " CO(GT) | \n",
+ " PT08.S1(CO) | \n",
+ " NMHC(GT) | \n",
+ " C6H6(GT) | \n",
+ " PT08.S2(NMHC) | \n",
+ " NOx(GT) | \n",
+ " PT08.S3(NOx) | \n",
+ " NO2(GT) | \n",
+ " PT08.S4(NO2) | \n",
+ " PT08.S5(O3) | \n",
+ " T | \n",
+ " RH | \n",
+ " AH | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 2004-03-10 | \n",
+ " 18:00:00 | \n",
+ " 2.6 | \n",
+ " 1360.00 | \n",
+ " 150 | \n",
+ " 11.881723 | \n",
+ " 1045.50 | \n",
+ " 166.0 | \n",
+ " 1056.25 | \n",
+ " 113.0 | \n",
+ " 1692.00 | \n",
+ " 1267.50 | \n",
+ " 13.600 | \n",
+ " 48.875001 | \n",
+ " 0.757754 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2004-03-10 | \n",
+ " 19:00:00 | \n",
+ " 2.0 | \n",
+ " 1292.25 | \n",
+ " 112 | \n",
+ " 9.397165 | \n",
+ " 954.75 | \n",
+ " 103.0 | \n",
+ " 1173.75 | \n",
+ " 92.0 | \n",
+ " 1558.75 | \n",
+ " 972.25 | \n",
+ " 13.300 | \n",
+ " 47.700000 | \n",
+ " 0.725487 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 2004-03-10 | \n",
+ " 20:00:00 | \n",
+ " 2.2 | \n",
+ " 1402.00 | \n",
+ " 88 | \n",
+ " 8.997817 | \n",
+ " 939.25 | \n",
+ " 131.0 | \n",
+ " 1140.00 | \n",
+ " 114.0 | \n",
+ " 1554.50 | \n",
+ " 1074.00 | \n",
+ " 11.900 | \n",
+ " 53.975000 | \n",
+ " 0.750239 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 2004-03-10 | \n",
+ " 21:00:00 | \n",
+ " 2.2 | \n",
+ " 1375.50 | \n",
+ " 80 | \n",
+ " 9.228796 | \n",
+ " 948.25 | \n",
+ " 172.0 | \n",
+ " 1092.00 | \n",
+ " 122.0 | \n",
+ " 1583.75 | \n",
+ " 1203.25 | \n",
+ " 11.000 | \n",
+ " 60.000000 | \n",
+ " 0.786713 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 2004-03-10 | \n",
+ " 22:00:00 | \n",
+ " 1.6 | \n",
+ " 1272.25 | \n",
+ " 51 | \n",
+ " 6.518224 | \n",
+ " 835.50 | \n",
+ " 131.0 | \n",
+ " 1205.00 | \n",
+ " 116.0 | \n",
+ " 1490.00 | \n",
+ " 1110.00 | \n",
+ " 11.150 | \n",
+ " 59.575001 | \n",
+ " 0.788794 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 9352 | \n",
+ " 2005-04-04 | \n",
+ " 10:00:00 | \n",
+ " 3.1 | \n",
+ " 1314.25 | \n",
+ " 0 | \n",
+ " 13.529605 | \n",
+ " 1101.25 | \n",
+ " 471.7 | \n",
+ " 538.50 | \n",
+ " 189.8 | \n",
+ " 1374.25 | \n",
+ " 1728.50 | \n",
+ " 21.850 | \n",
+ " 29.250000 | \n",
+ " 0.756824 | \n",
+ "
\n",
+ " \n",
+ " | 9353 | \n",
+ " 2005-04-04 | \n",
+ " 11:00:00 | \n",
+ " 2.4 | \n",
+ " 1162.50 | \n",
+ " 0 | \n",
+ " 11.355157 | \n",
+ " 1027.00 | \n",
+ " 353.3 | \n",
+ " 603.75 | \n",
+ " 179.2 | \n",
+ " 1263.50 | \n",
+ " 1269.00 | \n",
+ " 24.325 | \n",
+ " 23.725000 | \n",
+ " 0.711864 | \n",
+ "
\n",
+ " \n",
+ " | 9354 | \n",
+ " 2005-04-04 | \n",
+ " 12:00:00 | \n",
+ " 2.4 | \n",
+ " 1142.00 | \n",
+ " 0 | \n",
+ " 12.374538 | \n",
+ " 1062.50 | \n",
+ " 293.0 | \n",
+ " 603.25 | \n",
+ " 174.7 | \n",
+ " 1240.75 | \n",
+ " 1092.00 | \n",
+ " 26.900 | \n",
+ " 18.350000 | \n",
+ " 0.640649 | \n",
+ "
\n",
+ " \n",
+ " | 9355 | \n",
+ " 2005-04-04 | \n",
+ " 13:00:00 | \n",
+ " 2.1 | \n",
+ " 1002.50 | \n",
+ " 0 | \n",
+ " 9.547187 | \n",
+ " 960.50 | \n",
+ " 234.5 | \n",
+ " 701.50 | \n",
+ " 155.7 | \n",
+ " 1041.00 | \n",
+ " 769.75 | \n",
+ " 28.325 | \n",
+ " 13.550000 | \n",
+ " 0.513866 | \n",
+ "
\n",
+ " \n",
+ " | 9356 | \n",
+ " 2005-04-04 | \n",
+ " 14:00:00 | \n",
+ " 2.2 | \n",
+ " 1070.75 | \n",
+ " 0 | \n",
+ " 11.932060 | \n",
+ " 1047.25 | \n",
+ " 265.2 | \n",
+ " 654.00 | \n",
+ " 167.7 | \n",
+ " 1128.50 | \n",
+ " 816.00 | \n",
+ " 28.500 | \n",
+ " 13.125000 | \n",
+ " 0.502804 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
9357 rows × 15 columns
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ " "
+ ],
+ "text/plain": [
+ " Date Time CO(GT) ... T RH AH\n",
+ "0 2004-03-10 18:00:00 2.6 ... 13.600 48.875001 0.757754\n",
+ "1 2004-03-10 19:00:00 2.0 ... 13.300 47.700000 0.725487\n",
+ "2 2004-03-10 20:00:00 2.2 ... 11.900 53.975000 0.750239\n",
+ "3 2004-03-10 21:00:00 2.2 ... 11.000 60.000000 0.786713\n",
+ "4 2004-03-10 22:00:00 1.6 ... 11.150 59.575001 0.788794\n",
+ "... ... ... ... ... ... ... ...\n",
+ "9352 2005-04-04 10:00:00 3.1 ... 21.850 29.250000 0.756824\n",
+ "9353 2005-04-04 11:00:00 2.4 ... 24.325 23.725000 0.711864\n",
+ "9354 2005-04-04 12:00:00 2.4 ... 26.900 18.350000 0.640649\n",
+ "9355 2005-04-04 13:00:00 2.1 ... 28.325 13.550000 0.513866\n",
+ "9356 2005-04-04 14:00:00 2.2 ... 28.500 13.125000 0.502804\n",
+ "\n",
+ "[9357 rows x 15 columns]"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 24
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "Data['CO(GT)'].value_counts()\n",
+ "# i.e all -200 are replaced with 0"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "TFsh3YyuItQn",
+ "outputId": "84e655fc-6321-4351-bf12-4d95a8a7816e"
+ },
+ "execution_count": 25,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "0.0 1683\n",
+ "1.0 305\n",
+ "1.4 279\n",
+ "1.6 275\n",
+ "1.5 273\n",
+ " ... \n",
+ "9.9 1\n",
+ "7.6 1\n",
+ "9.3 1\n",
+ "7.0 1\n",
+ "8.5 1\n",
+ "Name: CO(GT), Length: 97, dtype: int64"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 25
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Training and Testing"
+ ],
+ "metadata": {
+ "id": "KpW_9LHwLiDk"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# features: every column except the two timestamp columns and the C6H6(GT) target\n",
+ "feature = Data.drop(columns=['Date', 'Time', 'C6H6(GT)'])"
+ ],
+ "metadata": {
+ "id": "uTooBrR3LfIw"
+ },
+ "execution_count": 26,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "feature.head()"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 206
+ },
+ "id": "CE8rCV12MAul",
+ "outputId": "2531bdeb-596b-483f-e82b-259aeccebb18"
+ },
+ "execution_count": 27,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " CO(GT) | \n",
+ " PT08.S1(CO) | \n",
+ " NMHC(GT) | \n",
+ " PT08.S2(NMHC) | \n",
+ " NOx(GT) | \n",
+ " PT08.S3(NOx) | \n",
+ " NO2(GT) | \n",
+ " PT08.S4(NO2) | \n",
+ " PT08.S5(O3) | \n",
+ " T | \n",
+ " RH | \n",
+ " AH | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 2.6 | \n",
+ " 1360.00 | \n",
+ " 150 | \n",
+ " 1045.50 | \n",
+ " 166.0 | \n",
+ " 1056.25 | \n",
+ " 113.0 | \n",
+ " 1692.00 | \n",
+ " 1267.50 | \n",
+ " 13.60 | \n",
+ " 48.875001 | \n",
+ " 0.757754 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2.0 | \n",
+ " 1292.25 | \n",
+ " 112 | \n",
+ " 954.75 | \n",
+ " 103.0 | \n",
+ " 1173.75 | \n",
+ " 92.0 | \n",
+ " 1558.75 | \n",
+ " 972.25 | \n",
+ " 13.30 | \n",
+ " 47.700000 | \n",
+ " 0.725487 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 2.2 | \n",
+ " 1402.00 | \n",
+ " 88 | \n",
+ " 939.25 | \n",
+ " 131.0 | \n",
+ " 1140.00 | \n",
+ " 114.0 | \n",
+ " 1554.50 | \n",
+ " 1074.00 | \n",
+ " 11.90 | \n",
+ " 53.975000 | \n",
+ " 0.750239 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 2.2 | \n",
+ " 1375.50 | \n",
+ " 80 | \n",
+ " 948.25 | \n",
+ " 172.0 | \n",
+ " 1092.00 | \n",
+ " 122.0 | \n",
+ " 1583.75 | \n",
+ " 1203.25 | \n",
+ " 11.00 | \n",
+ " 60.000000 | \n",
+ " 0.786713 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 1.6 | \n",
+ " 1272.25 | \n",
+ " 51 | \n",
+ " 835.50 | \n",
+ " 131.0 | \n",
+ " 1205.00 | \n",
+ " 116.0 | \n",
+ " 1490.00 | \n",
+ " 1110.00 | \n",
+ " 11.15 | \n",
+ " 59.575001 | \n",
+ " 0.788794 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ " "
+ ],
+ "text/plain": [
+ " CO(GT) PT08.S1(CO) NMHC(GT) ... T RH AH\n",
+ "0 2.6 1360.00 150 ... 13.60 48.875001 0.757754\n",
+ "1 2.0 1292.25 112 ... 13.30 47.700000 0.725487\n",
+ "2 2.2 1402.00 88 ... 11.90 53.975000 0.750239\n",
+ "3 2.2 1375.50 80 ... 11.00 60.000000 0.786713\n",
+ "4 1.6 1272.25 51 ... 11.15 59.575001 0.788794\n",
+ "\n",
+ "[5 rows x 12 columns]"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 27
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#labels\n",
+ "label = Data['C6H6(GT)']"
+ ],
+ "metadata": {
+ "id": "XlG4H5FEMJb_"
+ },
+ "execution_count": 28,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "label.head()"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "69IcYKTSMTVQ",
+ "outputId": "1c0ed30a-91b0-40e8-bfc3-4aeaacc5530d"
+ },
+ "execution_count": 29,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "0 11.881723\n",
+ "1 9.397165\n",
+ "2 8.997817\n",
+ "3 9.228796\n",
+ "4 6.518224\n",
+ "Name: C6H6(GT), dtype: float64"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 29
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# test and train split: 30% of the rows are held out for testing.\n",
+ "# A fixed random_state makes the split, and every metric computed from it, reproducible.\n",
+ "X_train,X_test,y_train,y_test = train_test_split(feature,label,test_size=.3,random_state=42)"
+ ],
+ "metadata": {
+ "id": "Q7DD1ImWMVZ6"
+ },
+ "execution_count": 30,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "print(X_train.shape,y_train.shape)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "U4Fu1doTMar6",
+ "outputId": "f39a64ce-7db5-4476-a877-ecdd6a3cd3ab"
+ },
+ "execution_count": 31,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "(6549, 12) (6549,)\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "print(X_test.shape,y_test.shape)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "191l4vlJMj9u",
+ "outputId": "bc265eef-1b6e-4289-dbc6-eadfe8007570"
+ },
+ "execution_count": 32,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "(2808, 12) (2808,)\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "from sklearn.linear_model import LinearRegression\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "from sklearn.metrics import mean_squared_error, r2_score\n",
+ "lr = LinearRegression()\n",
+ "lr.fit(X_train,y_train)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "VT39Vg44MnwP",
+ "outputId": "48a5155b-d112-4b8c-b8a7-af404111d139"
+ },
+ "execution_count": 34,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "LinearRegression()"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 34
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "lr.score(X_test,y_test)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "wpjv1qqgNqOC",
+ "outputId": "e99ac38c-f6e6-4603-c701-aa891494603c"
+ },
+ "execution_count": 35,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "0.9593836249769289"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 35
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "y_pred = lr.predict(X_test)\n",
+ "y_pred"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "3oR9qsr0OAe8",
+ "outputId": "86e32613-ff03-46c2-9bca-cfc4e5c5c3d5"
+ },
+ "execution_count": 36,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "array([25.22105165, 8.8237993 , 9.16831067, ..., 7.79737906,\n",
+ " 3.70307572, 3.58615659])"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 36
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# The coefficients\n",
+ "print('Coefficients: \\n',lr.coef_)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "8yL9HO1IOH6U",
+ "outputId": "f0d38c41-c528-4665-997d-9c193fdd2940"
+ },
+ "execution_count": 37,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Coefficients: \n",
+ " [ 4.44175279e-01 -4.18014722e-03 5.40808435e-04 2.14754542e-02\n",
+ " 5.48115313e-03 -2.93017951e-03 -2.00242201e-02 4.99707842e-03\n",
+ " -1.55447838e-04 -2.71503044e-01 -1.02636097e-01 1.60707701e+00]\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Mean squared error between the true and the predicted test targets\n",
+ "print('Mean squared error: {:.2f}'.format(mean_squared_error(y_test, y_pred)))"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "w2xlBE63OQ2-",
+ "outputId": "2bf69bd0-4eed-49a8-ff9c-092df07b930d"
+ },
+ "execution_count": 38,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Mean squared error: 2.30\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Coefficient of determination on the test split (1 means a perfect prediction)\n",
+ "print('Coefficient of determination: {:.2f}'.format(r2_score(y_test, y_pred)))"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "QTRmo0IGOcWj",
+ "outputId": "acea7556-fc39-43b4-cc69-138da2e0e777"
+ },
+ "execution_count": 40,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Coefficient of determination: 0.96\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# the r squared value\n",
+ "print('R squared value: %.2f'%r2_score(y_test, y_pred))"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "U6LvlxL9OmsO",
+ "outputId": "174208f1-a30f-42b5-a182-6a03aaa688e9"
+ },
+ "execution_count": 41,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "R squared value: 0.96\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Predicted vs. actual target values on the test split. Points that fall on the\n",
+ "# dashed red identity line are perfect predictions.\n",
+ "fig, ax = plt.subplots()\n",
+ "ax.scatter(y_pred, y_test, edgecolors=(0, 0, 1))\n",
+ "ax.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--', lw=3)\n",
+ "# A title lets the figure stand alone when the notebook is skimmed.\n",
+ "ax.set_title('Linear regression: predicted vs actual C6H6(GT)')\n",
+ "ax.set_xlabel('Predicted')\n",
+ "ax.set_ylabel('Actual')\n",
+ "plt.show()"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 279
+ },
+ "id": "ULG4mOgCOtSO",
+ "outputId": "647e802c-b056-470f-f138-f487ab94476b"
+ },
+ "execution_count": 43,
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ }
+ }
+ ]
+ }
+ ]
+}
\ No newline at end of file
diff --git a/Student assignment updates.txt b/Student assignment updates.txt
index 9979d7c..c6477e0 100644
--- a/Student assignment updates.txt
+++ b/Student assignment updates.txt
@@ -1,2 +1,4 @@
Write your name and PRN no
Hello Updated
+Sweety Shrawan Gupta
+2019BTECS00094