pyxeda · ChathukiKet · Oct 20, 2021 · Nov 26, 2021
diff --git a/Chapter10/LogisticRegression/README.md b/Chapter10/LogisticRegression/README.md
@@ -1 +1,22 @@
-This readme should describe 'what is in the python code'.
+What it does :
+
+    1. Reads a csv file from the google drive, then create a logistic regression model on the child vs adult dataset. Calculate the accuracy and confusion matrix.
+
+Dependancies :
+
+    1. sklearn module is needed to be installed in the local machine.
+
+    2. pandas module is needed to be installed in the local machine, to read CSV.
+
+    3. gdown module is needed to be installed in the local machine, to download the CSV file from the google drive. 
+
+
+Things to check before running :
+
+    1. Check whether you have access to the internet. 
+
+    2. Check whether you have given the correct file location of the csv file. 
+
+    3. Check whether you have access to the file. 
+
+    4. Check whether the file format is correct.
diff --git a/Chapter10/LogisticRegression/logistic_regression.ipynb b/Chapter10/LogisticRegression/logistic_regression.ipynb
@@ -0,0 +1,287 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "name": "logistic_regressor.ipynb",
+      "provenance": [],
+      "collapsed_sections": []
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "1xWA9Xxd-BT2"
+      },
+      "source": [
+        "# **Problem: Logistic Regression Model**\n",
+        "\n",
+        "The python program reads a csv file (child vs adult dataset) from the google drive, and creates a logistic regression model on that dataset. Calculate the confusion matrix and accuracy. \n",
+        "\n",
+        "**Examples:**\n",
+        "\n",
+        "Input  -->\n",
+        "\n",
+        "> url = 'https://drive.google.com/file/d/1J5z8OsAtgSp9i1eLxQFoxVexSZJuhI_-/view?usp=sharing'\n",
+        "\n",
+        "Output -->\n",
+        "\n",
+        "> Confucion matrix = [[488  12]\n",
+        " [163 337]]\n",
+        "\n",
+        "> Accuracy = 0.825\n",
+        "\n",
+        "**Notes:**\n",
+        "\n",
+        "Following things are needed to be checked before running the program.\n",
+        "*   Check whether you have given the correct location of your csv file.\n",
+        "*   Check whether you file format is correct.\n",
+        "*   Check whether you have access to the file."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "5zlOyhAE7e5k"
+      },
+      "source": [
+        "#@title MIT License\n",
+        "\n",
+        "# Copyright (c) 2021 AIClub\n",
+        "\n",
+        "# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated \n",
+        "# documentation files (the \"Software\"), to deal in the Software without restriction, including without \n",
+        "# limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of \n",
+        "# the Software, and to permit persons to whom the Software is furnished to do so, subject to the following \n",
+        "# conditions:\n",
+        "\n",
+        "# The above copyright notice and this permission notice shall be included in all copies or substantial\n",
+        "# portions of the Software.\n",
+        "\n",
+        "# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT \n",
+        "# LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO \n",
+        "# EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN \n",
+        "# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE \n",
+        "# OR OTHER DEALINGS IN THE SOFTWARE."
+      ],
+      "execution_count": 1,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "VBqjhcUbc-72"
+      },
+      "source": [
+        "# Import modules"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "9S-OD3v0cTxA"
+      },
+      "source": [
+        "# Import pandas module to read the CSV file and to process the tabular data\n",
+        "import pandas as pd\n",
+        "\n",
+        "# Import train_test_split to split data as test and train\n",
+        "from sklearn.model_selection import train_test_split\n",
+        "\n",
+        "# Import LogisticRegression class\n",
+        "from sklearn.linear_model import LogisticRegression\n",
+        "\n",
+        "# Import accuracy_score and confusion_matrix functions for accuracy calculations\n",
+        "from sklearn.metrics import accuracy_score, confusion_matrix\n",
+        "\n",
+        "# Import gdown module to download files from google drive\n",
+        "import gdown"
+      ],
+      "execution_count": 2,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "NC5yAkbHdCZm"
+      },
+      "source": [
+        "# Get the file location from the google drive"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "hR2Ar8XHdoav"
+      },
+      "source": [
+        "# Please change the url as needed (make sure you have the access to the file)\n",
+        "url = \"https://drive.google.com/file/d/1J5z8OsAtgSp9i1eLxQFoxVexSZJuhI_-/view?usp=sharing\" \n",
+        "\n",
+        "# Derive the file id from the url\n",
+        "file_id = url.split('/')[-2]\n",
+        "\n",
+        "# Derive the download url of the file\n",
+        "download_url = 'https://drive.google.com/uc?id=' + file_id\n",
+        "\n",
+        "# Give the file name you want to save it \n",
+        "file_name = \"child_vs_adult.csv\" \n",
+        "\n",
+        "# Derive the file location\n",
+        "file_location = \"/content/\" + file_name"
+      ],
+      "execution_count": 3,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "gd1WH9wQdou7"
+      },
+      "source": [
+        "# Downloading, creating of logistic regression model and calculating of metrics"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "uMe4N40Uh5fe"
+      },
+      "source": [
+        "Download and read the CSV file"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "ouYfE4areWFk"
+      },
+      "source": [
+        "# Download the file from drive\n",
+        "gdown.download(download_url, file_location, quiet=False)\n",
+        "\n",
+        "# Read the CSV file\n",
+        "data = pd.read_csv(file_location)"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "GHfLu6-aggWJ"
+      },
+      "source": [
+        "Display a sample from the dataset"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "ipsfzY3BgqAE"
+      },
+      "source": [
+        "# Print a sample from the csv dataset\n",
+        "print('---------- First 5 rows of the Dataset ----------\\n', data.head())"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "T7pn823DhbAi"
+      },
+      "source": [
+        "Create the logistic regression model"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "I-LutOhihk1-"
+      },
+      "source": [
+        "# Please change the target variable according to the dataset\n",
+        "# You can refer to the dataset details printed in the above step \n",
+        "target_column = 'who_am_I'\n",
+        "\n",
+        "# Seperate the training data by removing the target column\n",
+        "X = data.drop(columns=[target_column])\n",
+        "\n",
+        "# Separate the target values\n",
+        "y = data[target_column].values\n",
+        "\n",
+        "# Split the dataset into train and test data\n",
+        "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1, stratify=y)\n",
+        "\n",
+        "# Create logistic regressor\n",
+        "logistic_regressor = LogisticRegression()\n",
+        "\n",
+        "# Train the model\n",
+        "logistic_regressor.fit(X_train, y_train)\n",
+        "\n",
+        "# Predict using test values\n",
+        "y_pred = logistic_regressor.predict(X_test)\n",
+        "\n",
+        "# Get actual values and predicted values into a table\n",
+        "predicted_results = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})\n",
+        "print('---------- Predicted Results ----------\\n', predicted_results)"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "RxAogMqQhlcW"
+      },
+      "source": [
+        "Confusion matrix"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "qSMrcrFYhuPX"
+      },
+      "source": [
+        "# Calculate the confusion matrix\n",
+        "confusion_mat = confusion_matrix(y_test, y_pred)\n",
+        "print ('---------- Confusion Matrix of Your Model -----------\\n', confusion_mat)"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "eEpwTG1oAtls"
+      },
+      "source": [
+        "Accuracy"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "EEfpac8-AveP"
+      },
+      "source": [
+        "# Calculate accuracy using 'accuracy_score'\n",
+        "accuracy = accuracy_score(y_test, y_pred)\n",
+        "print('---------- Accuracy of Your Model -----------\\n', accuracy)"
+      ],
+      "execution_count": null,
+      "outputs": []
+    }
+  ]
+}