From dd3d04f88a4793b804985f052f24dca0c218f0d2 Mon Sep 17 00:00:00 2001
From: ChathukiKet <chathuki.navanjana@pyxeda.ai>
Date: Wed, 20 Oct 2021 12:28:59 +0530
Subject: [PATCH 1/2] chapter 10 - logistic regression model for child vs adult
 dataset

---
 Chapter10/LogisticRegression/README.md        |  23 +-
 .../logistic_regression.ipynb                 | 287 ++++++++++++++++++
 .../LogisticRegression/logistic_regression.py | 103 ++++++-
 3 files changed, 409 insertions(+), 4 deletions(-)
 create mode 100644 Chapter10/LogisticRegression/logistic_regression.ipynb

diff --git a/Chapter10/LogisticRegression/README.md b/Chapter10/LogisticRegression/README.md
index 5c4a0ff..d24ae89 100644
--- a/Chapter10/LogisticRegression/README.md
+++ b/Chapter10/LogisticRegression/README.md
@@ -1 +1,22 @@
-This readme should describe 'what is in the python code'.
\ No newline at end of file
+What it does :
+
+    1. Reads a CSV file from Google Drive, creates a logistic regression model on the child vs adult dataset, and calculates the accuracy and confusion matrix.
+
+Dependencies :
+
+    1. The sklearn module must be installed on the local machine.
+
+    2. The pandas module must be installed on the local machine, to read the CSV file.
+
+    3. The gdown module must be installed on the local machine, to download the CSV file from Google Drive.
+
+
+Things to check before running :
+
+    1. Check whether you have access to the internet. 
+    
+    2. Check whether you have given the correct file location of the csv file. 
+
+    3. Check whether you have access to the file. 
+
+    4. Check whether the file format is correct.
\ No newline at end of file
diff --git a/Chapter10/LogisticRegression/logistic_regression.ipynb b/Chapter10/LogisticRegression/logistic_regression.ipynb
new file mode 100644
index 0000000..2564a8b
--- /dev/null
+++ b/Chapter10/LogisticRegression/logistic_regression.ipynb
@@ -0,0 +1,287 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "name": "logistic_regressor.ipynb",
+      "provenance": [],
+      "collapsed_sections": []
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "1xWA9Xxd-BT2"
+      },
+      "source": [
+        "# **Problem: Logistic Regression Model**\n",
+        "\n",
+        "The python program reads a CSV file (child vs adult dataset) from Google Drive and creates a logistic regression model on that dataset. It then calculates the confusion matrix and accuracy. \n",
+        "\n",
+        "**Examples:**\n",
+        "\n",
+        "Input  -->\n",
+        "\n",
+        "> url = 'https://drive.google.com/file/d/1J5z8OsAtgSp9i1eLxQFoxVexSZJuhI_-/view?usp=sharing'\n",
+        "\n",
+        "Output -->\n",
+        "\n",
+        "> Confusion matrix = [[488  12]\n",
+        " [163 337]]\n",
+        "\n",
+        "> Accuracy = 0.825\n",
+        "\n",
+        "**Notes:**\n",
+        "\n",
+        "Following things are needed to be checked before running the program.\n",
+        "*   Check whether you have given the correct location of your csv file.\n",
+        "*   Check whether your file format is correct.\n",
+        "*   Check whether you have access to the file."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "5zlOyhAE7e5k"
+      },
+      "source": [
+        "#@title MIT License\n",
+        "\n",
+        "# Copyright (c) 2021 AIClub\n",
+        "\n",
+        "# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated \n",
+        "# documentation files (the \"Software\"), to deal in the Software without restriction, including without \n",
+        "# limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of \n",
+        "# the Software, and to permit persons to whom the Software is furnished to do so, subject to the following \n",
+        "# conditions:\n",
+        "\n",
+        "# The above copyright notice and this permission notice shall be included in all copies or substantial\n",
+        "# portions of the Software.\n",
+        "\n",
+        "# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT \n",
+        "# LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO \n",
+        "# EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN \n",
+        "# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE \n",
+        "# OR OTHER DEALINGS IN THE SOFTWARE."
+      ],
+      "execution_count": 1,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "VBqjhcUbc-72"
+      },
+      "source": [
+        "# Import modules"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "9S-OD3v0cTxA"
+      },
+      "source": [
+        "# Import pandas module to read the CSV file and to process the tabular data\n",
+        "import pandas as pd\n",
+        "\n",
+        "# Import train_test_split to split data as test and train\n",
+        "from sklearn.model_selection import train_test_split\n",
+        "\n",
+        "# Import LogisticRegression class\n",
+        "from sklearn.linear_model import LogisticRegression\n",
+        "\n",
+        "# Import accuracy_score and confusion_matrix functions for accuracy calculations\n",
+        "from sklearn.metrics import accuracy_score, confusion_matrix\n",
+        "\n",
+        "# Import gdown module to download files from google drive\n",
+        "import gdown"
+      ],
+      "execution_count": 2,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "NC5yAkbHdCZm"
+      },
+      "source": [
+        "# Get the file location from the google drive"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "hR2Ar8XHdoav"
+      },
+      "source": [
+        "# Please change the url as needed (make sure you have the access to the file)\n",
+        "url = \"https://drive.google.com/file/d/1J5z8OsAtgSp9i1eLxQFoxVexSZJuhI_-/view?usp=sharing\" \n",
+        "\n",
+        "# Derive the file id from the url\n",
+        "file_id = url.split('/')[-2]\n",
+        "\n",
+        "# Derive the download url of the file\n",
+        "download_url = 'https://drive.google.com/uc?id=' + file_id\n",
+        "\n",
+        "# Give the file name you want to save it \n",
+        "file_name = \"child_vs_adult.csv\" \n",
+        "\n",
+        "# Derive the file location\n",
+        "file_location = \"/content/\" + file_name"
+      ],
+      "execution_count": 3,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "gd1WH9wQdou7"
+      },
+      "source": [
+        "# Download the data, create the logistic regression model and calculate the metrics"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "uMe4N40Uh5fe"
+      },
+      "source": [
+        "Download and read the CSV file"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "ouYfE4areWFk"
+      },
+      "source": [
+        "# Download the file from drive\n",
+        "gdown.download(download_url, file_location, quiet=False)\n",
+        "\n",
+        "# Read the CSV file\n",
+        "data = pd.read_csv(file_location)"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "GHfLu6-aggWJ"
+      },
+      "source": [
+        "Display a sample from the dataset"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "ipsfzY3BgqAE"
+      },
+      "source": [
+        "# Print a sample from the csv dataset\n",
+        "print('---------- First 5 rows of the Dataset ----------\\n', data.head())"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "T7pn823DhbAi"
+      },
+      "source": [
+        "Create the logistic regression model"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "I-LutOhihk1-"
+      },
+      "source": [
+        "# Please change the target variable according to the dataset\n",
+        "# You can refer to the dataset details printed in the above step \n",
+        "target_column = 'who_am_I'\n",
+        "\n",
+        "# Separate the input features by removing the target column\n",
+        "X = data.drop(columns=[target_column])\n",
+        "\n",
+        "# Separate the target values\n",
+        "y = data[target_column].values\n",
+        "\n",
+        "# Split the dataset into train and test data\n",
+        "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1, stratify=y)\n",
+        "\n",
+        "# Create logistic regressor\n",
+        "logistic_regressor = LogisticRegression()\n",
+        "\n",
+        "# Train the model\n",
+        "logistic_regressor.fit(X_train, y_train)\n",
+        "\n",
+        "# Predict using test values\n",
+        "y_pred = logistic_regressor.predict(X_test)\n",
+        "\n",
+        "# Get actual values and predicted values into a table\n",
+        "predicted_results = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})\n",
+        "print('---------- Predicted Results ----------\\n', predicted_results)"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "RxAogMqQhlcW"
+      },
+      "source": [
+        "Confusion matrix"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "qSMrcrFYhuPX"
+      },
+      "source": [
+        "# Calculate the confusion matrix\n",
+        "confusion_mat = confusion_matrix(y_test, y_pred)\n",
+        "print ('---------- Confusion Matrix of Your Model -----------\\n', confusion_mat)"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "eEpwTG1oAtls"
+      },
+      "source": [
+        "Accuracy"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "EEfpac8-AveP"
+      },
+      "source": [
+        "# Calculate accuracy using 'accuracy_score'\n",
+        "accuracy = accuracy_score(y_test, y_pred)\n",
+        "print('---------- Accuracy of Your Model -----------\\n', accuracy)"
+      ],
+      "execution_count": null,
+      "outputs": []
+    }
+  ]
+}
\ No newline at end of file
diff --git a/Chapter10/LogisticRegression/logistic_regression.py b/Chapter10/LogisticRegression/logistic_regression.py
index 7e0bdce..bc3813f 100644
--- a/Chapter10/LogisticRegression/logistic_regression.py
+++ b/Chapter10/LogisticRegression/logistic_regression.py
@@ -1,3 +1,100 @@
-# TODO: Create a logistic regression model (using scikit learn) on the child vs adult dataset. 
-#       Calculate the accuracy and confusion matrix
-# TODO: Code should be well commented.
\ No newline at end of file
+'''Copyright (c) 2021 AIClub
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 
+documentation files (the "Software"), to deal in the Software without restriction, including without 
+limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 
+the Software, and to permit persons to whom the Software is furnished to do so, subject to the following 
+conditions:
+
+The above copyright notice and this permission notice shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT 
+LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO 
+EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN 
+AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
+OR OTHER DEALINGS IN THE SOFTWARE.'''
+
+
+# Python program to create a logistic regression model (using scikit learn) on the child vs adult dataset. 
+# Calculate the accuracy and confusion matrix 
+
+
+# Import pandas module to read the CSV file and to process the tabular data
+import pandas as pd
+
+# Import train_test_split to split data as test and train
+from sklearn.model_selection import train_test_split
+
+# Import LogisticRegression class
+from sklearn.linear_model import LogisticRegression
+
+# Import accuracy_score and confusion_matrix functions for accuracy calculations
+from sklearn.metrics import accuracy_score, confusion_matrix
+
+# Import gdown module to download files from google drive
+import gdown
+
+
+# ------------------------------------ Get the file from the google drive. ------------------------------------------
+
+# Please change the url as needed (make sure you have the access to the file)
+url = 'https://drive.google.com/file/d/1J5z8OsAtgSp9i1eLxQFoxVexSZJuhI_-/view?usp=sharing'
+
+# Derive the file id from the url
+file_id = url.split('/')[-2]
+
+# Derive the download url of the file
+download_url = 'https://drive.google.com/uc?id=' + file_id
+
+# Give the location you want to save it in your local machine
+file_location = r'child vs adult.csv'
+
+# Download the file from drive to your local machine
+gdown.download(download_url, file_location)
+
+
+# ------------------------------------ Create the Logistic regression model -----------------------------------------
+
+# Read the CSV file
+data = pd.read_csv(file_location)
+
+# Print a sample from the csv dataset
+print('\n---------- First 5 rows of the Dataset ----------\n', data.head())
+
+# Please change the target variable according to the dataset
+# You can refer to the dataset details printed in the above step 
+target_column = 'who_am_I'
+
+# Seperate the training data by removing the target column
+X = data.drop(columns=[target_column])
+
+# Separate the target values
+y = data[target_column].values
+
+# Split the dataset into train and test data
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
+
+# Create logistic regressor
+logistic_regressor = LogisticRegression()
+
+# Train the model
+logistic_regressor.fit(X_train, y_train)
+
+# Predict using test values
+y_pred = logistic_regressor.predict(X_test)
+
+# Get actual values and predicted values into a table
+predicted_results = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
+print('\n---------- Predicted Results ----------\n', predicted_results)
+
+
+# ------------------------------- Calculate the Accuracy and Confusion Matrix --------------------------------------
+
+# Calculate the confusion matrix
+confusion_mat = confusion_matrix(y_test, y_pred)
+print ('\n---------- Confusion Matrix of Your Model -----------\n', confusion_mat)
+
+# Calculate accuracy using 'accuracy_score'
+accuracy = accuracy_score(y_test, y_pred)
+print('\n---------- Accuracy of Your Model -----------\n', accuracy)
\ No newline at end of file

From d8b1f5aeba3a233aaea52225f28037ed5de909d5 Mon Sep 17 00:00:00 2001
From: ChathukiKet <chathuki.navanjana@pyxeda.ai>
Date: Fri, 26 Nov 2021 23:06:00 +0530
Subject: [PATCH 2/2] remove the print statement

---
 Chapter10/LogisticRegression/logistic_regression.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/Chapter10/LogisticRegression/logistic_regression.py b/Chapter10/LogisticRegression/logistic_regression.py
index bc3813f..9b1281f 100644
--- a/Chapter10/LogisticRegression/logistic_regression.py
+++ b/Chapter10/LogisticRegression/logistic_regression.py
@@ -84,10 +84,6 @@
 # Predict using test values
 y_pred = logistic_regressor.predict(X_test)
 
-# Get actual values and predicted values into a table
-predicted_results = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
-print('\n---------- Predicted Results ----------\n', predicted_results)
-
 
 # ------------------------------- Calculate the Accuracy and Confusion Matrix --------------------------------------