From fa5c8b8b956155a3c3dae345de48d85163b9cfa5 Mon Sep 17 00:00:00 2001 From: "naveen.atukorala@pyxeda.ai" Date: Mon, 8 Nov 2021 09:34:53 +0530 Subject: [PATCH] random forest for human activity prediction --- Chapter14/activityPrediction/README.md | 15 ++ .../random_forest_activity_predictor.ipynb | 190 ++++++++++++++++++ .../random_forest_activity_predictor.py | 103 +++++++++- 3 files changed, 305 insertions(+), 3 deletions(-) create mode 100644 Chapter14/activityPrediction/random_forest_activity_predictor.ipynb diff --git a/Chapter14/activityPrediction/README.md b/Chapter14/activityPrediction/README.md index e69de29..702d9ca 100644 --- a/Chapter14/activityPrediction/README.md +++ b/Chapter14/activityPrediction/README.md @@ -0,0 +1,15 @@ +What it does : + + 1. This Python program implements a random forest model for human activity prediction using the UCI HAR Dataset. + +Dependencies : + + 1. The sklearn, pandas and gdown modules must be installed on the local machine to run this program. + +Things to check before running : + + 1. Check whether you have given the correct location of your dataset file. + 2. You should have access to the file in the Google Drive. 
+ + + \ No newline at end of file diff --git a/Chapter14/activityPrediction/random_forest_activity_predictor.ipynb b/Chapter14/activityPrediction/random_forest_activity_predictor.ipynb new file mode 100644 index 0000000..6a4a303 --- /dev/null +++ b/Chapter14/activityPrediction/random_forest_activity_predictor.ipynb @@ -0,0 +1,190 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "random_forest_activity_predictor.ipynb", + "provenance": [], + "collapsed_sections": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "8xLWQeV4UPh2" + }, + "source": [ + "# **Problem: Random forest for human activity prediction.**\n", + "\n", + "Random forest for human activity prediction using UCI HAR Dataset.\n", + "\n", + "**Examples:**\n", + "\n", + "Change the variable 'url' by providing the google drive URL of the zip file that you want to download.\n", + "\n", + "Eg:url = 'https://drive.google.com/file/d/1K7izykrla-qEuekekLayfGddml17calY/view?usp=sharing'\n", + "\n", + "Run all the cells. After executing the last cell, you will see the accuracy score of the model as well.\n", + "\n", + "**Notes:**\n", + "\n", + "Following things are needed to be checked before running the program.\n", + " 1. Sklearn module is needed to run this program in a notebook. \n", + " 2. Check whether you have given the correct location of your zip file.\n", + " 3. 
You should have access to the file in the Google Drive.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "V_xkWyuYVRl_" + }, + "source": [ + "# **Import Modules**" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "2iGN-VmnVVxC" + }, + "source": [ + "# Import pandas\n", + "import pandas as pd\n", + "\n", + "# Import RandomForestClassifier since we are using random forest for this problem\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "\n", + "# Import GridSearchCV\n", + "from sklearn.model_selection import GridSearchCV\n", + "\n", + "# Import accuracy_score to calculate accuracy\n", + "from sklearn.metrics import accuracy_score\n", + "\n", + "# Import gdown module to download files from google drive\n", + "import gdown\n", + "\n", + "# Import zip file module to open the zip file\n", + "from zipfile import ZipFile" + ], + "execution_count": 1, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mPLYAU7ZZIgv" + }, + "source": [ + "# **Get the file location from google drive**" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "zzNzKjELZOUV" + }, + "source": [ + "# Please change the URL as needed (make sure you have the access to the file)\n", + "\n", + "url = 'https://drive.google.com/file/d/1z_zn7vv-Sk60fdoQuPN3h9wkyP8H-FQR/view?usp=sharing'\n", + "\n", + "# Derive the file id from the URL\n", + "file_id = url.split('/')[-2]\n", + "\n", + "# Derive the download url of the the file\n", + "download_url = 'https://drive.google.com/uc?id=' + file_id\n", + "\n", + "# Give the location you want to save\n", + "file_location = 'UCI_HAR_Dataset.zip'\n", + "\n", + "# Download the file from drive\n", + "gdown.download(download_url, file_location, quiet=False)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9OKD1-GlaDDv" + }, + "source": [ + "# **Unzip the zip dataset**" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": 
"nY7UjA3UaFFN" + }, + "source": [ + "!unzip /content/UCI_HAR_Dataset.zip -d \"/content/unzipped_folder/\"" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "m25lZG87ZjJd" + }, + "source": [ + "# **Begin Activity prediction operation**" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "ShhkO__YZkrl" + }, + "source": [ + "# Read train and test file using pandas\n", + "xtrain=pd.read_table(r'/content/unzipped_folder/UCI HAR Dataset/train/X_train.txt',delim_whitespace=True,header=None)\n", + "\n", + "\n", + "xtest=pd.read_table(r'/content/unzipped_folder/UCI HAR Dataset/test/X_test.txt',delim_whitespace=True,header=None)\n", + "\n", + "\n", + "ytrain=pd.read_table(r'/content/unzipped_folder/UCI HAR Dataset/train/y_train.txt',header=None)\n", + "\n", + "\n", + "ytest=pd.read_table(r'/content/unzipped_folder/UCI HAR Dataset/test/y_test.txt',header=None)\n", + "\n", + "# Return first 5 raws of the xtrain dataframe\n", + "xtrain.head()\n", + "\n", + "# Initialize randomforest classifier\n", + "classifier = RandomForestClassifier()\n", + "\n", + "# Define parameters for GridSearchCV method below\n", + "parameters = {'n_estimators': [10, 100, 1000], 'max_depth': [3, 6, 9], 'max_features' : ['auto', 'log2']}\n", + "\n", + "# Derive the model\n", + "model=GridSearchCV(classifier,parameters,n_jobs=-1,cv=4,scoring='accuracy',verbose=4)\n", + "\n", + "# Fit training data\n", + "model.fit(xtrain.to_numpy(),ytrain.to_numpy().ravel().T)\n", + "\n", + "# Get the predictions\n", + "ypred=model.predict(xtest)\n", + "\n", + "# Calculate the accuracy of the model\n", + "accuracy=accuracy_score(ytest,ypred)\n", + "\n", + "# Print accuracy score\n", + "print('Accuracy Score: '+ str(accuracy*100) + ' %')" + ], + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file diff --git a/Chapter14/activityPrediction/random_forest_activity_predictor.py 
b/Chapter14/activityPrediction/random_forest_activity_predictor.py index daf8121..d6b3e86 100644 --- a/Chapter14/activityPrediction/random_forest_activity_predictor.py +++ b/Chapter14/activityPrediction/random_forest_activity_predictor.py @@ -1,3 +1,100 @@ -# TODO: Random Forest for activity prediction on this dataset: -# https://archive.ics.uci.edu/ml/datasets/human+activity+recognition+using+smartphones -# TODO: Code should be well commented. \ No newline at end of file +'''Copyright (c) 2021 AIClub + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the Software without restriction, including without +limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT +LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO +EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN +AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +OR OTHER DEALINGS IN THE SOFTWARE.''' + +# Random forest for human activity prediction using UCI HAR Dataset. 
+
+# Import pandas
+import pandas as pd
+
+# Import RandomForestClassifier since we are using random forest for this problem
+from sklearn.ensemble import RandomForestClassifier
+
+# Import GridSearchCV
+from sklearn.model_selection import GridSearchCV
+
+# Import accuracy_score to calculate accuracy
+from sklearn.metrics import accuracy_score
+
+# Import gdown module to download files from google drive
+import gdown
+
+# Import zip file module to open the zip file
+from zipfile import ZipFile
+
+#--------------------------------------------- Get the file location from google drive ----------------------------------------
+
+# Please change the URL as needed (make sure you have the access to the file)
+
+url = 'https://drive.google.com/file/d/1z_zn7vv-Sk60fdoQuPN3h9wkyP8H-FQR/view?usp=sharing'
+
+# Derive the file id from the URL (the second-to-last '/'-separated segment of the share link)
+file_id = url.split('/')[-2]
+
+# Derive the download url of the file
+download_url = 'https://drive.google.com/uc?id=' + file_id
+
+# Give the location you want to save it in your local machine
+file_location = 'UCI_HAR_Dataset.zip'
+
+#--------------------------------------------- Download and extract the zip file -------------------------------------------------
+
+# Download the file from drive to your local machine
+gdown.download(download_url, file_location)
+
+# Open the downloaded zip file and extract its contents into the current working directory
+with ZipFile(file_location, "r") as zip_file:
+    zip_file.extractall()
+
+#--------------------------------------------- Begin Activity prediction operation -----------------------------------------------
+
+# Read train and test files using pandas. Forward slashes keep the paths valid on every
+# operating system; sep=r'\s+' splits the feature columns on runs of whitespace.
+xtrain = pd.read_table('UCI HAR Dataset/train/X_train.txt', sep=r'\s+', header=None)
+
+xtest = pd.read_table('UCI HAR Dataset/test/X_test.txt', sep=r'\s+', header=None)
+
+ytrain = pd.read_table('UCI HAR Dataset/train/y_train.txt', header=None)
+
+ytest = pd.read_table('UCI HAR Dataset/test/y_test.txt', header=None)
+
+# Return first 5 rows of the xtrain dataframe. The value is discarded when this file runs
+# as a script; it is only displayed in an interactive session.
+xtrain.head()
+
+# Initialize randomforest classifier
+classifier = RandomForestClassifier()
+
+# Define parameters for GridSearchCV method below. 'sqrt' is used instead of the legacy
+# 'auto' value, which meant the same thing for classifiers but was removed from scikit-learn.
+parameters = {'n_estimators': [10, 100, 1000], 'max_depth': [3, 6, 9], 'max_features': ['sqrt', 'log2']}
+
+# Derive the model
+model = GridSearchCV(classifier, parameters, n_jobs=-1, cv=4, scoring='accuracy', verbose=4)
+
+# Fit training data (labels flattened to a 1-D array)
+model.fit(xtrain.to_numpy(), ytrain.to_numpy().ravel())
+
+# Get the predictions, passing a NumPy array as was done for fitting
+ypred = model.predict(xtest.to_numpy())
+
+# Calculate the accuracy of the model
+accuracy = accuracy_score(ytest.to_numpy().ravel(), ypred)
+
+# Print accuracy score
+print('Accuracy Score: '+ str(accuracy*100) + ' %')
+
+