pyxeda · naveenDevPyxeda · Nov 8, 2021 · Nov 8, 2021
diff --git a/Chapter14/dataGeneration/README.md b/Chapter14/dataGeneration/README.md
@@ -0,0 +1,19 @@
+What it does : 
+
+    1. Combine two CSV files, where each row belongs to a unique timestamp that is common across the two tables.
+    2. Predict the solar power generation at any point using the combined CSV file.
+
+Dependencies :
+
+    1. The matplotlib module must be installed on the local machine to run this program.
+    2. The sklearn (scikit-learn) module must be installed on the local machine to run this program.
+    3. The gdown module must be installed on the local machine.
+    4. The zipfile module is part of the Python standard library, so it needs no separate installation.
+
+Things to check before running :
+
+    1. Check whether you have given the correct location of your dataset file.
+    2. You should have access to the file in the Google Drive.
+
+
+
diff --git a/Chapter14/dataGeneration/data_generator.ipynb b/Chapter14/dataGeneration/data_generator.ipynb
@@ -0,0 +1,276 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "name": "data_generator.ipynb",
+      "provenance": [],
+      "collapsed_sections": []
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "hFv0gCCZFUGv"
+      },
+      "source": [
+        "# **Problem: Predict Solar power generation.**\n",
+        "\n",
+        "1. Combine two CSV files, where each row belongs to a unique timestamp that is common across the two tables.\n",
+        "2. Predict the solar power generation at any point using the combined CSV file.\n",
+        "\n",
+        "**Examples:**\n",
+        "\n",
+        "Change the variable 'url' by providing the google drive URL of the zip file, that you want to download.\n",
+        "\n",
+        "Eg:url = 'https://drive.google.com/file/d/1dVBMQb-eKRq92WMKfJDbTBt-j-W_5s5u/view?usp=sharing'\n",
+        "\n",
+        "Run all the cells. After executing the last cell, you will see the predicted solar power mapped with the actual solar power generated.\n",
+        "\n",
+        "**Notes:**\n",
+        "\n",
+        "Check the following before running the program.\n",
+        "1. The matplotlib module must be installed on the local machine.\n",
+        "2. The sklearn (scikit-learn) module must be installed on the local machine.\n",
+        "3. The gdown module must be installed on the local machine.\n",
+        "4. The zipfile module is part of the Python standard library, so it needs no separate installation.\n",
+        "5. Check whether you have given the correct location of your dataset file.\n",
+        "6. You should have access to the file in the Google Drive.\n",
+        "\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "PgDcnNPEGETe"
+      },
+      "source": [
+        "# **Import Modules**"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "lnbm7tctGHeT"
+      },
+      "source": [
+        "# Import pandas\n",
+        "import pandas as pd\n",
+        "\n",
+        "# Import pyplot module to plot the results\n",
+        "import matplotlib.pyplot as plt\n",
+        "\n",
+        "# Import train_test_split module to split the data into train and test\n",
+        "from sklearn.model_selection import train_test_split\n",
+        "\n",
+        "# Import LinearRegression module to use in model training\n",
+        "from sklearn.linear_model import LinearRegression\n",
+        "\n",
+        "# Import gdown module to download files from google drive\n",
+        "import gdown\n",
+        "\n",
+        "# Import zip file module to open the zip file\n",
+        "from zipfile import ZipFile"
+      ],
+      "execution_count": 1,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "rpDGPXjKGK20"
+      },
+      "source": [
+        "# **Get the file location from google drive and download**"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 102
+        },
+        "id": "22ygR_COGOi2",
+        "outputId": "bff0a77c-1a85-4e11-9418-0fb8bc7bce95"
+      },
+      "source": [
+        "# Google Drive share link of the dataset zip. Change it as needed\n",
+        "# (make sure you have access to the file).\n",
+        "url = 'https://drive.google.com/file/d/1dVBMQb-eKRq92WMKfJDbTBt-j-W_5s5u/view?usp=sharing'\n",
+        "\n",
+        "# Derive the file id from the URL: for a '.../file/d/<id>/view?...' link\n",
+        "# it is the second-to-last '/'-separated segment.\n",
+        "file_id = url.split('/')[-2]\n",
+        "\n",
+        "# Derive the direct-download URL of the file\n",
+        "download_url = 'https://drive.google.com/uc?id=' + file_id\n",
+        "\n",
+        "# Location where the archive is saved on your local machine\n",
+        "file_location = 'solar.zip'\n",
+        "\n",
+        "# Download the file from Drive to your local machine\n",
+        "gdown.download(download_url, file_location, quiet=False)"
+      ],
+      "execution_count": 6,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Downloading...\n",
+            "From: https://drive.google.com/uc?id=1dVBMQb-eKRq92WMKfJDbTBt-j-W_5s5u\n",
+            "To: /content/solar.zip\n",
+            "100%|██████████| 1.01M/1.01M [00:00<00:00, 105MB/s]\n"
+          ]
+        },
+        {
+          "output_type": "execute_result",
+          "data": {
+            "application/vnd.google.colaboratory.intrinsic+json": {
+              "type": "string"
+            },
+            "text/plain": [
+              "'solar.zip'"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 6
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "yQFUYnj3GQ8i"
+      },
+      "source": [
+        "# **Unzip the zip dataset**"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "XFjoCiZ0GUEM",
+        "outputId": "e11287c9-818a-4cb5-90b7-149d43240511"
+      },
+      "source": [
+        "# Extract the downloaded archive with the already-imported ZipFile instead of\n",
+        "# shelling out to `unzip` with Colab-only /content paths. The relative paths\n",
+        "# match where the download cell saves the archive and where the CSVs are read.\n",
+        "with ZipFile(file_location) as archive:\n",
+        "    archive.extractall('unzipped_folder')"
+      ],
+      "execution_count": 7,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Archive:  /content/solar.zip\n",
+            "   creating: /content/unzipped_folder/solar/\n",
+            "  inflating: /content/unzipped_folder/solar/Plant_2_Generation_Data.csv  \n",
+            "  inflating: /content/unzipped_folder/solar/Plant_2_Weather_Sensor_Data.csv  \n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "waqSASm3GXqC"
+      },
+      "source": [
+        "# **Read and combine the CSVs**"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "Ql0wj_T4Ga4r"
+      },
+      "source": [
+        "# Load the generation data CSV\n",
+        "plant = pd.read_csv(\n",
+        "    'unzipped_folder/solar/Plant_2_Generation_Data.csv',\n",
+        "    sep=',',\n",
+        "    engine='python',\n",
+        "    header=0,\n",
+        ")\n",
+        "\n",
+        "# Load the weather sensor data CSV\n",
+        "weather = pd.read_csv(\n",
+        "    'unzipped_folder/solar/Plant_2_Weather_Sensor_Data.csv',\n",
+        "    sep=',',\n",
+        "    engine='python',\n",
+        "    header=0,\n",
+        ")\n",
+        "\n",
+        "# Join the two tables on the shared DATE_TIME and PLANT_ID columns; any other\n",
+        "# column name present in both tables gets a _GENERATION / _WEATHER suffix.\n",
+        "combined_file = pd.merge(\n",
+        "    plant,\n",
+        "    weather,\n",
+        "    on=[\"DATE_TIME\", \"PLANT_ID\"],\n",
+        "    suffixes=(\"_GENERATION\", \"_WEATHER\"),\n",
+        ")\n",
+        "\n",
+        "# Persist the combined table as a CSV\n",
+        "combined_file.to_csv('output.csv', sep=',')"
+      ],
+      "execution_count": 9,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "uzJNGQdOGdxV"
+      },
+      "source": [
+        "# **Start the training and prediction**"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "GMD2mGPWGgtn"
+      },
+      "source": [
+        "# Feature columns used as predictors\n",
+        "X2 = combined_file[['AMBIENT_TEMPERATURE', 'MODULE_TEMPERATURE', 'IRRADIATION']]\n",
+        "\n",
+        "# Target column\n",
+        "y2 = combined_file['AC_POWER']\n",
+        "\n",
+        "# Hold out 30% of the rows for testing. A fixed random_state makes the split,\n",
+        "# and therefore the fitted model and the plot, reproducible across runs.\n",
+        "X2_train, X2_test, y2_train, y2_test = train_test_split(\n",
+        "    X2, y2, test_size=0.3, random_state=42\n",
+        ")\n",
+        "\n",
+        "# Initialize the linear regression model\n",
+        "lm2 = LinearRegression()\n",
+        "\n",
+        "# Fit the model on the training data\n",
+        "lm2.fit(X2_train, y2_train)\n",
+        "\n",
+        "# Predict the target for the held-out test rows\n",
+        "predictions = lm2.predict(X2_test)"
+      ],
+      "execution_count": 10,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "2sHTClUUGkop"
+      },
+      "source": [
+        "# **Plot the results**"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "ojs96P6ZGn5U"
+      },
+      "source": [
+        "# scatter(x, y): the actual test targets go on the x-axis and the model's\n",
+        "# predictions on the y-axis, so the axis labels follow that argument order.\n",
+        "plt.scatter(y2_test, predictions)\n",
+        "plt.title('Actual Solar Output Values vs Predicted Values for Plant 2')\n",
+        "plt.xlabel('Actual Output')\n",
+        "plt.ylabel('Predicted Output')\n",
+        "\n",
+        "plt.show()"
+      ],
+      "execution_count": null,
+      "outputs": []
+    }
+  ]
+}