From d5a045ff1427ea83fb54a44b2a9b852eef0ed06c Mon Sep 17 00:00:00 2001
From: Daniel Gonzalez <daniel.gonzalez1065@gmail.com>
Date: Mon, 22 Nov 2021 14:48:02 -0600
Subject: [PATCH 1/2] entrega lab

---
 your-code/main.ipynb | 1225 +++++++++++++++++++++++++++++++++++-------
 1 file changed, 1023 insertions(+), 202 deletions(-)

diff --git a/your-code/main.ipynb b/your-code/main.ipynb
index 31724c5..8857238 100644
--- a/your-code/main.ipynb
+++ b/your-code/main.ipynb
@@ -1,204 +1,1025 @@
 {
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### 1. Import pandas library"
-   ]
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "kernelspec": {
+      "display_name": "Python 3",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.6.5"
+    },
+    "colab": {
+      "name": "main.ipynb",
+      "provenance": [],
+      "collapsed_sections": []
+    }
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### 2. Import pymysql and sqlalchemy as you have learnt in the lesson of importing/exporting data \n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### 3. Create a mysql engine to set the connection to the server. Check the connection details in [this link](https://relational.fit.cvut.cz/search?tableCount%5B%5D=0-10&tableCount%5B%5D=10-30&dataType%5B%5D=Numeric&databaseSize%5B%5D=KB&databaseSize%5B%5D=MB)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### 4. Import the users table "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### 5. Rename Id column to userId"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### 6. Import the posts table. "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### 7. Rename Id column to postId and OwnerUserId to userId"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### 8. Define new dataframes for users and posts with the following selected columns:\n",
-    "    **users columns**: userId, Reputation,Views,UpVotes,DownVotes\n",
-    "    **posts columns**: postId, Score,userID,ViewCount,CommentCount"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### 8. Merge both dataframes, users and posts. \n",
-    "You will need to make a [merge](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.merge.html) of posts and users dataframes."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### 9. How many missing values do you have in your merged dataframe? On which columns?"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### 10. You will need to make something with missing values.  Will you clean or filling them? Explain. \n",
-    "**Remember** to check the results of your code before passing to the next step"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### 11. Adjust the data types in order to avoid future issues. Which ones should be changed? "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### Bonus: Identify extreme values in your merged dataframe as you have learned in class, create a dataframe called outliers with the same columns as our data set and calculate the bounds. The values of the outliers dataframe will be the values of the merged_df that fall outside that bounds. You will need to save your outliers dataframe to a csv file on your-code folder."
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.6.5"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "ibRuvGpz7xaN"
+      },
+      "source": [
+        "#### 1. Import pandas library"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "q9akui1F7xaW",
+        "outputId": "a4e32126-f214-489c-ac91-b1b000c1ffb2"
+      },
+      "source": [
+        "%pip install pymysql==1.0.2"
+      ],
+      "execution_count": 1,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Collecting pymysql\n",
+            "  Downloading PyMySQL-1.0.2-py3-none-any.whl (43 kB)\n",
+            "\u001b[?25l\r\u001b[K     |███████▌                        | 10 kB 21.8 MB/s eta 0:00:01\r\u001b[K     |███████████████                 | 20 kB 23.9 MB/s eta 0:00:01\r\u001b[K     |██████████████████████▍         | 30 kB 12.2 MB/s eta 0:00:01\r\u001b[K     |██████████████████████████████  | 40 kB 9.4 MB/s eta 0:00:01\r\u001b[K     |████████████████████████████████| 43 kB 1.8 MB/s \n",
+            "\u001b[?25hInstalling collected packages: pymysql\n",
+            "Successfully installed pymysql-1.0.2\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "tdtIReKs7zQQ"
+      },
+      "source": [
+        "import pandas as pd"
+      ],
+      "execution_count": 45,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "3LWluftN7xaY"
+      },
+      "source": [
+        "#### 2. Import pymysql and sqlalchemy as you have learnt in the lesson of importing/exporting data \n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "OMkTAepQ7xaY"
+      },
+      "source": [
+        "import pymysql\n",
+        "from sqlalchemy import create_engine"
+      ],
+      "execution_count": 46,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "kzzXvfxL7xaZ"
+      },
+      "source": [
+        "#### 3. Create a mysql engine to set the connection to the server. Check the connection details in [this link](https://relational.fit.cvut.cz/search?tableCount%5B%5D=0-10&tableCount%5B%5D=10-30&dataType%5B%5D=Numeric&databaseSize%5B%5D=KB&databaseSize%5B%5D=MB)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "yJzjhyAs7xaZ"
+      },
+      "source": [
+        "engine = create_engine('mysql+pymysql://guest:relational@relational.fit.cvut.cz/stats')"
+      ],
+      "execution_count": 47,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "BKCX8cM-7xaZ"
+      },
+      "source": [
+        "#### 4. Import the users table "
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 348
+        },
+        "id": "zgO3qWJN7xaa",
+        "outputId": "a733d330-af41-4a3a-c7f6-d3718592e3a5"
+      },
+      "source": [
+        "data = pd.read_sql_query(sql='SELECT * FROM stats.users', con=engine)\n",
+        "data.head(n=5)"
+      ],
+      "execution_count": 48,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/html": [
+              "<div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>Id</th>\n",
+              "      <th>Reputation</th>\n",
+              "      <th>CreationDate</th>\n",
+              "      <th>DisplayName</th>\n",
+              "      <th>LastAccessDate</th>\n",
+              "      <th>WebsiteUrl</th>\n",
+              "      <th>Location</th>\n",
+              "      <th>AboutMe</th>\n",
+              "      <th>Views</th>\n",
+              "      <th>UpVotes</th>\n",
+              "      <th>DownVotes</th>\n",
+              "      <th>AccountId</th>\n",
+              "      <th>Age</th>\n",
+              "      <th>ProfileImageUrl</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>-1</td>\n",
+              "      <td>1</td>\n",
+              "      <td>2010-07-19 06:55:26</td>\n",
+              "      <td>Community</td>\n",
+              "      <td>2010-07-19 06:55:26</td>\n",
+              "      <td>http://meta.stackexchange.com/</td>\n",
+              "      <td>on the server farm</td>\n",
+              "      <td>&lt;p&gt;Hi, I'm not really a person.&lt;/p&gt;\\n\\n&lt;p&gt;I'm ...</td>\n",
+              "      <td>0</td>\n",
+              "      <td>5007</td>\n",
+              "      <td>1920</td>\n",
+              "      <td>-1</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>None</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>2</td>\n",
+              "      <td>101</td>\n",
+              "      <td>2010-07-19 14:01:36</td>\n",
+              "      <td>Geoff Dalgas</td>\n",
+              "      <td>2013-11-12 22:07:23</td>\n",
+              "      <td>http://stackoverflow.com</td>\n",
+              "      <td>Corvallis, OR</td>\n",
+              "      <td>&lt;p&gt;Developer on the StackOverflow team.  Find ...</td>\n",
+              "      <td>25</td>\n",
+              "      <td>3</td>\n",
+              "      <td>0</td>\n",
+              "      <td>2</td>\n",
+              "      <td>37.0</td>\n",
+              "      <td>None</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2</th>\n",
+              "      <td>3</td>\n",
+              "      <td>101</td>\n",
+              "      <td>2010-07-19 15:34:50</td>\n",
+              "      <td>Jarrod Dixon</td>\n",
+              "      <td>2014-08-08 06:42:58</td>\n",
+              "      <td>http://stackoverflow.com</td>\n",
+              "      <td>New York, NY</td>\n",
+              "      <td>&lt;p&gt;&lt;a href=\"http://blog.stackoverflow.com/2009...</td>\n",
+              "      <td>22</td>\n",
+              "      <td>19</td>\n",
+              "      <td>0</td>\n",
+              "      <td>3</td>\n",
+              "      <td>35.0</td>\n",
+              "      <td>None</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>3</th>\n",
+              "      <td>4</td>\n",
+              "      <td>101</td>\n",
+              "      <td>2010-07-19 19:03:27</td>\n",
+              "      <td>Emmett</td>\n",
+              "      <td>2014-01-02 09:31:02</td>\n",
+              "      <td>http://minesweeperonline.com</td>\n",
+              "      <td>San Francisco, CA</td>\n",
+              "      <td>&lt;p&gt;currently at a startup in SF&lt;/p&gt;\\n\\n&lt;p&gt;form...</td>\n",
+              "      <td>11</td>\n",
+              "      <td>0</td>\n",
+              "      <td>0</td>\n",
+              "      <td>1998</td>\n",
+              "      <td>28.0</td>\n",
+              "      <td>http://i.stack.imgur.com/d1oHX.jpg</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4</th>\n",
+              "      <td>5</td>\n",
+              "      <td>6792</td>\n",
+              "      <td>2010-07-19 19:03:57</td>\n",
+              "      <td>Shane</td>\n",
+              "      <td>2014-08-13 00:23:47</td>\n",
+              "      <td>http://www.statalgo.com</td>\n",
+              "      <td>New York, NY</td>\n",
+              "      <td>&lt;p&gt;Quantitative researcher focusing on statist...</td>\n",
+              "      <td>1145</td>\n",
+              "      <td>662</td>\n",
+              "      <td>5</td>\n",
+              "      <td>54503</td>\n",
+              "      <td>35.0</td>\n",
+              "      <td>None</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div>"
+            ],
+            "text/plain": [
+              "   Id  Reputation  ...   Age                     ProfileImageUrl\n",
+              "0  -1           1  ...   NaN                                None\n",
+              "1   2         101  ...  37.0                                None\n",
+              "2   3         101  ...  35.0                                None\n",
+              "3   4         101  ...  28.0  http://i.stack.imgur.com/d1oHX.jpg\n",
+              "4   5        6792  ...  35.0                                None\n",
+              "\n",
+              "[5 rows x 14 columns]"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 48
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "iEGINkSQ7xab"
+      },
+      "source": [
+        "#### 5. Rename Id column to userId"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 348
+        },
+        "id": "TCBu6hVW7xab",
+        "outputId": "0372fb55-8429-4635-e185-644e9431ca35"
+      },
+      "source": [
+        "users = data.rename(columns={'Id': 'userId'})  # keep every row; .head() is only for the preview below\n",
+        "users.head()"
+      ],
+      "execution_count": 49,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/html": [
+              "<div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>userId</th>\n",
+              "      <th>Reputation</th>\n",
+              "      <th>CreationDate</th>\n",
+              "      <th>DisplayName</th>\n",
+              "      <th>LastAccessDate</th>\n",
+              "      <th>WebsiteUrl</th>\n",
+              "      <th>Location</th>\n",
+              "      <th>AboutMe</th>\n",
+              "      <th>Views</th>\n",
+              "      <th>UpVotes</th>\n",
+              "      <th>DownVotes</th>\n",
+              "      <th>AccountId</th>\n",
+              "      <th>Age</th>\n",
+              "      <th>ProfileImageUrl</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>-1</td>\n",
+              "      <td>1</td>\n",
+              "      <td>2010-07-19 06:55:26</td>\n",
+              "      <td>Community</td>\n",
+              "      <td>2010-07-19 06:55:26</td>\n",
+              "      <td>http://meta.stackexchange.com/</td>\n",
+              "      <td>on the server farm</td>\n",
+              "      <td>&lt;p&gt;Hi, I'm not really a person.&lt;/p&gt;\\n\\n&lt;p&gt;I'm ...</td>\n",
+              "      <td>0</td>\n",
+              "      <td>5007</td>\n",
+              "      <td>1920</td>\n",
+              "      <td>-1</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>None</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>2</td>\n",
+              "      <td>101</td>\n",
+              "      <td>2010-07-19 14:01:36</td>\n",
+              "      <td>Geoff Dalgas</td>\n",
+              "      <td>2013-11-12 22:07:23</td>\n",
+              "      <td>http://stackoverflow.com</td>\n",
+              "      <td>Corvallis, OR</td>\n",
+              "      <td>&lt;p&gt;Developer on the StackOverflow team.  Find ...</td>\n",
+              "      <td>25</td>\n",
+              "      <td>3</td>\n",
+              "      <td>0</td>\n",
+              "      <td>2</td>\n",
+              "      <td>37.0</td>\n",
+              "      <td>None</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2</th>\n",
+              "      <td>3</td>\n",
+              "      <td>101</td>\n",
+              "      <td>2010-07-19 15:34:50</td>\n",
+              "      <td>Jarrod Dixon</td>\n",
+              "      <td>2014-08-08 06:42:58</td>\n",
+              "      <td>http://stackoverflow.com</td>\n",
+              "      <td>New York, NY</td>\n",
+              "      <td>&lt;p&gt;&lt;a href=\"http://blog.stackoverflow.com/2009...</td>\n",
+              "      <td>22</td>\n",
+              "      <td>19</td>\n",
+              "      <td>0</td>\n",
+              "      <td>3</td>\n",
+              "      <td>35.0</td>\n",
+              "      <td>None</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>3</th>\n",
+              "      <td>4</td>\n",
+              "      <td>101</td>\n",
+              "      <td>2010-07-19 19:03:27</td>\n",
+              "      <td>Emmett</td>\n",
+              "      <td>2014-01-02 09:31:02</td>\n",
+              "      <td>http://minesweeperonline.com</td>\n",
+              "      <td>San Francisco, CA</td>\n",
+              "      <td>&lt;p&gt;currently at a startup in SF&lt;/p&gt;\\n\\n&lt;p&gt;form...</td>\n",
+              "      <td>11</td>\n",
+              "      <td>0</td>\n",
+              "      <td>0</td>\n",
+              "      <td>1998</td>\n",
+              "      <td>28.0</td>\n",
+              "      <td>http://i.stack.imgur.com/d1oHX.jpg</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4</th>\n",
+              "      <td>5</td>\n",
+              "      <td>6792</td>\n",
+              "      <td>2010-07-19 19:03:57</td>\n",
+              "      <td>Shane</td>\n",
+              "      <td>2014-08-13 00:23:47</td>\n",
+              "      <td>http://www.statalgo.com</td>\n",
+              "      <td>New York, NY</td>\n",
+              "      <td>&lt;p&gt;Quantitative researcher focusing on statist...</td>\n",
+              "      <td>1145</td>\n",
+              "      <td>662</td>\n",
+              "      <td>5</td>\n",
+              "      <td>54503</td>\n",
+              "      <td>35.0</td>\n",
+              "      <td>None</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div>"
+            ],
+            "text/plain": [
+              "   userId  Reputation  ...   Age                     ProfileImageUrl\n",
+              "0      -1           1  ...   NaN                                None\n",
+              "1       2         101  ...  37.0                                None\n",
+              "2       3         101  ...  35.0                                None\n",
+              "3       4         101  ...  28.0  http://i.stack.imgur.com/d1oHX.jpg\n",
+              "4       5        6792  ...  35.0                                None\n",
+              "\n",
+              "[5 rows x 14 columns]"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 49
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "koTr9ylS7xab"
+      },
+      "source": [
+        "#### 6. Import the posts table. "
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 521
+        },
+        "id": "dFRjmFkh7xac",
+        "outputId": "41765990-7ea2-4345-f482-44a4be7aa4f3"
+      },
+      "source": [
+        "posts = pd.read_sql_query(sql='SELECT * FROM stats.posts', con=engine)\n",
+        "posts.head(n=5)"
+      ],
+      "execution_count": 50,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/html": [
+              "<div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>Id</th>\n",
+              "      <th>PostTypeId</th>\n",
+              "      <th>AcceptedAnswerId</th>\n",
+              "      <th>CreaionDate</th>\n",
+              "      <th>Score</th>\n",
+              "      <th>ViewCount</th>\n",
+              "      <th>Body</th>\n",
+              "      <th>OwnerUserId</th>\n",
+              "      <th>LasActivityDate</th>\n",
+              "      <th>Title</th>\n",
+              "      <th>Tags</th>\n",
+              "      <th>AnswerCount</th>\n",
+              "      <th>CommentCount</th>\n",
+              "      <th>FavoriteCount</th>\n",
+              "      <th>LastEditorUserId</th>\n",
+              "      <th>LastEditDate</th>\n",
+              "      <th>CommunityOwnedDate</th>\n",
+              "      <th>ParentId</th>\n",
+              "      <th>ClosedDate</th>\n",
+              "      <th>OwnerDisplayName</th>\n",
+              "      <th>LastEditorDisplayName</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>1</td>\n",
+              "      <td>1</td>\n",
+              "      <td>15.0</td>\n",
+              "      <td>2010-07-19 19:12:12</td>\n",
+              "      <td>23</td>\n",
+              "      <td>1278.0</td>\n",
+              "      <td>&lt;p&gt;How should I elicit prior distributions fro...</td>\n",
+              "      <td>8.0</td>\n",
+              "      <td>2010-09-15 21:08:26</td>\n",
+              "      <td>Eliciting priors from experts</td>\n",
+              "      <td>&lt;bayesian&gt;&lt;prior&gt;&lt;elicitation&gt;</td>\n",
+              "      <td>5.0</td>\n",
+              "      <td>1</td>\n",
+              "      <td>14.0</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaT</td>\n",
+              "      <td>NaT</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaT</td>\n",
+              "      <td>None</td>\n",
+              "      <td>None</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>2</td>\n",
+              "      <td>1</td>\n",
+              "      <td>59.0</td>\n",
+              "      <td>2010-07-19 19:12:57</td>\n",
+              "      <td>22</td>\n",
+              "      <td>8198.0</td>\n",
+              "      <td>&lt;p&gt;In many different statistical methods there...</td>\n",
+              "      <td>24.0</td>\n",
+              "      <td>2012-11-12 09:21:54</td>\n",
+              "      <td>What is normality?</td>\n",
+              "      <td>&lt;distributions&gt;&lt;normality&gt;</td>\n",
+              "      <td>7.0</td>\n",
+              "      <td>1</td>\n",
+              "      <td>8.0</td>\n",
+              "      <td>88.0</td>\n",
+              "      <td>2010-08-07 17:56:44</td>\n",
+              "      <td>NaT</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaT</td>\n",
+              "      <td>None</td>\n",
+              "      <td>None</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2</th>\n",
+              "      <td>3</td>\n",
+              "      <td>1</td>\n",
+              "      <td>5.0</td>\n",
+              "      <td>2010-07-19 19:13:28</td>\n",
+              "      <td>54</td>\n",
+              "      <td>3613.0</td>\n",
+              "      <td>&lt;p&gt;What are some valuable Statistical Analysis...</td>\n",
+              "      <td>18.0</td>\n",
+              "      <td>2013-05-27 14:48:36</td>\n",
+              "      <td>What are some valuable Statistical Analysis op...</td>\n",
+              "      <td>&lt;software&gt;&lt;open-source&gt;</td>\n",
+              "      <td>19.0</td>\n",
+              "      <td>4</td>\n",
+              "      <td>36.0</td>\n",
+              "      <td>183.0</td>\n",
+              "      <td>2011-02-12 05:50:03</td>\n",
+              "      <td>2010-07-19 19:13:28</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaT</td>\n",
+              "      <td>None</td>\n",
+              "      <td>None</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>3</th>\n",
+              "      <td>4</td>\n",
+              "      <td>1</td>\n",
+              "      <td>135.0</td>\n",
+              "      <td>2010-07-19 19:13:31</td>\n",
+              "      <td>13</td>\n",
+              "      <td>5224.0</td>\n",
+              "      <td>&lt;p&gt;I have two groups of data.  Each with a dif...</td>\n",
+              "      <td>23.0</td>\n",
+              "      <td>2010-09-08 03:00:19</td>\n",
+              "      <td>Assessing the significance of differences in d...</td>\n",
+              "      <td>&lt;distributions&gt;&lt;statistical-significance&gt;</td>\n",
+              "      <td>5.0</td>\n",
+              "      <td>2</td>\n",
+              "      <td>2.0</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaT</td>\n",
+              "      <td>NaT</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>NaT</td>\n",
+              "      <td>None</td>\n",
+              "      <td>None</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4</th>\n",
+              "      <td>5</td>\n",
+              "      <td>2</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>2010-07-19 19:14:43</td>\n",
+              "      <td>81</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>&lt;p&gt;The R-project&lt;/p&gt;\\n\\n&lt;p&gt;&lt;a href=\"http://www...</td>\n",
+              "      <td>23.0</td>\n",
+              "      <td>2010-07-19 19:21:15</td>\n",
+              "      <td>None</td>\n",
+              "      <td>None</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>3</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>23.0</td>\n",
+              "      <td>2010-07-19 19:21:15</td>\n",
+              "      <td>2010-07-19 19:14:43</td>\n",
+              "      <td>3.0</td>\n",
+              "      <td>NaT</td>\n",
+              "      <td>None</td>\n",
+              "      <td>None</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div>"
+            ],
+            "text/plain": [
+              "   Id  PostTypeId  ...  OwnerDisplayName LastEditorDisplayName\n",
+              "0   1           1  ...              None                  None\n",
+              "1   2           1  ...              None                  None\n",
+              "2   3           1  ...              None                  None\n",
+              "3   4           1  ...              None                  None\n",
+              "4   5           2  ...              None                  None\n",
+              "\n",
+              "[5 rows x 21 columns]"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 50
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "KdAiFpJD7xac"
+      },
+      "source": [
+        "#### 7. Rename Id column to postId and OwnerUserId to userId"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "EPGQNuFB7xac"
+      },
+      "source": [
+        "posts2 = posts.rename(columns={'Id': 'postID', 'OwnerUserId': 'userId'})  # keep every row; .head() is only for the preview below\n",
+        "posts2.head()"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "dc2i-tvc7xad"
+      },
+      "source": [
+        "#### 8. Define new dataframes for users and posts with the following selected columns:\n",
+        "- **users columns**: userId, Reputation, Views, UpVotes, DownVotes\n",
+        "- **posts columns**: postId, Score, userId, ViewCount, CommentCount"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "pp37FD9l7xad"
+      },
+      "source": [
+        ""
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 206
+        },
+        "id": "U5CQ5hi2Ezla",
+        "outputId": "4a2a856a-3ea5-4833-dfee-9cd2a48f24b0"
+      },
+      "source": [
+        "users_filt = users.loc[:, ['userId', 'Reputation', 'Views', 'UpVotes', 'DownVotes']]  # users columns kept for the merge\n",
+        "posts_filt = posts2.loc[:, ['postID', 'Score', 'userId', 'ViewCount', 'CommentCount']]  # posts columns kept for the merge\n",
+        "posts_filt.head()\n"
+      ],
+      "execution_count": 51,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/html": [
+              "<div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>postID</th>\n",
+              "      <th>Score</th>\n",
+              "      <th>userId</th>\n",
+              "      <th>ViewCount</th>\n",
+              "      <th>CommentCount</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>1</td>\n",
+              "      <td>23</td>\n",
+              "      <td>8.0</td>\n",
+              "      <td>1278.0</td>\n",
+              "      <td>1</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>2</td>\n",
+              "      <td>22</td>\n",
+              "      <td>24.0</td>\n",
+              "      <td>8198.0</td>\n",
+              "      <td>1</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2</th>\n",
+              "      <td>3</td>\n",
+              "      <td>54</td>\n",
+              "      <td>18.0</td>\n",
+              "      <td>3613.0</td>\n",
+              "      <td>4</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>3</th>\n",
+              "      <td>4</td>\n",
+              "      <td>13</td>\n",
+              "      <td>23.0</td>\n",
+              "      <td>5224.0</td>\n",
+              "      <td>2</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4</th>\n",
+              "      <td>5</td>\n",
+              "      <td>81</td>\n",
+              "      <td>23.0</td>\n",
+              "      <td>NaN</td>\n",
+              "      <td>3</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div>"
+            ],
+            "text/plain": [
+              "   postID  Score  userId  ViewCount  CommentCount\n",
+              "0       1     23     8.0     1278.0             1\n",
+              "1       2     22    24.0     8198.0             1\n",
+              "2       3     54    18.0     3613.0             4\n",
+              "3       4     13    23.0     5224.0             2\n",
+              "4       5     81    23.0        NaN             3"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 51
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "Jir8RBEO7xad"
+      },
+      "source": [
+        "#### 8. Merge both dataframes, users and posts. \n",
+        "You will need to make a [merge](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.merge.html) of posts and users dataframes."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 49
+        },
+        "id": "qxEO0rH57xae",
+        "outputId": "8e319777-dcb2-4e3d-cc6e-03e0889843b3"
+      },
+      "source": [
+        "merged_df = users_filt.merge(posts_filt, on='userId', how='inner')  # inner join on userId, the only column both frames share\n",
+        "merged_df.head(4)"
+      ],
+      "execution_count": 58,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/html": [
+              "<div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>userId</th>\n",
+              "      <th>Reputation</th>\n",
+              "      <th>Views</th>\n",
+              "      <th>UpVotes</th>\n",
+              "      <th>DownVotes</th>\n",
+              "      <th>postID</th>\n",
+              "      <th>Score</th>\n",
+              "      <th>ViewCount</th>\n",
+              "      <th>CommentCount</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div>"
+            ],
+            "text/plain": [
+              "Empty DataFrame\n",
+              "Columns: [userId, Reputation, Views, UpVotes, DownVotes, postID, Score, ViewCount, CommentCount]\n",
+              "Index: []"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 58
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "ELp32l5u7xae"
+      },
+      "source": [
+        "#### 9. How many missing values do you have in your merged dataframe? On which columns?"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "xX0em7kc7xae"
+      },
+      "source": [
+        "users.info()"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "Ce0fLZ97JV3M"
+      },
+      "source": [
+        "merged_df.isna().sum()  # number of missing values in each column of the merged frame"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "510cRIWe7xaf"
+      },
+      "source": [
+        "#### 10. You will need to do something about the missing values. Will you drop them or fill them? Explain.\n",
+        "**Remember** to check the results of your code before moving on to the next step."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 49
+        },
+        "id": "2XSJxL5C7xaf",
+        "outputId": "f28212c4-fc21-48ee-cacb-2c51a14aba22"
+      },
+      "source": [
+        "merged_df['ViewCount'] = merged_df['ViewCount'].fillna(0)  # replace missing ViewCount entries with 0\n",
+        "merged_df.head()"
+      ],
+      "execution_count": 59,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/html": [
+              "<div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>userId</th>\n",
+              "      <th>Reputation</th>\n",
+              "      <th>Views</th>\n",
+              "      <th>UpVotes</th>\n",
+              "      <th>DownVotes</th>\n",
+              "      <th>postID</th>\n",
+              "      <th>Score</th>\n",
+              "      <th>ViewCount</th>\n",
+              "      <th>CommentCount</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div>"
+            ],
+            "text/plain": [
+              "Empty DataFrame\n",
+              "Columns: [userId, Reputation, Views, UpVotes, DownVotes, postID, Score, ViewCount, CommentCount]\n",
+              "Index: []"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 59
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "s0LJUJQW7xaf"
+      },
+      "source": [
+        "#### 11. Adjust the data types in order to avoid future issues. Which ones should be changed? "
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "k6NUFICr7xaf"
+      },
+      "source": [
+        ""
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "s0Hwo-CO7xaf"
+      },
+      "source": [
+        "#### Bonus: Identify extreme values in your merged dataframe as you have learned in class. Create a dataframe called outliers with the same columns as the data set and calculate the bounds. The values of the outliers dataframe will be the values of merged_df that fall outside those bounds. You will need to save your outliers dataframe to a csv file in the your-code folder."
+      ]
+    }
+  ]
+}
\ No newline at end of file

From 3b5dfacbeb384cddd06ddb4b9d69b44c64d327d1 Mon Sep 17 00:00:00 2001
From: Daniel Gonzalez <daniel.gonzalez1065@gmail.com>
Date: Thu, 25 Nov 2021 20:17:34 -0600
Subject: [PATCH 2/2] Solucion Lab

---
 your-code/weather.ipynb | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/your-code/weather.ipynb b/your-code/weather.ipynb
index 4fc40ab..b419ee1 100644
--- a/your-code/weather.ipynb
+++ b/your-code/weather.ipynb
@@ -47,7 +47,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -61,7 +61,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.5"
+   "version": "3.8.11"
   }
  },
  "nbformat": 4,