diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000..b56a444
Binary files /dev/null and b/.DS_Store differ
diff --git a/your-code/.DS_Store b/your-code/.DS_Store
new file mode 100644
index 0000000..40fe872
Binary files /dev/null and b/your-code/.DS_Store differ
diff --git a/your-code/.ipynb_checkpoints/main-checkpoint.ipynb b/your-code/.ipynb_checkpoints/main-checkpoint.ipynb
new file mode 100644
index 0000000..4a983fc
--- /dev/null
+++ b/your-code/.ipynb_checkpoints/main-checkpoint.ipynb
@@ -0,0 +1,1324 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### 1. Import pandas library"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### 2. Import users table:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Id</th>\n",
+       "      <th>Reputation</th>\n",
+       "      <th>CreationDate</th>\n",
+       "      <th>DisplayName</th>\n",
+       "      <th>LastAccessDate</th>\n",
+       "      <th>WebsiteUrl</th>\n",
+       "      <th>Location</th>\n",
+       "      <th>AboutMe</th>\n",
+       "      <th>Views</th>\n",
+       "      <th>UpVotes</th>\n",
+       "      <th>DownVotes</th>\n",
+       "      <th>AccountId</th>\n",
+       "      <th>Age</th>\n",
+       "      <th>ProfileImageUrl</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>-1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2010-07-19 06:55:26</td>\n",
+       "      <td>Community</td>\n",
+       "      <td>2010-07-19 06:55:26</td>\n",
+       "      <td>http://meta.stackexchange.com/</td>\n",
+       "      <td>on the server farm</td>\n",
+       "      <td>&lt;p&gt;Hi, I'm not really a person.&lt;/p&gt;\\r\\n\\r\\n&lt;p&gt;...</td>\n",
+       "      <td>0</td>\n",
+       "      <td>5007</td>\n",
+       "      <td>1920</td>\n",
+       "      <td>-1</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>2</td>\n",
+       "      <td>101</td>\n",
+       "      <td>2010-07-19 14:01:36</td>\n",
+       "      <td>Geoff Dalgas</td>\n",
+       "      <td>2013-11-12 22:07:23</td>\n",
+       "      <td>http://stackoverflow.com</td>\n",
+       "      <td>Corvallis, OR</td>\n",
+       "      <td>&lt;p&gt;Developer on the StackOverflow team.  Find ...</td>\n",
+       "      <td>25</td>\n",
+       "      <td>3</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>37.0</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>3</td>\n",
+       "      <td>101</td>\n",
+       "      <td>2010-07-19 15:34:50</td>\n",
+       "      <td>Jarrod Dixon</td>\n",
+       "      <td>2014-08-08 06:42:58</td>\n",
+       "      <td>http://stackoverflow.com</td>\n",
+       "      <td>New York, NY</td>\n",
+       "      <td>&lt;p&gt;&lt;a href=\"http://blog.stackoverflow.com/2009...</td>\n",
+       "      <td>22</td>\n",
+       "      <td>19</td>\n",
+       "      <td>0</td>\n",
+       "      <td>3</td>\n",
+       "      <td>35.0</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>4</td>\n",
+       "      <td>101</td>\n",
+       "      <td>2010-07-19 19:03:27</td>\n",
+       "      <td>Emmett</td>\n",
+       "      <td>2014-01-02 09:31:02</td>\n",
+       "      <td>http://minesweeperonline.com</td>\n",
+       "      <td>San Francisco, CA</td>\n",
+       "      <td>&lt;p&gt;currently at a startup in SF&lt;/p&gt;\\r\\n\\r\\n&lt;p&gt;...</td>\n",
+       "      <td>11</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1998</td>\n",
+       "      <td>28.0</td>\n",
+       "      <td>http://i.stack.imgur.com/d1oHX.jpg</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>5</td>\n",
+       "      <td>6792</td>\n",
+       "      <td>2010-07-19 19:03:57</td>\n",
+       "      <td>Shane</td>\n",
+       "      <td>2014-08-13 00:23:47</td>\n",
+       "      <td>http://www.statalgo.com</td>\n",
+       "      <td>New York, NY</td>\n",
+       "      <td>&lt;p&gt;Quantitative researcher focusing on statist...</td>\n",
+       "      <td>1145</td>\n",
+       "      <td>662</td>\n",
+       "      <td>5</td>\n",
+       "      <td>54503</td>\n",
+       "      <td>35.0</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   Id  Reputation         CreationDate   DisplayName       LastAccessDate  \\\n",
+       "0  -1           1  2010-07-19 06:55:26     Community  2010-07-19 06:55:26   \n",
+       "1   2         101  2010-07-19 14:01:36  Geoff Dalgas  2013-11-12 22:07:23   \n",
+       "2   3         101  2010-07-19 15:34:50  Jarrod Dixon  2014-08-08 06:42:58   \n",
+       "3   4         101  2010-07-19 19:03:27        Emmett  2014-01-02 09:31:02   \n",
+       "4   5        6792  2010-07-19 19:03:57         Shane  2014-08-13 00:23:47   \n",
+       "\n",
+       "                       WebsiteUrl            Location  \\\n",
+       "0  http://meta.stackexchange.com/  on the server farm   \n",
+       "1        http://stackoverflow.com       Corvallis, OR   \n",
+       "2        http://stackoverflow.com        New York, NY   \n",
+       "3    http://minesweeperonline.com   San Francisco, CA   \n",
+       "4         http://www.statalgo.com        New York, NY   \n",
+       "\n",
+       "                                             AboutMe  Views  UpVotes  \\\n",
+       "0  <p>Hi, I'm not really a person.</p>\\r\\n\\r\\n<p>...      0     5007   \n",
+       "1  <p>Developer on the StackOverflow team.  Find ...     25        3   \n",
+       "2  <p><a href=\"http://blog.stackoverflow.com/2009...     22       19   \n",
+       "3  <p>currently at a startup in SF</p>\\r\\n\\r\\n<p>...     11        0   \n",
+       "4  <p>Quantitative researcher focusing on statist...   1145      662   \n",
+       "\n",
+       "   DownVotes  AccountId   Age                     ProfileImageUrl  \n",
+       "0       1920         -1   NaN                                 NaN  \n",
+       "1          0          2  37.0                                 NaN  \n",
+       "2          0          3  35.0                                 NaN  \n",
+       "3          0       1998  28.0  http://i.stack.imgur.com/d1oHX.jpg  \n",
+       "4          5      54503  35.0                                 NaN  "
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Load the users export; the relative path is resolved against the notebook's working directory.\n",
+    "users_table = pd.read_csv(\"users_table.csv\")\n",
+    "users_table.head(n=5)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### 3. Rename Id column to userId"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>userId</th>\n",
+       "      <th>Reputation</th>\n",
+       "      <th>CreationDate</th>\n",
+       "      <th>DisplayName</th>\n",
+       "      <th>LastAccessDate</th>\n",
+       "      <th>WebsiteUrl</th>\n",
+       "      <th>Location</th>\n",
+       "      <th>AboutMe</th>\n",
+       "      <th>Views</th>\n",
+       "      <th>UpVotes</th>\n",
+       "      <th>DownVotes</th>\n",
+       "      <th>AccountId</th>\n",
+       "      <th>Age</th>\n",
+       "      <th>ProfileImageUrl</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>-1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2010-07-19 06:55:26</td>\n",
+       "      <td>Community</td>\n",
+       "      <td>2010-07-19 06:55:26</td>\n",
+       "      <td>http://meta.stackexchange.com/</td>\n",
+       "      <td>on the server farm</td>\n",
+       "      <td>&lt;p&gt;Hi, I'm not really a person.&lt;/p&gt;\\r\\n\\r\\n&lt;p&gt;...</td>\n",
+       "      <td>0</td>\n",
+       "      <td>5007</td>\n",
+       "      <td>1920</td>\n",
+       "      <td>-1</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>2</td>\n",
+       "      <td>101</td>\n",
+       "      <td>2010-07-19 14:01:36</td>\n",
+       "      <td>Geoff Dalgas</td>\n",
+       "      <td>2013-11-12 22:07:23</td>\n",
+       "      <td>http://stackoverflow.com</td>\n",
+       "      <td>Corvallis, OR</td>\n",
+       "      <td>&lt;p&gt;Developer on the StackOverflow team.  Find ...</td>\n",
+       "      <td>25</td>\n",
+       "      <td>3</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>37.0</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>3</td>\n",
+       "      <td>101</td>\n",
+       "      <td>2010-07-19 15:34:50</td>\n",
+       "      <td>Jarrod Dixon</td>\n",
+       "      <td>2014-08-08 06:42:58</td>\n",
+       "      <td>http://stackoverflow.com</td>\n",
+       "      <td>New York, NY</td>\n",
+       "      <td>&lt;p&gt;&lt;a href=\"http://blog.stackoverflow.com/2009...</td>\n",
+       "      <td>22</td>\n",
+       "      <td>19</td>\n",
+       "      <td>0</td>\n",
+       "      <td>3</td>\n",
+       "      <td>35.0</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>4</td>\n",
+       "      <td>101</td>\n",
+       "      <td>2010-07-19 19:03:27</td>\n",
+       "      <td>Emmett</td>\n",
+       "      <td>2014-01-02 09:31:02</td>\n",
+       "      <td>http://minesweeperonline.com</td>\n",
+       "      <td>San Francisco, CA</td>\n",
+       "      <td>&lt;p&gt;currently at a startup in SF&lt;/p&gt;\\r\\n\\r\\n&lt;p&gt;...</td>\n",
+       "      <td>11</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1998</td>\n",
+       "      <td>28.0</td>\n",
+       "      <td>http://i.stack.imgur.com/d1oHX.jpg</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>5</td>\n",
+       "      <td>6792</td>\n",
+       "      <td>2010-07-19 19:03:57</td>\n",
+       "      <td>Shane</td>\n",
+       "      <td>2014-08-13 00:23:47</td>\n",
+       "      <td>http://www.statalgo.com</td>\n",
+       "      <td>New York, NY</td>\n",
+       "      <td>&lt;p&gt;Quantitative researcher focusing on statist...</td>\n",
+       "      <td>1145</td>\n",
+       "      <td>662</td>\n",
+       "      <td>5</td>\n",
+       "      <td>54503</td>\n",
+       "      <td>35.0</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>40320</th>\n",
+       "      <td>55743</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2014-09-13 21:03:50</td>\n",
+       "      <td>AussieMeg</td>\n",
+       "      <td>2014-09-13 21:18:52</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>5026902</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>http://graph.facebook.com/665821703/picture?ty...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>40321</th>\n",
+       "      <td>55744</td>\n",
+       "      <td>6</td>\n",
+       "      <td>2014-09-13 21:39:30</td>\n",
+       "      <td>Mia Maria</td>\n",
+       "      <td>2014-09-13 21:39:30</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>5026998</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>40322</th>\n",
+       "      <td>55745</td>\n",
+       "      <td>101</td>\n",
+       "      <td>2014-09-13 23:45:27</td>\n",
+       "      <td>tronbabylove</td>\n",
+       "      <td>2014-09-13 23:45:27</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>United States</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>481766</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>https://www.gravatar.com/avatar/faa7a3fdbd8308...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>40323</th>\n",
+       "      <td>55746</td>\n",
+       "      <td>106</td>\n",
+       "      <td>2014-09-14 00:29:41</td>\n",
+       "      <td>GPP</td>\n",
+       "      <td>2014-09-14 02:05:17</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>&lt;p&gt;Stats noobie, product, marketing &amp;amp; medi...</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>976289</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>https://www.gravatar.com/avatar/6d9e9fa6b783a3...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>40324</th>\n",
+       "      <td>55747</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2014-09-14 01:01:44</td>\n",
+       "      <td>Shivam Agrawal</td>\n",
+       "      <td>2014-09-14 01:19:04</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>India</td>\n",
+       "      <td>&lt;p&gt;Maths Enthusiast &lt;/p&gt;\\r\\n</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>5027354</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>https://lh4.googleusercontent.com/-ZsXhwVaFmiY...</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>40325 rows × 14 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "       userId  Reputation         CreationDate     DisplayName  \\\n",
+       "0          -1           1  2010-07-19 06:55:26       Community   \n",
+       "1           2         101  2010-07-19 14:01:36    Geoff Dalgas   \n",
+       "2           3         101  2010-07-19 15:34:50    Jarrod Dixon   \n",
+       "3           4         101  2010-07-19 19:03:27          Emmett   \n",
+       "4           5        6792  2010-07-19 19:03:57           Shane   \n",
+       "...       ...         ...                  ...             ...   \n",
+       "40320   55743           1  2014-09-13 21:03:50       AussieMeg   \n",
+       "40321   55744           6  2014-09-13 21:39:30       Mia Maria   \n",
+       "40322   55745         101  2014-09-13 23:45:27    tronbabylove   \n",
+       "40323   55746         106  2014-09-14 00:29:41             GPP   \n",
+       "40324   55747           1  2014-09-14 01:01:44  Shivam Agrawal   \n",
+       "\n",
+       "            LastAccessDate                      WebsiteUrl  \\\n",
+       "0      2010-07-19 06:55:26  http://meta.stackexchange.com/   \n",
+       "1      2013-11-12 22:07:23        http://stackoverflow.com   \n",
+       "2      2014-08-08 06:42:58        http://stackoverflow.com   \n",
+       "3      2014-01-02 09:31:02    http://minesweeperonline.com   \n",
+       "4      2014-08-13 00:23:47         http://www.statalgo.com   \n",
+       "...                    ...                             ...   \n",
+       "40320  2014-09-13 21:18:52                             NaN   \n",
+       "40321  2014-09-13 21:39:30                             NaN   \n",
+       "40322  2014-09-13 23:45:27                             NaN   \n",
+       "40323  2014-09-14 02:05:17                             NaN   \n",
+       "40324  2014-09-14 01:19:04                             NaN   \n",
+       "\n",
+       "                 Location                                            AboutMe  \\\n",
+       "0      on the server farm  <p>Hi, I'm not really a person.</p>\\r\\n\\r\\n<p>...   \n",
+       "1           Corvallis, OR  <p>Developer on the StackOverflow team.  Find ...   \n",
+       "2            New York, NY  <p><a href=\"http://blog.stackoverflow.com/2009...   \n",
+       "3       San Francisco, CA  <p>currently at a startup in SF</p>\\r\\n\\r\\n<p>...   \n",
+       "4            New York, NY  <p>Quantitative researcher focusing on statist...   \n",
+       "...                   ...                                                ...   \n",
+       "40320                 NaN                                                NaN   \n",
+       "40321                 NaN                                                NaN   \n",
+       "40322       United States                                                NaN   \n",
+       "40323                 NaN  <p>Stats noobie, product, marketing &amp; medi...   \n",
+       "40324               India                       <p>Maths Enthusiast </p>\\r\\n   \n",
+       "\n",
+       "       Views  UpVotes  DownVotes  AccountId   Age  \\\n",
+       "0          0     5007       1920         -1   NaN   \n",
+       "1         25        3          0          2  37.0   \n",
+       "2         22       19          0          3  35.0   \n",
+       "3         11        0          0       1998  28.0   \n",
+       "4       1145      662          5      54503  35.0   \n",
+       "...      ...      ...        ...        ...   ...   \n",
+       "40320      0        0          0    5026902   NaN   \n",
+       "40321      1        0          0    5026998   NaN   \n",
+       "40322      0        0          0     481766   NaN   \n",
+       "40323      1        0          0     976289   NaN   \n",
+       "40324      0        0          0    5027354   NaN   \n",
+       "\n",
+       "                                         ProfileImageUrl  \n",
+       "0                                                    NaN  \n",
+       "1                                                    NaN  \n",
+       "2                                                    NaN  \n",
+       "3                     http://i.stack.imgur.com/d1oHX.jpg  \n",
+       "4                                                    NaN  \n",
+       "...                                                  ...  \n",
+       "40320  http://graph.facebook.com/665821703/picture?ty...  \n",
+       "40321                                                NaN  \n",
+       "40322  https://www.gravatar.com/avatar/faa7a3fdbd8308...  \n",
+       "40323  https://www.gravatar.com/avatar/6d9e9fa6b783a3...  \n",
+       "40324  https://lh4.googleusercontent.com/-ZsXhwVaFmiY...  \n",
+       "\n",
+       "[40325 rows x 14 columns]"
+      ]
+     },
+     "execution_count": 23,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Rename the key column Id to userId. rename() returns a new frame, so the result is\n",
+    "# assigned back to users_table; re-running the cell is harmless because labels that\n",
+    "# are no longer present are ignored by default.\n",
+    "users_table = users_table.rename(columns={\"Id\": \"userId\"})\n",
+    "# Preview only the first rows instead of echoing the whole frame into the notebook.\n",
+    "users_table.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### 4. Import posts table:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 49,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Id</th>\n",
+       "      <th>PostTypeId</th>\n",
+       "      <th>AcceptedAnswerId</th>\n",
+       "      <th>CreaionDate</th>\n",
+       "      <th>Score</th>\n",
+       "      <th>ViewCount</th>\n",
+       "      <th>Body</th>\n",
+       "      <th>OwnerUserId</th>\n",
+       "      <th>LasActivityDate</th>\n",
+       "      <th>Title</th>\n",
+       "      <th>...</th>\n",
+       "      <th>AnswerCount</th>\n",
+       "      <th>CommentCount</th>\n",
+       "      <th>FavoriteCount</th>\n",
+       "      <th>LastEditorUserId</th>\n",
+       "      <th>LastEditDate</th>\n",
+       "      <th>CommunityOwnedDate</th>\n",
+       "      <th>ParentId</th>\n",
+       "      <th>ClosedDate</th>\n",
+       "      <th>OwnerDisplayName</th>\n",
+       "      <th>LastEditorDisplayName</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>15.0</td>\n",
+       "      <td>2010-07-19 19:12:12</td>\n",
+       "      <td>23</td>\n",
+       "      <td>1278.0</td>\n",
+       "      <td>&lt;p&gt;How should I elicit prior distributions fro...</td>\n",
+       "      <td>8.0</td>\n",
+       "      <td>2010-09-15 21:08:26</td>\n",
+       "      <td>Eliciting priors from experts</td>\n",
+       "      <td>...</td>\n",
+       "      <td>5.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>14.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>1 rows × 21 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   Id  PostTypeId  AcceptedAnswerId          CreaionDate  Score  ViewCount  \\\n",
+       "0   1           1              15.0  2010-07-19 19:12:12     23     1278.0   \n",
+       "\n",
+       "                                                Body  OwnerUserId  \\\n",
+       "0  <p>How should I elicit prior distributions fro...          8.0   \n",
+       "\n",
+       "       LasActivityDate                          Title  ... AnswerCount  \\\n",
+       "0  2010-09-15 21:08:26  Eliciting priors from experts  ...         5.0   \n",
+       "\n",
+       "   CommentCount  FavoriteCount  LastEditorUserId  LastEditDate  \\\n",
+       "0             1           14.0               NaN           NaN   \n",
+       "\n",
+       "  CommunityOwnedDate ParentId  ClosedDate OwnerDisplayName  \\\n",
+       "0                NaN      NaN         NaN              NaN   \n",
+       "\n",
+       "  LastEditorDisplayName  \n",
+       "0                   NaN  \n",
+       "\n",
+       "[1 rows x 21 columns]"
+      ]
+     },
+     "execution_count": 49,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Load the posts export and show a single row, which is enough to inspect the column names.\n",
+    "posts_table = pd.read_csv(\"posts_table.csv\")\n",
+    "posts_table.head(n=1)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### 5. Rename Id column to postId and OwnerUserId to userId"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 50,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>postId</th>\n",
+       "      <th>PostTypeId</th>\n",
+       "      <th>AcceptedAnswerId</th>\n",
+       "      <th>CreaionDate</th>\n",
+       "      <th>Score</th>\n",
+       "      <th>ViewCount</th>\n",
+       "      <th>Body</th>\n",
+       "      <th>userId</th>\n",
+       "      <th>LasActivityDate</th>\n",
+       "      <th>Title</th>\n",
+       "      <th>...</th>\n",
+       "      <th>AnswerCount</th>\n",
+       "      <th>CommentCount</th>\n",
+       "      <th>FavoriteCount</th>\n",
+       "      <th>LastEditorUserId</th>\n",
+       "      <th>LastEditDate</th>\n",
+       "      <th>CommunityOwnedDate</th>\n",
+       "      <th>ParentId</th>\n",
+       "      <th>ClosedDate</th>\n",
+       "      <th>OwnerDisplayName</th>\n",
+       "      <th>LastEditorDisplayName</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>15.0</td>\n",
+       "      <td>2010-07-19 19:12:12</td>\n",
+       "      <td>23</td>\n",
+       "      <td>1278.0</td>\n",
+       "      <td>&lt;p&gt;How should I elicit prior distributions fro...</td>\n",
+       "      <td>8.0</td>\n",
+       "      <td>2010-09-15 21:08:26</td>\n",
+       "      <td>Eliciting priors from experts</td>\n",
+       "      <td>...</td>\n",
+       "      <td>5.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>14.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>59.0</td>\n",
+       "      <td>2010-07-19 19:12:57</td>\n",
+       "      <td>22</td>\n",
+       "      <td>8198.0</td>\n",
+       "      <td>&lt;p&gt;In many different statistical methods there...</td>\n",
+       "      <td>24.0</td>\n",
+       "      <td>2012-11-12 09:21:54</td>\n",
+       "      <td>What is normality?</td>\n",
+       "      <td>...</td>\n",
+       "      <td>7.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>8.0</td>\n",
+       "      <td>88.0</td>\n",
+       "      <td>2010-08-07 17:56:44</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>5.0</td>\n",
+       "      <td>2010-07-19 19:13:28</td>\n",
+       "      <td>54</td>\n",
+       "      <td>3613.0</td>\n",
+       "      <td>&lt;p&gt;What are some valuable Statistical Analysis...</td>\n",
+       "      <td>18.0</td>\n",
+       "      <td>2013-05-27 14:48:36</td>\n",
+       "      <td>What are some valuable Statistical Analysis op...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>19.0</td>\n",
+       "      <td>4</td>\n",
+       "      <td>36.0</td>\n",
+       "      <td>183.0</td>\n",
+       "      <td>2011-02-12 05:50:03</td>\n",
+       "      <td>2010-07-19 19:13:28</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>4</td>\n",
+       "      <td>1</td>\n",
+       "      <td>135.0</td>\n",
+       "      <td>2010-07-19 19:13:31</td>\n",
+       "      <td>13</td>\n",
+       "      <td>5224.0</td>\n",
+       "      <td>&lt;p&gt;I have two groups of data.  Each with a dif...</td>\n",
+       "      <td>23.0</td>\n",
+       "      <td>2010-09-08 03:00:19</td>\n",
+       "      <td>Assessing the significance of differences in d...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>5.0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>5</td>\n",
+       "      <td>2</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>2010-07-19 19:14:43</td>\n",
+       "      <td>81</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>&lt;p&gt;The R-project&lt;/p&gt;\\n\\n&lt;p&gt;&lt;a href=\"http://www...</td>\n",
+       "      <td>23.0</td>\n",
+       "      <td>2010-07-19 19:21:15</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>3</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>23.0</td>\n",
+       "      <td>2010-07-19 19:21:15</td>\n",
+       "      <td>2010-07-19 19:14:43</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>5 rows × 21 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   postId  PostTypeId  AcceptedAnswerId          CreaionDate  Score  \\\n",
+       "0       1           1              15.0  2010-07-19 19:12:12     23   \n",
+       "1       2           1              59.0  2010-07-19 19:12:57     22   \n",
+       "2       3           1               5.0  2010-07-19 19:13:28     54   \n",
+       "3       4           1             135.0  2010-07-19 19:13:31     13   \n",
+       "4       5           2               NaN  2010-07-19 19:14:43     81   \n",
+       "\n",
+       "   ViewCount                                               Body  userId  \\\n",
+       "0     1278.0  <p>How should I elicit prior distributions fro...     8.0   \n",
+       "1     8198.0  <p>In many different statistical methods there...    24.0   \n",
+       "2     3613.0  <p>What are some valuable Statistical Analysis...    18.0   \n",
+       "3     5224.0  <p>I have two groups of data.  Each with a dif...    23.0   \n",
+       "4        NaN  <p>The R-project</p>\\n\\n<p><a href=\"http://www...    23.0   \n",
+       "\n",
+       "       LasActivityDate                                              Title  \\\n",
+       "0  2010-09-15 21:08:26                      Eliciting priors from experts   \n",
+       "1  2012-11-12 09:21:54                                 What is normality?   \n",
+       "2  2013-05-27 14:48:36  What are some valuable Statistical Analysis op...   \n",
+       "3  2010-09-08 03:00:19  Assessing the significance of differences in d...   \n",
+       "4  2010-07-19 19:21:15                                                NaN   \n",
+       "\n",
+       "   ... AnswerCount  CommentCount  FavoriteCount  LastEditorUserId  \\\n",
+       "0  ...         5.0             1           14.0               NaN   \n",
+       "1  ...         7.0             1            8.0              88.0   \n",
+       "2  ...        19.0             4           36.0             183.0   \n",
+       "3  ...         5.0             2            2.0               NaN   \n",
+       "4  ...         NaN             3            NaN              23.0   \n",
+       "\n",
+       "          LastEditDate   CommunityOwnedDate ParentId  ClosedDate  \\\n",
+       "0                  NaN                  NaN      NaN         NaN   \n",
+       "1  2010-08-07 17:56:44                  NaN      NaN         NaN   \n",
+       "2  2011-02-12 05:50:03  2010-07-19 19:13:28      NaN         NaN   \n",
+       "3                  NaN                  NaN      NaN         NaN   \n",
+       "4  2010-07-19 19:21:15  2010-07-19 19:14:43      3.0         NaN   \n",
+       "\n",
+       "  OwnerDisplayName LastEditorDisplayName  \n",
+       "0              NaN                   NaN  \n",
+       "1              NaN                   NaN  \n",
+       "2              NaN                   NaN  \n",
+       "3              NaN                   NaN  \n",
+       "4              NaN                   NaN  \n",
+       "\n",
+       "[5 rows x 21 columns]"
+      ]
+     },
+     "execution_count": 50,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "posts_table=posts_table.rename(columns={\"Id\":\"postId\",\"OwnerUserId\":\"userId\"})\n",
+    "posts_table.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### 6. Define new dataframes for users and posts with the following selected columns:\n",
+    "    **users columns**: userId, Reputation,Views,UpVotes,DownVotes\n",
+    "    **posts columns**: postId, Score,userId,ViewCount,CommentCount"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 52,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "   userId  Reputation  Views  UpVotes  DownVotes\n",
+      "0      -1           1      0     5007       1920\n",
+      "1       2         101     25        3          0\n",
+      "2       3         101     22       19          0\n",
+      "3       4         101     11        0          0\n",
+      "4       5        6792   1145      662          5\n",
+      "   postId  Score  userId  ViewCount  CommentCount\n",
+      "0       1     23     8.0     1278.0             1\n",
+      "1       2     22    24.0     8198.0             1\n",
+      "2       3     54    18.0     3613.0             4\n",
+      "3       4     13    23.0     5224.0             2\n",
+      "4       5     81    23.0        NaN             3\n"
+     ]
+    }
+   ],
+   "source": [
+    "users_new=users_table[[\"userId\",\"Reputation\",\"Views\",\"UpVotes\",\"DownVotes\"]].copy()\n",
+    "print(users_new.head())\n",
+    "posts_new=posts_table[[\"postId\",\"Score\",\"userId\",\"ViewCount\",\"CommentCount\"]].copy()\n",
+    "print(posts_new.head())"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### 7. Merge both dataframes, users and posts. \n",
+    "You will need to perform a [merge](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.merge.html) of the posts and users dataframes."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 66,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(40325, 5) 40325\n",
+      "(40000, 5) 8138\n",
+      "(72187, 9)\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>userId</th>\n",
+       "      <th>Reputation</th>\n",
+       "      <th>Views</th>\n",
+       "      <th>UpVotes</th>\n",
+       "      <th>DownVotes</th>\n",
+       "      <th>postId</th>\n",
+       "      <th>Score</th>\n",
+       "      <th>ViewCount</th>\n",
+       "      <th>CommentCount</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>-1.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>5007.0</td>\n",
+       "      <td>1920.0</td>\n",
+       "      <td>2175.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>-1.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>5007.0</td>\n",
+       "      <td>1920.0</td>\n",
+       "      <td>8576.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>-1.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>5007.0</td>\n",
+       "      <td>1920.0</td>\n",
+       "      <td>8578.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>-1.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>5007.0</td>\n",
+       "      <td>1920.0</td>\n",
+       "      <td>8981.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>-1.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>5007.0</td>\n",
+       "      <td>1920.0</td>\n",
+       "      <td>8982.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>72182</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>46117.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>355.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>72183</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>46260.0</td>\n",
+       "      <td>5.0</td>\n",
+       "      <td>145.0</td>\n",
+       "      <td>5.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>72184</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>46836.0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>406.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>72185</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>46892.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>72186</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>47308.0</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>129.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>72187 rows × 9 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "       userId  Reputation  Views  UpVotes  DownVotes   postId  Score  \\\n",
+       "0        -1.0         1.0    0.0   5007.0     1920.0   2175.0    0.0   \n",
+       "1        -1.0         1.0    0.0   5007.0     1920.0   8576.0    0.0   \n",
+       "2        -1.0         1.0    0.0   5007.0     1920.0   8578.0    0.0   \n",
+       "3        -1.0         1.0    0.0   5007.0     1920.0   8981.0    0.0   \n",
+       "4        -1.0         1.0    0.0   5007.0     1920.0   8982.0    0.0   \n",
+       "...       ...         ...    ...      ...        ...      ...    ...   \n",
+       "72182     NaN         NaN    NaN      NaN        NaN  46117.0    0.0   \n",
+       "72183     NaN         NaN    NaN      NaN        NaN  46260.0    5.0   \n",
+       "72184     NaN         NaN    NaN      NaN        NaN  46836.0    3.0   \n",
+       "72185     NaN         NaN    NaN      NaN        NaN  46892.0    1.0   \n",
+       "72186     NaN         NaN    NaN      NaN        NaN  47308.0    2.0   \n",
+       "\n",
+       "       ViewCount  CommentCount  \n",
+       "0            NaN           0.0  \n",
+       "1            NaN           0.0  \n",
+       "2            NaN           0.0  \n",
+       "3            NaN           0.0  \n",
+       "4            NaN           0.0  \n",
+       "...          ...           ...  \n",
+       "72182      355.0           0.0  \n",
+       "72183      145.0           5.0  \n",
+       "72184      406.0           0.0  \n",
+       "72185        NaN           0.0  \n",
+       "72186      129.0           0.0  \n",
+       "\n",
+       "[72187 rows x 9 columns]"
+      ]
+     },
+     "execution_count": 66,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "a=users_new.shape\n",
+    "a1=users_new[\"userId\"].nunique()\n",
+    "b=posts_new.shape\n",
+    "b1=posts_new[\"userId\"].nunique() # get the number of unique value almost double numbers of rows, which is weird\n",
+    "print(a,a1)\n",
+    "print(b,b1)\n",
+    "data_merge=users_new.merge(posts_new,how=\"outer\",on=\"userId\")  \n",
+    "\n",
+    "print(data_merge.shape)\n",
+    "data_merge"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### 8. How many missing values do you have in your merged dataframe? On which columns?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 67,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "userId           1038\n",
+       "Reputation       1038\n",
+       "Views            1038\n",
+       "UpVotes          1038\n",
+       "DownVotes        1038\n",
+       "postId          32187\n",
+       "Score           32187\n",
+       "ViewCount       56292\n",
+       "CommentCount    32187\n",
+       "dtype: int64"
+      ]
+     },
+     "execution_count": 67,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data_merge.isnull().sum()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### 9. You will need to do something about the missing values. Will you drop them or fill them? Explain.\n",
+    "**Remember** to check the results of your code before moving on to the next step."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 96,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(71149, 9)\n",
+      "   userId  Reputation  Views  UpVotes  DownVotes  postId  Score  ViewCount  \\\n",
+      "0    -1.0         1.0    0.0   5007.0     1920.0  2175.0    0.0        NaN   \n",
+      "1    -1.0         1.0    0.0   5007.0     1920.0  8576.0    0.0        NaN   \n",
+      "2    -1.0         1.0    0.0   5007.0     1920.0  8578.0    0.0        NaN   \n",
+      "3    -1.0         1.0    0.0   5007.0     1920.0  8981.0    0.0        NaN   \n",
+      "4    -1.0         1.0    0.0   5007.0     1920.0  8982.0    0.0        NaN   \n",
+      "\n",
+      "   CommentCount  \n",
+      "0           0.0  \n",
+      "1           0.0  \n",
+      "2           0.0  \n",
+      "3           0.0  \n",
+      "4           0.0  \n",
+      "(58584, 7)\n",
+      "     userId  Reputation   Views  UpVotes  DownVotes  Score  CommentCount\n",
+      "0      -1.0         1.0     0.0   5007.0     1920.0    0.0           0.0\n",
+      "123     2.0       101.0    25.0      3.0        0.0    NaN           NaN\n",
+      "124     3.0       101.0    22.0     19.0        0.0    NaN           NaN\n",
+      "125     4.0       101.0    11.0      0.0        0.0    NaN           NaN\n",
+      "126     5.0      6792.0  1145.0    662.0        5.0  152.0           5.0\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Drop rows with a missing userId; without a userId the data is not valid\n",
+    "data_validId =data_merge.dropna(axis=0, subset=['userId'])\n",
+    "print(data_validId.shape)\n",
+    "print(data_validId.head())\n",
+    "# postId is not reliable here: one user has many different postIds, and nearly half of the rows are missing postId -> remove postId\n",
+    "# Remove ViewCount because most of its values are missing (56292 of roughly 72K merged rows)\n",
+    "data_validId2=data_validId.drop([\"postId\",\"ViewCount\"],axis=1)\n",
+    "data_validId2=data_validId2.drop_duplicates()\n",
+    "print(data_validId2.shape)\n",
+    "print(data_validId2.head())\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### 10. Adjust the data types in order to avoid future issues. Which ones should be changed? "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 94,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "    userId Reputation   Views  UpVotes  DownVotes  Score  CommentCount\n",
+      "0     -1.0        1.0     0.0   5007.0     1920.0    0.0           0.0\n",
+      "123    2.0      101.0    25.0      3.0        0.0    NaN           NaN\n",
+      "124    3.0      101.0    22.0     19.0        0.0    NaN           NaN\n",
+      "125    4.0      101.0    11.0      0.0        0.0    NaN           NaN\n",
+      "126    5.0     6792.0  1145.0    662.0        5.0  152.0           5.0\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "userId           object\n",
+       "Reputation       object\n",
+       "Views           float64\n",
+       "UpVotes         float64\n",
+       "DownVotes       float64\n",
+       "Score           float64\n",
+       "CommentCount    float64\n",
+       "dtype: object"
+      ]
+     },
+     "execution_count": 94,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# userId and Reputation should be strings\n",
+    "convert_str = {'userId': str,'Reputation': str}\n",
+    "data_validId2 = data_validId2.astype(convert_str)\n",
+    "print(data_validId2.head())\n",
+    "data_validId2.dtypes"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/your-code/.posts_table.csv.icloud b/your-code/.posts_table.csv.icloud
new file mode 100644
index 0000000..06e51c5
Binary files /dev/null and b/your-code/.posts_table.csv.icloud differ
diff --git a/your-code/.users_table.csv.icloud b/your-code/.users_table.csv.icloud
new file mode 100644
index 0000000..a7132c5
Binary files /dev/null and b/your-code/.users_table.csv.icloud differ
diff --git a/your-code/datasets.rar b/your-code/datasets.rar
deleted file mode 100644
index db8661c..0000000
Binary files a/your-code/datasets.rar and /dev/null differ
diff --git a/your-code/main.ipynb b/your-code/main.ipynb
index 7900997..4a983fc 100644
--- a/your-code/main.ipynb
+++ b/your-code/main.ipynb
@@ -1,169 +1,1324 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### 1. Import pandas library"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### 2. Import users table:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### 3. Rename Id column to userId"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### 4. Import posts table:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### 5. Rename Id column to postId and OwnerUserId to userId"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### 6. Define new dataframes for users and posts with the following selected columns:\n",
-    "    **users columns**: userId, Reputation,Views,UpVotes,DownVotes\n",
-    "    **posts columns**: postId, Score,userId,ViewCount,CommentCount"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### 7. Merge both dataframes, users and posts. \n",
-    "You will need to make a [merge](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.merge.html) of posts and users dataframes."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### 8. How many missing values do you have in your merged dataframe? On which columns?"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### 9. You will need to make something with missing values.  Will you clean or filling them? Explain. \n",
-    "**Remember** to check the results of your code before passing to the next step"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### 10. Adjust the data types in order to avoid future issues. Which ones should be changed? "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.6.8"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### 1. Import pandas library"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### 2. Import users table:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Id</th>\n",
+       "      <th>Reputation</th>\n",
+       "      <th>CreationDate</th>\n",
+       "      <th>DisplayName</th>\n",
+       "      <th>LastAccessDate</th>\n",
+       "      <th>WebsiteUrl</th>\n",
+       "      <th>Location</th>\n",
+       "      <th>AboutMe</th>\n",
+       "      <th>Views</th>\n",
+       "      <th>UpVotes</th>\n",
+       "      <th>DownVotes</th>\n",
+       "      <th>AccountId</th>\n",
+       "      <th>Age</th>\n",
+       "      <th>ProfileImageUrl</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>-1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2010-07-19 06:55:26</td>\n",
+       "      <td>Community</td>\n",
+       "      <td>2010-07-19 06:55:26</td>\n",
+       "      <td>http://meta.stackexchange.com/</td>\n",
+       "      <td>on the server farm</td>\n",
+       "      <td>&lt;p&gt;Hi, I'm not really a person.&lt;/p&gt;\\r\\n\\r\\n&lt;p&gt;...</td>\n",
+       "      <td>0</td>\n",
+       "      <td>5007</td>\n",
+       "      <td>1920</td>\n",
+       "      <td>-1</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>2</td>\n",
+       "      <td>101</td>\n",
+       "      <td>2010-07-19 14:01:36</td>\n",
+       "      <td>Geoff Dalgas</td>\n",
+       "      <td>2013-11-12 22:07:23</td>\n",
+       "      <td>http://stackoverflow.com</td>\n",
+       "      <td>Corvallis, OR</td>\n",
+       "      <td>&lt;p&gt;Developer on the StackOverflow team.  Find ...</td>\n",
+       "      <td>25</td>\n",
+       "      <td>3</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>37.0</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>3</td>\n",
+       "      <td>101</td>\n",
+       "      <td>2010-07-19 15:34:50</td>\n",
+       "      <td>Jarrod Dixon</td>\n",
+       "      <td>2014-08-08 06:42:58</td>\n",
+       "      <td>http://stackoverflow.com</td>\n",
+       "      <td>New York, NY</td>\n",
+       "      <td>&lt;p&gt;&lt;a href=\"http://blog.stackoverflow.com/2009...</td>\n",
+       "      <td>22</td>\n",
+       "      <td>19</td>\n",
+       "      <td>0</td>\n",
+       "      <td>3</td>\n",
+       "      <td>35.0</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>4</td>\n",
+       "      <td>101</td>\n",
+       "      <td>2010-07-19 19:03:27</td>\n",
+       "      <td>Emmett</td>\n",
+       "      <td>2014-01-02 09:31:02</td>\n",
+       "      <td>http://minesweeperonline.com</td>\n",
+       "      <td>San Francisco, CA</td>\n",
+       "      <td>&lt;p&gt;currently at a startup in SF&lt;/p&gt;\\r\\n\\r\\n&lt;p&gt;...</td>\n",
+       "      <td>11</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1998</td>\n",
+       "      <td>28.0</td>\n",
+       "      <td>http://i.stack.imgur.com/d1oHX.jpg</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>5</td>\n",
+       "      <td>6792</td>\n",
+       "      <td>2010-07-19 19:03:57</td>\n",
+       "      <td>Shane</td>\n",
+       "      <td>2014-08-13 00:23:47</td>\n",
+       "      <td>http://www.statalgo.com</td>\n",
+       "      <td>New York, NY</td>\n",
+       "      <td>&lt;p&gt;Quantitative researcher focusing on statist...</td>\n",
+       "      <td>1145</td>\n",
+       "      <td>662</td>\n",
+       "      <td>5</td>\n",
+       "      <td>54503</td>\n",
+       "      <td>35.0</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   Id  Reputation         CreationDate   DisplayName       LastAccessDate  \\\n",
+       "0  -1           1  2010-07-19 06:55:26     Community  2010-07-19 06:55:26   \n",
+       "1   2         101  2010-07-19 14:01:36  Geoff Dalgas  2013-11-12 22:07:23   \n",
+       "2   3         101  2010-07-19 15:34:50  Jarrod Dixon  2014-08-08 06:42:58   \n",
+       "3   4         101  2010-07-19 19:03:27        Emmett  2014-01-02 09:31:02   \n",
+       "4   5        6792  2010-07-19 19:03:57         Shane  2014-08-13 00:23:47   \n",
+       "\n",
+       "                       WebsiteUrl            Location  \\\n",
+       "0  http://meta.stackexchange.com/  on the server farm   \n",
+       "1        http://stackoverflow.com       Corvallis, OR   \n",
+       "2        http://stackoverflow.com        New York, NY   \n",
+       "3    http://minesweeperonline.com   San Francisco, CA   \n",
+       "4         http://www.statalgo.com        New York, NY   \n",
+       "\n",
+       "                                             AboutMe  Views  UpVotes  \\\n",
+       "0  <p>Hi, I'm not really a person.</p>\\r\\n\\r\\n<p>...      0     5007   \n",
+       "1  <p>Developer on the StackOverflow team.  Find ...     25        3   \n",
+       "2  <p><a href=\"http://blog.stackoverflow.com/2009...     22       19   \n",
+       "3  <p>currently at a startup in SF</p>\\r\\n\\r\\n<p>...     11        0   \n",
+       "4  <p>Quantitative researcher focusing on statist...   1145      662   \n",
+       "\n",
+       "   DownVotes  AccountId   Age                     ProfileImageUrl  \n",
+       "0       1920         -1   NaN                                 NaN  \n",
+       "1          0          2  37.0                                 NaN  \n",
+       "2          0          3  35.0                                 NaN  \n",
+       "3          0       1998  28.0  http://i.stack.imgur.com/d1oHX.jpg  \n",
+       "4          5      54503  35.0                                 NaN  "
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "users_table=pd.read_csv(\"users_table.csv\")\n",
+    "users_table.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### 3. Rename Id column to userId"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>userId</th>\n",
+       "      <th>Reputation</th>\n",
+       "      <th>CreationDate</th>\n",
+       "      <th>DisplayName</th>\n",
+       "      <th>LastAccessDate</th>\n",
+       "      <th>WebsiteUrl</th>\n",
+       "      <th>Location</th>\n",
+       "      <th>AboutMe</th>\n",
+       "      <th>Views</th>\n",
+       "      <th>UpVotes</th>\n",
+       "      <th>DownVotes</th>\n",
+       "      <th>AccountId</th>\n",
+       "      <th>Age</th>\n",
+       "      <th>ProfileImageUrl</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>-1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2010-07-19 06:55:26</td>\n",
+       "      <td>Community</td>\n",
+       "      <td>2010-07-19 06:55:26</td>\n",
+       "      <td>http://meta.stackexchange.com/</td>\n",
+       "      <td>on the server farm</td>\n",
+       "      <td>&lt;p&gt;Hi, I'm not really a person.&lt;/p&gt;\\r\\n\\r\\n&lt;p&gt;...</td>\n",
+       "      <td>0</td>\n",
+       "      <td>5007</td>\n",
+       "      <td>1920</td>\n",
+       "      <td>-1</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>2</td>\n",
+       "      <td>101</td>\n",
+       "      <td>2010-07-19 14:01:36</td>\n",
+       "      <td>Geoff Dalgas</td>\n",
+       "      <td>2013-11-12 22:07:23</td>\n",
+       "      <td>http://stackoverflow.com</td>\n",
+       "      <td>Corvallis, OR</td>\n",
+       "      <td>&lt;p&gt;Developer on the StackOverflow team.  Find ...</td>\n",
+       "      <td>25</td>\n",
+       "      <td>3</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>37.0</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>3</td>\n",
+       "      <td>101</td>\n",
+       "      <td>2010-07-19 15:34:50</td>\n",
+       "      <td>Jarrod Dixon</td>\n",
+       "      <td>2014-08-08 06:42:58</td>\n",
+       "      <td>http://stackoverflow.com</td>\n",
+       "      <td>New York, NY</td>\n",
+       "      <td>&lt;p&gt;&lt;a href=\"http://blog.stackoverflow.com/2009...</td>\n",
+       "      <td>22</td>\n",
+       "      <td>19</td>\n",
+       "      <td>0</td>\n",
+       "      <td>3</td>\n",
+       "      <td>35.0</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>4</td>\n",
+       "      <td>101</td>\n",
+       "      <td>2010-07-19 19:03:27</td>\n",
+       "      <td>Emmett</td>\n",
+       "      <td>2014-01-02 09:31:02</td>\n",
+       "      <td>http://minesweeperonline.com</td>\n",
+       "      <td>San Francisco, CA</td>\n",
+       "      <td>&lt;p&gt;currently at a startup in SF&lt;/p&gt;\\r\\n\\r\\n&lt;p&gt;...</td>\n",
+       "      <td>11</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1998</td>\n",
+       "      <td>28.0</td>\n",
+       "      <td>http://i.stack.imgur.com/d1oHX.jpg</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>5</td>\n",
+       "      <td>6792</td>\n",
+       "      <td>2010-07-19 19:03:57</td>\n",
+       "      <td>Shane</td>\n",
+       "      <td>2014-08-13 00:23:47</td>\n",
+       "      <td>http://www.statalgo.com</td>\n",
+       "      <td>New York, NY</td>\n",
+       "      <td>&lt;p&gt;Quantitative researcher focusing on statist...</td>\n",
+       "      <td>1145</td>\n",
+       "      <td>662</td>\n",
+       "      <td>5</td>\n",
+       "      <td>54503</td>\n",
+       "      <td>35.0</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>40320</th>\n",
+       "      <td>55743</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2014-09-13 21:03:50</td>\n",
+       "      <td>AussieMeg</td>\n",
+       "      <td>2014-09-13 21:18:52</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>5026902</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>http://graph.facebook.com/665821703/picture?ty...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>40321</th>\n",
+       "      <td>55744</td>\n",
+       "      <td>6</td>\n",
+       "      <td>2014-09-13 21:39:30</td>\n",
+       "      <td>Mia Maria</td>\n",
+       "      <td>2014-09-13 21:39:30</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>5026998</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>40322</th>\n",
+       "      <td>55745</td>\n",
+       "      <td>101</td>\n",
+       "      <td>2014-09-13 23:45:27</td>\n",
+       "      <td>tronbabylove</td>\n",
+       "      <td>2014-09-13 23:45:27</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>United States</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>481766</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>https://www.gravatar.com/avatar/faa7a3fdbd8308...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>40323</th>\n",
+       "      <td>55746</td>\n",
+       "      <td>106</td>\n",
+       "      <td>2014-09-14 00:29:41</td>\n",
+       "      <td>GPP</td>\n",
+       "      <td>2014-09-14 02:05:17</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>&lt;p&gt;Stats noobie, product, marketing &amp;amp; medi...</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>976289</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>https://www.gravatar.com/avatar/6d9e9fa6b783a3...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>40324</th>\n",
+       "      <td>55747</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2014-09-14 01:01:44</td>\n",
+       "      <td>Shivam Agrawal</td>\n",
+       "      <td>2014-09-14 01:19:04</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>India</td>\n",
+       "      <td>&lt;p&gt;Maths Enthusiast &lt;/p&gt;\\r\\n</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>5027354</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>https://lh4.googleusercontent.com/-ZsXhwVaFmiY...</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>40325 rows × 14 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "       userId  Reputation         CreationDate     DisplayName  \\\n",
+       "0          -1           1  2010-07-19 06:55:26       Community   \n",
+       "1           2         101  2010-07-19 14:01:36    Geoff Dalgas   \n",
+       "2           3         101  2010-07-19 15:34:50    Jarrod Dixon   \n",
+       "3           4         101  2010-07-19 19:03:27          Emmett   \n",
+       "4           5        6792  2010-07-19 19:03:57           Shane   \n",
+       "...       ...         ...                  ...             ...   \n",
+       "40320   55743           1  2014-09-13 21:03:50       AussieMeg   \n",
+       "40321   55744           6  2014-09-13 21:39:30       Mia Maria   \n",
+       "40322   55745         101  2014-09-13 23:45:27    tronbabylove   \n",
+       "40323   55746         106  2014-09-14 00:29:41             GPP   \n",
+       "40324   55747           1  2014-09-14 01:01:44  Shivam Agrawal   \n",
+       "\n",
+       "            LastAccessDate                      WebsiteUrl  \\\n",
+       "0      2010-07-19 06:55:26  http://meta.stackexchange.com/   \n",
+       "1      2013-11-12 22:07:23        http://stackoverflow.com   \n",
+       "2      2014-08-08 06:42:58        http://stackoverflow.com   \n",
+       "3      2014-01-02 09:31:02    http://minesweeperonline.com   \n",
+       "4      2014-08-13 00:23:47         http://www.statalgo.com   \n",
+       "...                    ...                             ...   \n",
+       "40320  2014-09-13 21:18:52                             NaN   \n",
+       "40321  2014-09-13 21:39:30                             NaN   \n",
+       "40322  2014-09-13 23:45:27                             NaN   \n",
+       "40323  2014-09-14 02:05:17                             NaN   \n",
+       "40324  2014-09-14 01:19:04                             NaN   \n",
+       "\n",
+       "                 Location                                            AboutMe  \\\n",
+       "0      on the server farm  <p>Hi, I'm not really a person.</p>\\r\\n\\r\\n<p>...   \n",
+       "1           Corvallis, OR  <p>Developer on the StackOverflow team.  Find ...   \n",
+       "2            New York, NY  <p><a href=\"http://blog.stackoverflow.com/2009...   \n",
+       "3       San Francisco, CA  <p>currently at a startup in SF</p>\\r\\n\\r\\n<p>...   \n",
+       "4            New York, NY  <p>Quantitative researcher focusing on statist...   \n",
+       "...                   ...                                                ...   \n",
+       "40320                 NaN                                                NaN   \n",
+       "40321                 NaN                                                NaN   \n",
+       "40322       United States                                                NaN   \n",
+       "40323                 NaN  <p>Stats noobie, product, marketing &amp; medi...   \n",
+       "40324               India                       <p>Maths Enthusiast </p>\\r\\n   \n",
+       "\n",
+       "       Views  UpVotes  DownVotes  AccountId   Age  \\\n",
+       "0          0     5007       1920         -1   NaN   \n",
+       "1         25        3          0          2  37.0   \n",
+       "2         22       19          0          3  35.0   \n",
+       "3         11        0          0       1998  28.0   \n",
+       "4       1145      662          5      54503  35.0   \n",
+       "...      ...      ...        ...        ...   ...   \n",
+       "40320      0        0          0    5026902   NaN   \n",
+       "40321      1        0          0    5026998   NaN   \n",
+       "40322      0        0          0     481766   NaN   \n",
+       "40323      1        0          0     976289   NaN   \n",
+       "40324      0        0          0    5027354   NaN   \n",
+       "\n",
+       "                                         ProfileImageUrl  \n",
+       "0                                                    NaN  \n",
+       "1                                                    NaN  \n",
+       "2                                                    NaN  \n",
+       "3                     http://i.stack.imgur.com/d1oHX.jpg  \n",
+       "4                                                    NaN  \n",
+       "...                                                  ...  \n",
+       "40320  http://graph.facebook.com/665821703/picture?ty...  \n",
+       "40321                                                NaN  \n",
+       "40322  https://www.gravatar.com/avatar/faa7a3fdbd8308...  \n",
+       "40323  https://www.gravatar.com/avatar/6d9e9fa6b783a3...  \n",
+       "40324  https://lh4.googleusercontent.com/-ZsXhwVaFmiY...  \n",
+       "\n",
+       "[40325 rows x 14 columns]"
+      ]
+     },
+     "execution_count": 23,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "users_table=users_table.rename(columns={\"Id\":\"userId\"})\n",
+    "users_table"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### 4. Import posts table:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 49,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Id</th>\n",
+       "      <th>PostTypeId</th>\n",
+       "      <th>AcceptedAnswerId</th>\n",
+       "      <th>CreaionDate</th>\n",
+       "      <th>Score</th>\n",
+       "      <th>ViewCount</th>\n",
+       "      <th>Body</th>\n",
+       "      <th>OwnerUserId</th>\n",
+       "      <th>LasActivityDate</th>\n",
+       "      <th>Title</th>\n",
+       "      <th>...</th>\n",
+       "      <th>AnswerCount</th>\n",
+       "      <th>CommentCount</th>\n",
+       "      <th>FavoriteCount</th>\n",
+       "      <th>LastEditorUserId</th>\n",
+       "      <th>LastEditDate</th>\n",
+       "      <th>CommunityOwnedDate</th>\n",
+       "      <th>ParentId</th>\n",
+       "      <th>ClosedDate</th>\n",
+       "      <th>OwnerDisplayName</th>\n",
+       "      <th>LastEditorDisplayName</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>15.0</td>\n",
+       "      <td>2010-07-19 19:12:12</td>\n",
+       "      <td>23</td>\n",
+       "      <td>1278.0</td>\n",
+       "      <td>&lt;p&gt;How should I elicit prior distributions fro...</td>\n",
+       "      <td>8.0</td>\n",
+       "      <td>2010-09-15 21:08:26</td>\n",
+       "      <td>Eliciting priors from experts</td>\n",
+       "      <td>...</td>\n",
+       "      <td>5.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>14.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>1 rows × 21 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   Id  PostTypeId  AcceptedAnswerId          CreaionDate  Score  ViewCount  \\\n",
+       "0   1           1              15.0  2010-07-19 19:12:12     23     1278.0   \n",
+       "\n",
+       "                                                Body  OwnerUserId  \\\n",
+       "0  <p>How should I elicit prior distributions fro...          8.0   \n",
+       "\n",
+       "       LasActivityDate                          Title  ... AnswerCount  \\\n",
+       "0  2010-09-15 21:08:26  Eliciting priors from experts  ...         5.0   \n",
+       "\n",
+       "   CommentCount  FavoriteCount  LastEditorUserId  LastEditDate  \\\n",
+       "0             1           14.0               NaN           NaN   \n",
+       "\n",
+       "  CommunityOwnedDate ParentId  ClosedDate OwnerDisplayName  \\\n",
+       "0                NaN      NaN         NaN              NaN   \n",
+       "\n",
+       "  LastEditorDisplayName  \n",
+       "0                   NaN  \n",
+       "\n",
+       "[1 rows x 21 columns]"
+      ]
+     },
+     "execution_count": 49,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "posts_table=pd.read_csv(\"posts_table.csv\")\n",
+    "posts_table.head(1)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### 5. Rename Id column to postId and OwnerUserId to userId"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 50,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>postId</th>\n",
+       "      <th>PostTypeId</th>\n",
+       "      <th>AcceptedAnswerId</th>\n",
+       "      <th>CreaionDate</th>\n",
+       "      <th>Score</th>\n",
+       "      <th>ViewCount</th>\n",
+       "      <th>Body</th>\n",
+       "      <th>userId</th>\n",
+       "      <th>LasActivityDate</th>\n",
+       "      <th>Title</th>\n",
+       "      <th>...</th>\n",
+       "      <th>AnswerCount</th>\n",
+       "      <th>CommentCount</th>\n",
+       "      <th>FavoriteCount</th>\n",
+       "      <th>LastEditorUserId</th>\n",
+       "      <th>LastEditDate</th>\n",
+       "      <th>CommunityOwnedDate</th>\n",
+       "      <th>ParentId</th>\n",
+       "      <th>ClosedDate</th>\n",
+       "      <th>OwnerDisplayName</th>\n",
+       "      <th>LastEditorDisplayName</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>15.0</td>\n",
+       "      <td>2010-07-19 19:12:12</td>\n",
+       "      <td>23</td>\n",
+       "      <td>1278.0</td>\n",
+       "      <td>&lt;p&gt;How should I elicit prior distributions fro...</td>\n",
+       "      <td>8.0</td>\n",
+       "      <td>2010-09-15 21:08:26</td>\n",
+       "      <td>Eliciting priors from experts</td>\n",
+       "      <td>...</td>\n",
+       "      <td>5.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>14.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>59.0</td>\n",
+       "      <td>2010-07-19 19:12:57</td>\n",
+       "      <td>22</td>\n",
+       "      <td>8198.0</td>\n",
+       "      <td>&lt;p&gt;In many different statistical methods there...</td>\n",
+       "      <td>24.0</td>\n",
+       "      <td>2012-11-12 09:21:54</td>\n",
+       "      <td>What is normality?</td>\n",
+       "      <td>...</td>\n",
+       "      <td>7.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>8.0</td>\n",
+       "      <td>88.0</td>\n",
+       "      <td>2010-08-07 17:56:44</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>5.0</td>\n",
+       "      <td>2010-07-19 19:13:28</td>\n",
+       "      <td>54</td>\n",
+       "      <td>3613.0</td>\n",
+       "      <td>&lt;p&gt;What are some valuable Statistical Analysis...</td>\n",
+       "      <td>18.0</td>\n",
+       "      <td>2013-05-27 14:48:36</td>\n",
+       "      <td>What are some valuable Statistical Analysis op...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>19.0</td>\n",
+       "      <td>4</td>\n",
+       "      <td>36.0</td>\n",
+       "      <td>183.0</td>\n",
+       "      <td>2011-02-12 05:50:03</td>\n",
+       "      <td>2010-07-19 19:13:28</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>4</td>\n",
+       "      <td>1</td>\n",
+       "      <td>135.0</td>\n",
+       "      <td>2010-07-19 19:13:31</td>\n",
+       "      <td>13</td>\n",
+       "      <td>5224.0</td>\n",
+       "      <td>&lt;p&gt;I have two groups of data.  Each with a dif...</td>\n",
+       "      <td>23.0</td>\n",
+       "      <td>2010-09-08 03:00:19</td>\n",
+       "      <td>Assessing the significance of differences in d...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>5.0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>5</td>\n",
+       "      <td>2</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>2010-07-19 19:14:43</td>\n",
+       "      <td>81</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>&lt;p&gt;The R-project&lt;/p&gt;\\n\\n&lt;p&gt;&lt;a href=\"http://www...</td>\n",
+       "      <td>23.0</td>\n",
+       "      <td>2010-07-19 19:21:15</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>3</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>23.0</td>\n",
+       "      <td>2010-07-19 19:21:15</td>\n",
+       "      <td>2010-07-19 19:14:43</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>5 rows × 21 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   postId  PostTypeId  AcceptedAnswerId          CreaionDate  Score  \\\n",
+       "0       1           1              15.0  2010-07-19 19:12:12     23   \n",
+       "1       2           1              59.0  2010-07-19 19:12:57     22   \n",
+       "2       3           1               5.0  2010-07-19 19:13:28     54   \n",
+       "3       4           1             135.0  2010-07-19 19:13:31     13   \n",
+       "4       5           2               NaN  2010-07-19 19:14:43     81   \n",
+       "\n",
+       "   ViewCount                                               Body  userId  \\\n",
+       "0     1278.0  <p>How should I elicit prior distributions fro...     8.0   \n",
+       "1     8198.0  <p>In many different statistical methods there...    24.0   \n",
+       "2     3613.0  <p>What are some valuable Statistical Analysis...    18.0   \n",
+       "3     5224.0  <p>I have two groups of data.  Each with a dif...    23.0   \n",
+       "4        NaN  <p>The R-project</p>\\n\\n<p><a href=\"http://www...    23.0   \n",
+       "\n",
+       "       LasActivityDate                                              Title  \\\n",
+       "0  2010-09-15 21:08:26                      Eliciting priors from experts   \n",
+       "1  2012-11-12 09:21:54                                 What is normality?   \n",
+       "2  2013-05-27 14:48:36  What are some valuable Statistical Analysis op...   \n",
+       "3  2010-09-08 03:00:19  Assessing the significance of differences in d...   \n",
+       "4  2010-07-19 19:21:15                                                NaN   \n",
+       "\n",
+       "   ... AnswerCount  CommentCount  FavoriteCount  LastEditorUserId  \\\n",
+       "0  ...         5.0             1           14.0               NaN   \n",
+       "1  ...         7.0             1            8.0              88.0   \n",
+       "2  ...        19.0             4           36.0             183.0   \n",
+       "3  ...         5.0             2            2.0               NaN   \n",
+       "4  ...         NaN             3            NaN              23.0   \n",
+       "\n",
+       "          LastEditDate   CommunityOwnedDate ParentId  ClosedDate  \\\n",
+       "0                  NaN                  NaN      NaN         NaN   \n",
+       "1  2010-08-07 17:56:44                  NaN      NaN         NaN   \n",
+       "2  2011-02-12 05:50:03  2010-07-19 19:13:28      NaN         NaN   \n",
+       "3                  NaN                  NaN      NaN         NaN   \n",
+       "4  2010-07-19 19:21:15  2010-07-19 19:14:43      3.0         NaN   \n",
+       "\n",
+       "  OwnerDisplayName LastEditorDisplayName  \n",
+       "0              NaN                   NaN  \n",
+       "1              NaN                   NaN  \n",
+       "2              NaN                   NaN  \n",
+       "3              NaN                   NaN  \n",
+       "4              NaN                   NaN  \n",
+       "\n",
+       "[5 rows x 21 columns]"
+      ]
+     },
+     "execution_count": 50,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "posts_table=posts_table.rename(columns={\"Id\":\"postId\",\"OwnerUserId\":\"userId\"})\n",
+    "posts_table.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### 6. Define new dataframes for users and posts with the following selected columns:\n",
+    "- **users columns**: userId, Reputation, Views, UpVotes, DownVotes\n",
+    "- **posts columns**: postId, Score, userId, ViewCount, CommentCount"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 52,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "   userId  Reputation  Views  UpVotes  DownVotes\n",
+      "0      -1           1      0     5007       1920\n",
+      "1       2         101     25        3          0\n",
+      "2       3         101     22       19          0\n",
+      "3       4         101     11        0          0\n",
+      "4       5        6792   1145      662          5\n",
+      "   postId  Score  userId  ViewCount  CommentCount\n",
+      "0       1     23     8.0     1278.0             1\n",
+      "1       2     22    24.0     8198.0             1\n",
+      "2       3     54    18.0     3613.0             4\n",
+      "3       4     13    23.0     5224.0             2\n",
+      "4       5     81    23.0        NaN             3\n"
+     ]
+    }
+   ],
+   "source": [
+    "users_new=users_table[[\"userId\",\"Reputation\",\"Views\",\"UpVotes\",\"DownVotes\"]].copy()\n",
+    "print(users_new.head())\n",
+    "posts_new=posts_table[[\"postId\",\"Score\",\"userId\",\"ViewCount\",\"CommentCount\"]].copy()\n",
+    "print(posts_new.head())"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### 7. Merge both dataframes, users and posts.\n",
+    "You will need to [merge](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.merge.html) the posts and users dataframes."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 66,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(40325, 5) 40325\n",
+      "(40000, 5) 8138\n",
+      "(72187, 9)\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>userId</th>\n",
+       "      <th>Reputation</th>\n",
+       "      <th>Views</th>\n",
+       "      <th>UpVotes</th>\n",
+       "      <th>DownVotes</th>\n",
+       "      <th>postId</th>\n",
+       "      <th>Score</th>\n",
+       "      <th>ViewCount</th>\n",
+       "      <th>CommentCount</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>-1.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>5007.0</td>\n",
+       "      <td>1920.0</td>\n",
+       "      <td>2175.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>-1.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>5007.0</td>\n",
+       "      <td>1920.0</td>\n",
+       "      <td>8576.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>-1.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>5007.0</td>\n",
+       "      <td>1920.0</td>\n",
+       "      <td>8578.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>-1.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>5007.0</td>\n",
+       "      <td>1920.0</td>\n",
+       "      <td>8981.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>-1.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>5007.0</td>\n",
+       "      <td>1920.0</td>\n",
+       "      <td>8982.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>72182</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>46117.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>355.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>72183</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>46260.0</td>\n",
+       "      <td>5.0</td>\n",
+       "      <td>145.0</td>\n",
+       "      <td>5.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>72184</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>46836.0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>406.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>72185</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>46892.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>72186</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>47308.0</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>129.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>72187 rows × 9 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "       userId  Reputation  Views  UpVotes  DownVotes   postId  Score  \\\n",
+       "0        -1.0         1.0    0.0   5007.0     1920.0   2175.0    0.0   \n",
+       "1        -1.0         1.0    0.0   5007.0     1920.0   8576.0    0.0   \n",
+       "2        -1.0         1.0    0.0   5007.0     1920.0   8578.0    0.0   \n",
+       "3        -1.0         1.0    0.0   5007.0     1920.0   8981.0    0.0   \n",
+       "4        -1.0         1.0    0.0   5007.0     1920.0   8982.0    0.0   \n",
+       "...       ...         ...    ...      ...        ...      ...    ...   \n",
+       "72182     NaN         NaN    NaN      NaN        NaN  46117.0    0.0   \n",
+       "72183     NaN         NaN    NaN      NaN        NaN  46260.0    5.0   \n",
+       "72184     NaN         NaN    NaN      NaN        NaN  46836.0    3.0   \n",
+       "72185     NaN         NaN    NaN      NaN        NaN  46892.0    1.0   \n",
+       "72186     NaN         NaN    NaN      NaN        NaN  47308.0    2.0   \n",
+       "\n",
+       "       ViewCount  CommentCount  \n",
+       "0            NaN           0.0  \n",
+       "1            NaN           0.0  \n",
+       "2            NaN           0.0  \n",
+       "3            NaN           0.0  \n",
+       "4            NaN           0.0  \n",
+       "...          ...           ...  \n",
+       "72182      355.0           0.0  \n",
+       "72183      145.0           5.0  \n",
+       "72184      406.0           0.0  \n",
+       "72185        NaN           0.0  \n",
+       "72186      129.0           0.0  \n",
+       "\n",
+       "[72187 rows x 9 columns]"
+      ]
+     },
+     "execution_count": 66,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Size check before merging: (rows, columns) and the number of distinct userId values per table.\n",
+    "a, a1 = users_new.shape, users_new[\"userId\"].nunique()\n",
+    "b, b1 = posts_new.shape, posts_new[\"userId\"].nunique()\n",
+    "# NOTE(review): the original note called the gap between the row count and the distinct userId\n",
+    "# count of posts_new \"weird\"; repeated userIds would be expected if one user can own several\n",
+    "# posts - TODO confirm against the printed numbers.\n",
+    "print(a, a1)\n",
+    "print(b, b1)\n",
+    "\n",
+    "# Outer join on userId: rows from either table are kept, and the side without a match is NaN.\n",
+    "data_merge = pd.merge(users_new, posts_new, how=\"outer\", on=\"userId\")\n",
+    "print(data_merge.shape)\n",
+    "data_merge"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### 8. How many missing values do you have in your merged dataframe? On which columns?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 67,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "userId           1038\n",
+       "Reputation       1038\n",
+       "Views            1038\n",
+       "UpVotes          1038\n",
+       "DownVotes        1038\n",
+       "postId          32187\n",
+       "Score           32187\n",
+       "ViewCount       56292\n",
+       "CommentCount    32187\n",
+       "dtype: int64"
+      ]
+     },
+     "execution_count": 67,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Number of missing values in each column of the merged frame.\n",
+    "data_merge.isna().sum()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### 9. You will need to do something about the missing values. Will you drop them or fill them? Explain. \n",
+    "**Remember** to check the results of your code before passing to the next step"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 96,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(71149, 9)\n",
+      "   userId  Reputation  Views  UpVotes  DownVotes  postId  Score  ViewCount  \\\n",
+      "0    -1.0         1.0    0.0   5007.0     1920.0  2175.0    0.0        NaN   \n",
+      "1    -1.0         1.0    0.0   5007.0     1920.0  8576.0    0.0        NaN   \n",
+      "2    -1.0         1.0    0.0   5007.0     1920.0  8578.0    0.0        NaN   \n",
+      "3    -1.0         1.0    0.0   5007.0     1920.0  8981.0    0.0        NaN   \n",
+      "4    -1.0         1.0    0.0   5007.0     1920.0  8982.0    0.0        NaN   \n",
+      "\n",
+      "   CommentCount  \n",
+      "0           0.0  \n",
+      "1           0.0  \n",
+      "2           0.0  \n",
+      "3           0.0  \n",
+      "4           0.0  \n",
+      "(58584, 7)\n",
+      "     userId  Reputation   Views  UpVotes  DownVotes  Score  CommentCount\n",
+      "0      -1.0         1.0     0.0   5007.0     1920.0    0.0           0.0\n",
+      "123     2.0       101.0    25.0      3.0        0.0    NaN           NaN\n",
+      "124     3.0       101.0    22.0     19.0        0.0    NaN           NaN\n",
+      "125     4.0       101.0    11.0      0.0        0.0    NaN           NaN\n",
+      "126     5.0      6792.0  1145.0    662.0        5.0  152.0           5.0\n"
+     ]
+    }
+   ],
+   "source": [
+    "# A row without a userId cannot be tied to any user, so it is dropped.\n",
+    "data_validId = data_merge.dropna(subset=[\"userId\"])\n",
+    "print(data_validId.shape)\n",
+    "print(data_validId.head())\n",
+    "\n",
+    "# postId is removed: a single user maps to many post ids, and it is missing in 32187 merged rows.\n",
+    "# ViewCount is removed: it is missing in 56292 of the 72187 merged rows.\n",
+    "# Rows that are exact duplicates once those two columns are gone are collapsed into one.\n",
+    "# NOTE(review): Score and CommentCount still hold NaN for users without a matching post row.\n",
+    "data_validId2 = data_validId.drop(columns=[\"postId\", \"ViewCount\"]).drop_duplicates()\n",
+    "print(data_validId2.shape)\n",
+    "print(data_validId2.head())\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### 10. Adjust the data types in order to avoid future issues. Which ones should be changed? "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 94,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "    userId Reputation   Views  UpVotes  DownVotes  Score  CommentCount\n",
+      "0     -1.0        1.0     0.0   5007.0     1920.0    0.0           0.0\n",
+      "123    2.0      101.0    25.0      3.0        0.0    NaN           NaN\n",
+      "124    3.0      101.0    22.0     19.0        0.0    NaN           NaN\n",
+      "125    4.0      101.0    11.0      0.0        0.0    NaN           NaN\n",
+      "126    5.0     6792.0  1145.0    662.0        5.0  152.0           5.0\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "userId           object\n",
+       "Reputation       object\n",
+       "Views           float64\n",
+       "UpVotes         float64\n",
+       "DownVotes       float64\n",
+       "Score           float64\n",
+       "CommentCount    float64\n",
+       "dtype: object"
+      ]
+     },
+     "execution_count": 94,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# userId is an identifier, so it is stored as text; it goes through Int64 first so the float\n",
+    "# ids produced by the merge become '2' rather than '2.0'.\n",
+    "# Reputation, Views, UpVotes, DownVotes, Score and CommentCount are quantities, so they stay\n",
+    "# numeric (casting Reputation to str would break sorting and arithmetic). The nullable Int64\n",
+    "# dtype stores whole numbers and still allows the missing Score/CommentCount values.\n",
+    "convert_int = dict.fromkeys(\n",
+    "    ['userId', 'Reputation', 'Views', 'UpVotes', 'DownVotes', 'Score', 'CommentCount'], 'Int64'\n",
+    ")\n",
+    "convert_str = {'userId': str}\n",
+    "data_validId2 = data_validId2.astype(convert_int).astype(convert_str)\n",
+    "print(data_validId2.head())\n",
+    "data_validId2.dtypes"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

	Id	Reputation	CreationDate	DisplayName	LastAccessDate	WebsiteUrl	Location	AboutMe	Views	UpVotes	DownVotes	AccountId	Age	ProfileImageUrl
0	-1	1	2010-07-19 06:55:26	Community	2010-07-19 06:55:26	http://meta.stackexchange.com/	on the server farm	<p>Hi, I'm not really a person.</p>\\r\\n\\r\\n<p>...	0	5007	1920	-1	NaN	NaN
1	2	101	2010-07-19 14:01:36	Geoff Dalgas	2013-11-12 22:07:23	http://stackoverflow.com	Corvallis, OR	<p>Developer on the StackOverflow team. Find ...	25	3	0	2	37.0	NaN
2	3	101	2010-07-19 15:34:50	Jarrod Dixon	2014-08-08 06:42:58	http://stackoverflow.com	New York, NY	<p><a href=\"http://blog.stackoverflow.com/2009...	22	19	0	3	35.0	NaN
3	4	101	2010-07-19 19:03:27	Emmett	2014-01-02 09:31:02	http://minesweeperonline.com	San Francisco, CA	<p>currently at a startup in SF</p>\\r\\n\\r\\n<p>...	11	0	0	1998	28.0	http://i.stack.imgur.com/d1oHX.jpg
4	5	6792	2010-07-19 19:03:57	Shane	2014-08-13 00:23:47	http://www.statalgo.com	New York, NY	<p>Quantitative researcher focusing on statist...	1145	662	5	54503	35.0	NaN
	postId	PostTypeId	AcceptedAnswerId	CreaionDate	Score	ViewCount	Body	userId	LasActivityDate	Title	...	AnswerCount	CommentCount	FavoriteCount	LastEditorUserId	LastEditDate	CommunityOwnedDate	ParentId	ClosedDate	OwnerDisplayName	LastEditorDisplayName
0	1	1	15.0	2010-07-19 19:12:12	23	1278.0	<p>How should I elicit prior distributions fro...	8.0	2010-09-15 21:08:26	Eliciting priors from experts	...	5.0	1	14.0	NaN	NaN	NaN	NaN	NaN	NaN	NaN
1	2	1	59.0	2010-07-19 19:12:57	22	8198.0	<p>In many different statistical methods there...	24.0	2012-11-12 09:21:54	What is normality?	...	7.0	1	8.0	88.0	2010-08-07 17:56:44	NaN	NaN	NaN	NaN	NaN
2	3	1	5.0	2010-07-19 19:13:28	54	3613.0	<p>What are some valuable Statistical Analysis...	18.0	2013-05-27 14:48:36	What are some valuable Statistical Analysis op...	...	19.0	4	36.0	183.0	2011-02-12 05:50:03	2010-07-19 19:13:28	NaN	NaN	NaN	NaN
3	4	1	135.0	2010-07-19 19:13:31	13	5224.0	<p>I have two groups of data. Each with a dif...	23.0	2010-09-08 03:00:19	Assessing the significance of differences in d...	...	5.0	2	2.0	NaN	NaN	NaN	NaN	NaN	NaN	NaN
4	5	2	NaN	2010-07-19 19:14:43	81	NaN	<p>The R-project</p>\\n\\n<p><a href=\"http://www...	23.0	2010-07-19 19:21:15	NaN	...	NaN	3	NaN	23.0	2010-07-19 19:21:15	2010-07-19 19:14:43	3.0	NaN	NaN	NaN
	userId	Reputation	Views	UpVotes	DownVotes	postId	Score	ViewCount	CommentCount
0	-1.0	1.0	0.0	5007.0	1920.0	2175.0	0.0	NaN	0.0
1	-1.0	1.0	0.0	5007.0	1920.0	8576.0	0.0	NaN	0.0
2	-1.0	1.0	0.0	5007.0	1920.0	8578.0	0.0	NaN	0.0
3	-1.0	1.0	0.0	5007.0	1920.0	8981.0	0.0	NaN	0.0
4	-1.0	1.0	0.0	5007.0	1920.0	8982.0	0.0	NaN	0.0
...	...	...	...	...	...	...	...	...	...
72182	NaN	NaN	NaN	NaN	NaN	46117.0	0.0	355.0	0.0
72183	NaN	NaN	NaN	NaN	NaN	46260.0	5.0	145.0	5.0
72184	NaN	NaN	NaN	NaN	NaN	46836.0	3.0	406.0	0.0
72185	NaN	NaN	NaN	NaN	NaN	46892.0	1.0	NaN	0.0
72186	NaN	NaN	NaN	NaN	NaN	47308.0	2.0	129.0	0.0