diff --git a/your-code/main.ipynb b/your-code/main.ipynb
index bad6d94..7157c69 100644
--- a/your-code/main.ipynb
+++ b/your-code/main.ipynb
@@ -9,10 +9,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 15,
    "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "import pandas as pd"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -23,10 +25,112 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 16,
    "metadata": {},
-   "outputs": [],
-   "source": []
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Id</th>\n",
+       "      <th>Reputation</th>\n",
+       "      <th>CreationDate</th>\n",
+       "      <th>DisplayName</th>\n",
+       "      <th>LastAccessDate</th>\n",
+       "      <th>WebsiteUrl</th>\n",
+       "      <th>Location</th>\n",
+       "      <th>AboutMe</th>\n",
+       "      <th>Views</th>\n",
+       "      <th>UpVotes</th>\n",
+       "      <th>DownVotes</th>\n",
+       "      <th>AccountId</th>\n",
+       "      <th>Age</th>\n",
+       "      <th>ProfileImageUrl</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>-1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2010-07-19 06:55:26</td>\n",
+       "      <td>Community</td>\n",
+       "      <td>2010-07-19 06:55:26</td>\n",
+       "      <td>http://meta.stackexchange.com/</td>\n",
+       "      <td>on the server farm</td>\n",
+       "      <td>&lt;p&gt;Hi, I'm not really a person.&lt;/p&gt;\\r\\n\\r\\n&lt;p&gt;...</td>\n",
+       "      <td>0</td>\n",
+       "      <td>5007</td>\n",
+       "      <td>1920</td>\n",
+       "      <td>-1</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>2</td>\n",
+       "      <td>101</td>\n",
+       "      <td>2010-07-19 14:01:36</td>\n",
+       "      <td>Geoff Dalgas</td>\n",
+       "      <td>2013-11-12 22:07:23</td>\n",
+       "      <td>http://stackoverflow.com</td>\n",
+       "      <td>Corvallis, OR</td>\n",
+       "      <td>&lt;p&gt;Developer on the StackOverflow team.  Find ...</td>\n",
+       "      <td>25</td>\n",
+       "      <td>3</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>37.0</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   Id  Reputation         CreationDate   DisplayName       LastAccessDate  \\\n",
+       "0  -1           1  2010-07-19 06:55:26     Community  2010-07-19 06:55:26   \n",
+       "1   2         101  2010-07-19 14:01:36  Geoff Dalgas  2013-11-12 22:07:23   \n",
+       "\n",
+       "                       WebsiteUrl            Location  \\\n",
+       "0  http://meta.stackexchange.com/  on the server farm   \n",
+       "1        http://stackoverflow.com       Corvallis, OR   \n",
+       "\n",
+       "                                             AboutMe  Views  UpVotes  \\\n",
+       "0  <p>Hi, I'm not really a person.</p>\\r\\n\\r\\n<p>...      0     5007   \n",
+       "1  <p>Developer on the StackOverflow team.  Find ...     25        3   \n",
+       "\n",
+       "   DownVotes  AccountId   Age ProfileImageUrl  \n",
+       "0       1920         -1   NaN             NaN  \n",
+       "1          0          2  37.0             NaN  "
+      ]
+     },
+     "execution_count": 16,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "users_table = pd.read_csv(filepath_or_buffer=\"users_table.csv\")  # relative path, resolved from the working directory\n",
+    "users_table.head(n=2)  # preview the first two rows"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -37,10 +141,120 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 17,
    "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "users_table = users_table.rename({\"Id\": \"userId\"}, axis=\"columns\")  # Id becomes the join key userId"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>userId</th>\n",
+       "      <th>Reputation</th>\n",
+       "      <th>CreationDate</th>\n",
+       "      <th>DisplayName</th>\n",
+       "      <th>LastAccessDate</th>\n",
+       "      <th>WebsiteUrl</th>\n",
+       "      <th>Location</th>\n",
+       "      <th>AboutMe</th>\n",
+       "      <th>Views</th>\n",
+       "      <th>UpVotes</th>\n",
+       "      <th>DownVotes</th>\n",
+       "      <th>AccountId</th>\n",
+       "      <th>Age</th>\n",
+       "      <th>ProfileImageUrl</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>-1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2010-07-19 06:55:26</td>\n",
+       "      <td>Community</td>\n",
+       "      <td>2010-07-19 06:55:26</td>\n",
+       "      <td>http://meta.stackexchange.com/</td>\n",
+       "      <td>on the server farm</td>\n",
+       "      <td>&lt;p&gt;Hi, I'm not really a person.&lt;/p&gt;\\r\\n\\r\\n&lt;p&gt;...</td>\n",
+       "      <td>0</td>\n",
+       "      <td>5007</td>\n",
+       "      <td>1920</td>\n",
+       "      <td>-1</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>2</td>\n",
+       "      <td>101</td>\n",
+       "      <td>2010-07-19 14:01:36</td>\n",
+       "      <td>Geoff Dalgas</td>\n",
+       "      <td>2013-11-12 22:07:23</td>\n",
+       "      <td>http://stackoverflow.com</td>\n",
+       "      <td>Corvallis, OR</td>\n",
+       "      <td>&lt;p&gt;Developer on the StackOverflow team.  Find ...</td>\n",
+       "      <td>25</td>\n",
+       "      <td>3</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>37.0</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   userId  Reputation         CreationDate   DisplayName       LastAccessDate  \\\n",
+       "0      -1           1  2010-07-19 06:55:26     Community  2010-07-19 06:55:26   \n",
+       "1       2         101  2010-07-19 14:01:36  Geoff Dalgas  2013-11-12 22:07:23   \n",
+       "\n",
+       "                       WebsiteUrl            Location  \\\n",
+       "0  http://meta.stackexchange.com/  on the server farm   \n",
+       "1        http://stackoverflow.com       Corvallis, OR   \n",
+       "\n",
+       "                                             AboutMe  Views  UpVotes  \\\n",
+       "0  <p>Hi, I'm not really a person.</p>\\r\\n\\r\\n<p>...      0     5007   \n",
+       "1  <p>Developer on the StackOverflow team.  Find ...     25        3   \n",
+       "\n",
+       "   DownVotes  AccountId   Age ProfileImageUrl  \n",
+       "0       1920         -1   NaN             NaN  \n",
+       "1          0          2  37.0             NaN  "
+      ]
+     },
+     "execution_count": 18,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "users_table.head(n=2)  # check that the Id column is now called userId"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -51,10 +265,152 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 19,
    "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "posts_table = pd.read_csv(filepath_or_buffer=\"posts_table.csv\")  # relative path, resolved from the working directory"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Id</th>\n",
+       "      <th>PostTypeId</th>\n",
+       "      <th>AcceptedAnswerId</th>\n",
+       "      <th>CreaionDate</th>\n",
+       "      <th>Score</th>\n",
+       "      <th>ViewCount</th>\n",
+       "      <th>Body</th>\n",
+       "      <th>OwnerUserId</th>\n",
+       "      <th>LasActivityDate</th>\n",
+       "      <th>Title</th>\n",
+       "      <th>...</th>\n",
+       "      <th>AnswerCount</th>\n",
+       "      <th>CommentCount</th>\n",
+       "      <th>FavoriteCount</th>\n",
+       "      <th>LastEditorUserId</th>\n",
+       "      <th>LastEditDate</th>\n",
+       "      <th>CommunityOwnedDate</th>\n",
+       "      <th>ParentId</th>\n",
+       "      <th>ClosedDate</th>\n",
+       "      <th>OwnerDisplayName</th>\n",
+       "      <th>LastEditorDisplayName</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>15.0</td>\n",
+       "      <td>2010-07-19 19:12:12</td>\n",
+       "      <td>23</td>\n",
+       "      <td>1278.0</td>\n",
+       "      <td>&lt;p&gt;How should I elicit prior distributions fro...</td>\n",
+       "      <td>8.0</td>\n",
+       "      <td>2010-09-15 21:08:26</td>\n",
+       "      <td>Eliciting priors from experts</td>\n",
+       "      <td>...</td>\n",
+       "      <td>5.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>14.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>59.0</td>\n",
+       "      <td>2010-07-19 19:12:57</td>\n",
+       "      <td>22</td>\n",
+       "      <td>8198.0</td>\n",
+       "      <td>&lt;p&gt;In many different statistical methods there...</td>\n",
+       "      <td>24.0</td>\n",
+       "      <td>2012-11-12 09:21:54</td>\n",
+       "      <td>What is normality?</td>\n",
+       "      <td>...</td>\n",
+       "      <td>7.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>8.0</td>\n",
+       "      <td>88.0</td>\n",
+       "      <td>2010-08-07 17:56:44</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>2 rows × 21 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   Id  PostTypeId  AcceptedAnswerId          CreaionDate  Score  ViewCount  \\\n",
+       "0   1           1              15.0  2010-07-19 19:12:12     23     1278.0   \n",
+       "1   2           1              59.0  2010-07-19 19:12:57     22     8198.0   \n",
+       "\n",
+       "                                                Body  OwnerUserId  \\\n",
+       "0  <p>How should I elicit prior distributions fro...          8.0   \n",
+       "1  <p>In many different statistical methods there...         24.0   \n",
+       "\n",
+       "       LasActivityDate                          Title  ... AnswerCount  \\\n",
+       "0  2010-09-15 21:08:26  Eliciting priors from experts  ...         5.0   \n",
+       "1  2012-11-12 09:21:54             What is normality?  ...         7.0   \n",
+       "\n",
+       "   CommentCount  FavoriteCount  LastEditorUserId         LastEditDate  \\\n",
+       "0             1           14.0               NaN                  NaN   \n",
+       "1             1            8.0              88.0  2010-08-07 17:56:44   \n",
+       "\n",
+       "  CommunityOwnedDate ParentId  ClosedDate OwnerDisplayName  \\\n",
+       "0                NaN      NaN         NaN              NaN   \n",
+       "1                NaN      NaN         NaN              NaN   \n",
+       "\n",
+       "  LastEditorDisplayName  \n",
+       "0                   NaN  \n",
+       "1                   NaN  \n",
+       "\n",
+       "[2 rows x 21 columns]"
+      ]
+     },
+     "execution_count": 20,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "posts_table.head(n=2)  # preview the first two rows"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -65,10 +421,173 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "posts_table = posts_table.rename(columns={\"Id\": \"postId\", \"OwnerUserId\": \"userId\"})  # labels are case-sensitive: \"OwnerUserId\", not \"OwnerUserID\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# The syntax was fine; the label was not. Column labels are case-sensitive and the column is \"OwnerUserId\" (not \"OwnerUserID\").\n",
+    "# rename() ignores labels it cannot find by default, so the misspelled key failed silently instead of raising.\n",
+    "posts_table = posts_table.rename(columns={\"Id\": \"postId\"})\n",
+    "posts_table = posts_table.rename(columns={\"OwnerUserId\": \"userId\"})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
    "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "posts_table = posts_table.rename(columns={\"Id\": \"postId\", \"OwnerUserId\": \"userId\"})  # exact label spelling: OwnerUserId"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>postId</th>\n",
+       "      <th>PostTypeId</th>\n",
+       "      <th>AcceptedAnswerId</th>\n",
+       "      <th>CreaionDate</th>\n",
+       "      <th>Score</th>\n",
+       "      <th>ViewCount</th>\n",
+       "      <th>Body</th>\n",
+       "      <th>OwnerUserId</th>\n",
+       "      <th>LasActivityDate</th>\n",
+       "      <th>Title</th>\n",
+       "      <th>...</th>\n",
+       "      <th>AnswerCount</th>\n",
+       "      <th>CommentCount</th>\n",
+       "      <th>FavoriteCount</th>\n",
+       "      <th>LastEditorUserId</th>\n",
+       "      <th>LastEditDate</th>\n",
+       "      <th>CommunityOwnedDate</th>\n",
+       "      <th>ParentId</th>\n",
+       "      <th>ClosedDate</th>\n",
+       "      <th>OwnerDisplayName</th>\n",
+       "      <th>LastEditorDisplayName</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>15.0</td>\n",
+       "      <td>2010-07-19 19:12:12</td>\n",
+       "      <td>23</td>\n",
+       "      <td>1278.0</td>\n",
+       "      <td>&lt;p&gt;How should I elicit prior distributions fro...</td>\n",
+       "      <td>8.0</td>\n",
+       "      <td>2010-09-15 21:08:26</td>\n",
+       "      <td>Eliciting priors from experts</td>\n",
+       "      <td>...</td>\n",
+       "      <td>5.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>14.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>59.0</td>\n",
+       "      <td>2010-07-19 19:12:57</td>\n",
+       "      <td>22</td>\n",
+       "      <td>8198.0</td>\n",
+       "      <td>&lt;p&gt;In many different statistical methods there...</td>\n",
+       "      <td>24.0</td>\n",
+       "      <td>2012-11-12 09:21:54</td>\n",
+       "      <td>What is normality?</td>\n",
+       "      <td>...</td>\n",
+       "      <td>7.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>8.0</td>\n",
+       "      <td>88.0</td>\n",
+       "      <td>2010-08-07 17:56:44</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>2 rows × 21 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   postId  PostTypeId  AcceptedAnswerId          CreaionDate  Score  \\\n",
+       "0       1           1              15.0  2010-07-19 19:12:12     23   \n",
+       "1       2           1              59.0  2010-07-19 19:12:57     22   \n",
+       "\n",
+       "   ViewCount                                               Body  OwnerUserId  \\\n",
+       "0     1278.0  <p>How should I elicit prior distributions fro...          8.0   \n",
+       "1     8198.0  <p>In many different statistical methods there...         24.0   \n",
+       "\n",
+       "       LasActivityDate                          Title  ... AnswerCount  \\\n",
+       "0  2010-09-15 21:08:26  Eliciting priors from experts  ...         5.0   \n",
+       "1  2012-11-12 09:21:54             What is normality?  ...         7.0   \n",
+       "\n",
+       "   CommentCount  FavoriteCount  LastEditorUserId         LastEditDate  \\\n",
+       "0             1           14.0               NaN                  NaN   \n",
+       "1             1            8.0              88.0  2010-08-07 17:56:44   \n",
+       "\n",
+       "  CommunityOwnedDate ParentId  ClosedDate OwnerDisplayName  \\\n",
+       "0                NaN      NaN         NaN              NaN   \n",
+       "1                NaN      NaN         NaN              NaN   \n",
+       "\n",
+       "  LastEditorDisplayName  \n",
+       "0                   NaN  \n",
+       "1                   NaN  \n",
+       "\n",
+       "[2 rows x 21 columns]"
+      ]
+     },
+     "execution_count": 24,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "posts_table.head(n=2)  # inspect the column labels after the rename"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -79,12 +598,53 @@
     "    **posts columns**: postId, Score,userId,ViewCount,CommentCount"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "KeyError",
+     "evalue": "\"['userId'] not in index\"",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[1;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
+      "Cell \u001b[1;32mIn[25], line 2\u001b[0m\n\u001b[0;32m      1\u001b[0m users_df \u001b[39m=\u001b[39m users_table\u001b[39m.\u001b[39mloc[:, [\u001b[39m\"\u001b[39m\u001b[39muserId\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mReputation\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mViews\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mUpVotes\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mDownVotes\u001b[39m\u001b[39m\"\u001b[39m]]\n\u001b[1;32m----> 2\u001b[0m posts_df \u001b[39m=\u001b[39m posts_table\u001b[39m.\u001b[39;49mloc[:, [\u001b[39m\"\u001b[39;49m\u001b[39muserId\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39m\"\u001b[39;49m\u001b[39mpostId\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39m\"\u001b[39;49m\u001b[39mScore\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39m\"\u001b[39;49m\u001b[39mViewCount\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39m\"\u001b[39;49m\u001b[39mCommentCount\u001b[39;49m\u001b[39m\"\u001b[39;49m]]\n",
+      "File \u001b[1;32mc:\\Users\\jsctr\\anaconda3\\lib\\site-packages\\pandas\\core\\indexing.py:1067\u001b[0m, in \u001b[0;36m_LocationIndexer.__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m   1065\u001b[0m     \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_is_scalar_access(key):\n\u001b[0;32m   1066\u001b[0m         \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mobj\u001b[39m.\u001b[39m_get_value(\u001b[39m*\u001b[39mkey, takeable\u001b[39m=\u001b[39m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_takeable)\n\u001b[1;32m-> 1067\u001b[0m     \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_getitem_tuple(key)\n\u001b[0;32m   1068\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m   1069\u001b[0m     \u001b[39m# we by definition only have the 0th axis\u001b[39;00m\n\u001b[0;32m   1070\u001b[0m     axis \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39maxis \u001b[39mor\u001b[39;00m \u001b[39m0\u001b[39m\n",
+      "File \u001b[1;32mc:\\Users\\jsctr\\anaconda3\\lib\\site-packages\\pandas\\core\\indexing.py:1256\u001b[0m, in \u001b[0;36m_LocIndexer._getitem_tuple\u001b[1;34m(self, tup)\u001b[0m\n\u001b[0;32m   1253\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_multi_take_opportunity(tup):\n\u001b[0;32m   1254\u001b[0m     \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_multi_take(tup)\n\u001b[1;32m-> 1256\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_getitem_tuple_same_dim(tup)\n",
+      "File \u001b[1;32mc:\\Users\\jsctr\\anaconda3\\lib\\site-packages\\pandas\\core\\indexing.py:924\u001b[0m, in \u001b[0;36m_LocationIndexer._getitem_tuple_same_dim\u001b[1;34m(self, tup)\u001b[0m\n\u001b[0;32m    921\u001b[0m \u001b[39mif\u001b[39;00m com\u001b[39m.\u001b[39mis_null_slice(key):\n\u001b[0;32m    922\u001b[0m     \u001b[39mcontinue\u001b[39;00m\n\u001b[1;32m--> 924\u001b[0m retval \u001b[39m=\u001b[39m \u001b[39mgetattr\u001b[39;49m(retval, \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mname)\u001b[39m.\u001b[39;49m_getitem_axis(key, axis\u001b[39m=\u001b[39;49mi)\n\u001b[0;32m    925\u001b[0m \u001b[39m# We should never have retval.ndim < self.ndim, as that should\u001b[39;00m\n\u001b[0;32m    926\u001b[0m \u001b[39m#  be handled by the _getitem_lowerdim call above.\u001b[39;00m\n\u001b[0;32m    927\u001b[0m \u001b[39massert\u001b[39;00m retval\u001b[39m.\u001b[39mndim \u001b[39m==\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mndim\n",
+      "File \u001b[1;32mc:\\Users\\jsctr\\anaconda3\\lib\\site-packages\\pandas\\core\\indexing.py:1301\u001b[0m, in \u001b[0;36m_LocIndexer._getitem_axis\u001b[1;34m(self, key, axis)\u001b[0m\n\u001b[0;32m   1298\u001b[0m     \u001b[39mif\u001b[39;00m \u001b[39mhasattr\u001b[39m(key, \u001b[39m\"\u001b[39m\u001b[39mndim\u001b[39m\u001b[39m\"\u001b[39m) \u001b[39mand\u001b[39;00m key\u001b[39m.\u001b[39mndim \u001b[39m>\u001b[39m \u001b[39m1\u001b[39m:\n\u001b[0;32m   1299\u001b[0m         \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\u001b[39m\"\u001b[39m\u001b[39mCannot index with multidimensional key\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m-> 1301\u001b[0m     \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_getitem_iterable(key, axis\u001b[39m=\u001b[39;49maxis)\n\u001b[0;32m   1303\u001b[0m \u001b[39m# nested tuple slicing\u001b[39;00m\n\u001b[0;32m   1304\u001b[0m \u001b[39mif\u001b[39;00m is_nested_tuple(key, labels):\n",
+      "File \u001b[1;32mc:\\Users\\jsctr\\anaconda3\\lib\\site-packages\\pandas\\core\\indexing.py:1239\u001b[0m, in \u001b[0;36m_LocIndexer._getitem_iterable\u001b[1;34m(self, key, axis)\u001b[0m\n\u001b[0;32m   1236\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_validate_key(key, axis)\n\u001b[0;32m   1238\u001b[0m \u001b[39m# A collection of keys\u001b[39;00m\n\u001b[1;32m-> 1239\u001b[0m keyarr, indexer \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_get_listlike_indexer(key, axis)\n\u001b[0;32m   1240\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mobj\u001b[39m.\u001b[39m_reindex_with_indexers(\n\u001b[0;32m   1241\u001b[0m     {axis: [keyarr, indexer]}, copy\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m, allow_dups\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m\n\u001b[0;32m   1242\u001b[0m )\n",
+      "File \u001b[1;32mc:\\Users\\jsctr\\anaconda3\\lib\\site-packages\\pandas\\core\\indexing.py:1432\u001b[0m, in \u001b[0;36m_LocIndexer._get_listlike_indexer\u001b[1;34m(self, key, axis)\u001b[0m\n\u001b[0;32m   1429\u001b[0m ax \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mobj\u001b[39m.\u001b[39m_get_axis(axis)\n\u001b[0;32m   1430\u001b[0m axis_name \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mobj\u001b[39m.\u001b[39m_get_axis_name(axis)\n\u001b[1;32m-> 1432\u001b[0m keyarr, indexer \u001b[39m=\u001b[39m ax\u001b[39m.\u001b[39;49m_get_indexer_strict(key, axis_name)\n\u001b[0;32m   1434\u001b[0m \u001b[39mreturn\u001b[39;00m keyarr, indexer\n",
+      "File \u001b[1;32mc:\\Users\\jsctr\\anaconda3\\lib\\site-packages\\pandas\\core\\indexes\\base.py:6070\u001b[0m, in \u001b[0;36mIndex._get_indexer_strict\u001b[1;34m(self, key, axis_name)\u001b[0m\n\u001b[0;32m   6067\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m   6068\u001b[0m     keyarr, indexer, new_indexer \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_reindex_non_unique(keyarr)\n\u001b[1;32m-> 6070\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_raise_if_missing(keyarr, indexer, axis_name)\n\u001b[0;32m   6072\u001b[0m keyarr \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtake(indexer)\n\u001b[0;32m   6073\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(key, Index):\n\u001b[0;32m   6074\u001b[0m     \u001b[39m# GH 42790 - Preserve name from an Index\u001b[39;00m\n",
+      "File \u001b[1;32mc:\\Users\\jsctr\\anaconda3\\lib\\site-packages\\pandas\\core\\indexes\\base.py:6133\u001b[0m, in \u001b[0;36mIndex._raise_if_missing\u001b[1;34m(self, key, indexer, axis_name)\u001b[0m\n\u001b[0;32m   6130\u001b[0m     \u001b[39mraise\u001b[39;00m \u001b[39mKeyError\u001b[39;00m(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mNone of [\u001b[39m\u001b[39m{\u001b[39;00mkey\u001b[39m}\u001b[39;00m\u001b[39m] are in the [\u001b[39m\u001b[39m{\u001b[39;00maxis_name\u001b[39m}\u001b[39;00m\u001b[39m]\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m   6132\u001b[0m not_found \u001b[39m=\u001b[39m \u001b[39mlist\u001b[39m(ensure_index(key)[missing_mask\u001b[39m.\u001b[39mnonzero()[\u001b[39m0\u001b[39m]]\u001b[39m.\u001b[39munique())\n\u001b[1;32m-> 6133\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mKeyError\u001b[39;00m(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m{\u001b[39;00mnot_found\u001b[39m}\u001b[39;00m\u001b[39m not in index\u001b[39m\u001b[39m\"\u001b[39m)\n",
+      "\u001b[1;31mKeyError\u001b[0m: \"['userId'] not in index\""
+     ]
+    }
+   ],
+   "source": [
+    "users_df = users_table[[\"userId\", \"Reputation\", \"Views\", \"UpVotes\", \"DownVotes\"]]  # keep only the requested user columns\n",
+    "posts_df = posts_table[[\"userId\", \"postId\", \"Score\", \"ViewCount\", \"CommentCount\"]]  # keep only the requested post columns\n"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "users_df.head(n=5)  # preview the reduced users frame"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "posts_df.head(n=5)  # preview the reduced posts frame"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -99,7 +659,9 @@
    "execution_count": null,
    "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "merged_df = users_df.merge(posts_df, on=\"userId\")  # inner join (the pandas default) on the shared userId key"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -113,7 +675,9 @@
    "execution_count": null,
    "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "merged_df.isnull().sum()  # count of missing values in each column of the merged frame"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -128,7 +692,10 @@
    "execution_count": null,
    "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "# It depends: if only a few values are missing, I'd fill the empty rows with the average of the whole column;\n",
+    "# if too many values are missing (e.g. over 80% of the rows), I'd drop the column instead."
+   ]
   },
   {
    "cell_type": "markdown",
@@ -139,10 +706,59 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 26,
    "metadata": {},
-   "outputs": [],
-   "source": []
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "postId                     int64\n",
+      "PostTypeId                 int64\n",
+      "AcceptedAnswerId         float64\n",
+      "CreaionDate               object\n",
+      "Score                      int64\n",
+      "ViewCount                float64\n",
+      "Body                      object\n",
+      "OwnerUserId              float64\n",
+      "LasActivityDate           object\n",
+      "Title                     object\n",
+      "Tags                      object\n",
+      "AnswerCount              float64\n",
+      "CommentCount               int64\n",
+      "FavoriteCount            float64\n",
+      "LastEditorUserId         float64\n",
+      "LastEditDate              object\n",
+      "CommunityOwnedDate        object\n",
+      "ParentId                 float64\n",
+      "ClosedDate                object\n",
+      "OwnerDisplayName          object\n",
+      "LastEditorDisplayName     object\n",
+      "dtype: object\n",
+      "userId               int64\n",
+      "Reputation           int64\n",
+      "CreationDate        object\n",
+      "DisplayName         object\n",
+      "LastAccessDate      object\n",
+      "WebsiteUrl          object\n",
+      "Location            object\n",
+      "AboutMe             object\n",
+      "Views                int64\n",
+      "UpVotes              int64\n",
+      "DownVotes            int64\n",
+      "AccountId            int64\n",
+      "Age                float64\n",
+      "ProfileImageUrl     object\n",
+      "dtype: object\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(posts_table.dtypes,\n",
+    "      users_table.dtypes, sep=\"\\n\")  # posts first, then users, one report per block\n",
+    "\n",
+    "# I'd convert the numerical variables that act as identifiers (e.g. PostTypeId) into categorical ones."
+   ]
   }
  ],
  "metadata": {
@@ -161,7 +777,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.8"
+   "version": "3.10.9"
   }
  },
  "nbformat": 4,

	Id	Reputation	CreationDate	DisplayName	LastAccessDate	WebsiteUrl	Location	AboutMe	Views	UpVotes	DownVotes	AccountId	Age	ProfileImageUrl
0	-1	1	2010-07-19 06:55:26	Community	2010-07-19 06:55:26	http://meta.stackexchange.com/	on the server farm	<p>Hi, I'm not really a person.</p>\\r\\n\\r\\n<p>...	0	5007	1920	-1	NaN	NaN
1	2	101	2010-07-19 14:01:36	Geoff Dalgas	2013-11-12 22:07:23	http://stackoverflow.com	Corvallis, OR	<p>Developer on the StackOverflow team. Find ...	25	3	0	2	37.0	NaN
	Id	PostTypeId	AcceptedAnswerId	CreaionDate	Score	ViewCount	Body	OwnerUserId	LasActivityDate	Title	...	AnswerCount	CommentCount	FavoriteCount	LastEditorUserId	LastEditDate	CommunityOwnedDate	ParentId	ClosedDate	OwnerDisplayName	LastEditorDisplayName
0	1	1	15.0	2010-07-19 19:12:12	23	1278.0	<p>How should I elicit prior distributions fro...	8.0	2010-09-15 21:08:26	Eliciting priors from experts	...	5.0	1	14.0	NaN	NaN	NaN	NaN	NaN	NaN	NaN
1	2	1	59.0	2010-07-19 19:12:57	22	8198.0	<p>In many different statistical methods there...	24.0	2012-11-12 09:21:54	What is normality?	...	7.0	1	8.0	88.0	2010-08-07 17:56:44	NaN	NaN	NaN	NaN	NaN