diff --git a/your-code/main.ipynb b/your-code/main.ipynb index bad6d94..4def7d8 100644 --- a/your-code/main.ipynb +++ b/your-code/main.ipynb @@ -9,10 +9,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 54, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "import pandas as pd " + ] }, { "cell_type": "markdown", @@ -23,10 +25,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 55, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "user = pd.read_csv(\"users_table.csv\")" + ] }, { "cell_type": "markdown", @@ -37,10 +41,175 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 56, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
userIdReputationCreationDateDisplayNameLastAccessDateWebsiteUrlLocationAboutMeViewsUpVotesDownVotesAccountIdAgeProfileImageUrl
0-112010-07-19 06:55:26Community2010-07-19 06:55:26http://meta.stackexchange.com/on the server farm<p>Hi, I'm not really a person.</p>\\r\\n\\r\\n<p>...050071920-1NaNNaN
121012010-07-19 14:01:36Geoff Dalgas2013-11-12 22:07:23http://stackoverflow.comCorvallis, OR<p>Developer on the StackOverflow team. Find ...2530237.0NaN
231012010-07-19 15:34:50Jarrod Dixon2014-08-08 06:42:58http://stackoverflow.comNew York, NY<p><a href=\"http://blog.stackoverflow.com/2009...22190335.0NaN
341012010-07-19 19:03:27Emmett2014-01-02 09:31:02http://minesweeperonline.comSan Francisco, CA<p>currently at a startup in SF</p>\\r\\n\\r\\n<p>...1100199828.0http://i.stack.imgur.com/d1oHX.jpg
4567922010-07-19 19:03:57Shane2014-08-13 00:23:47http://www.statalgo.comNew York, NY<p>Quantitative researcher focusing on statist...114566255450335.0NaN
\n", + "
" + ], + "text/plain": [ + " userId Reputation CreationDate DisplayName LastAccessDate \n", + "0 -1 1 2010-07-19 06:55:26 Community 2010-07-19 06:55:26 \\\n", + "1 2 101 2010-07-19 14:01:36 Geoff Dalgas 2013-11-12 22:07:23 \n", + "2 3 101 2010-07-19 15:34:50 Jarrod Dixon 2014-08-08 06:42:58 \n", + "3 4 101 2010-07-19 19:03:27 Emmett 2014-01-02 09:31:02 \n", + "4 5 6792 2010-07-19 19:03:57 Shane 2014-08-13 00:23:47 \n", + "\n", + " WebsiteUrl Location \n", + "0 http://meta.stackexchange.com/ on the server farm \\\n", + "1 http://stackoverflow.com Corvallis, OR \n", + "2 http://stackoverflow.com New York, NY \n", + "3 http://minesweeperonline.com San Francisco, CA \n", + "4 http://www.statalgo.com New York, NY \n", + "\n", + " AboutMe Views UpVotes \n", + "0

Hi, I'm not really a person.

\\r\\n\\r\\n

... 0 5007 \\\n", + "1

Developer on the StackOverflow team. Find ... 25 3 \n", + "2

currently at a startup in SF

\\r\\n\\r\\n

... 11 0 \n", + "4

Quantitative researcher focusing on statist... 1145 662 \n", + "\n", + " DownVotes AccountId Age ProfileImageUrl \n", + "0 1920 -1 NaN NaN \n", + "1 0 2 37.0 NaN \n", + "2 0 3 35.0 NaN \n", + "3 0 1998 28.0 http://i.stack.imgur.com/d1oHX.jpg \n", + "4 5 54503 35.0 NaN " + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "user.rename(columns = {\"Id\":\"userId\"}, inplace = True)\n", + "user.head()" + ] }, { "cell_type": "markdown", @@ -51,10 +220,114 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 57, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "

\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IdPostTypeIdAcceptedAnswerIdCreaionDateScoreViewCountBodyOwnerUserIdLasActivityDateTitle...AnswerCountCommentCountFavoriteCountLastEditorUserIdLastEditDateCommunityOwnedDateParentIdClosedDateOwnerDisplayNameLastEditorDisplayName
01115.02010-07-19 19:12:12231278.0<p>How should I elicit prior distributions fro...8.02010-09-15 21:08:26Eliciting priors from experts...5.0114.0NaNNaNNaNNaNNaNNaNNaN
\n", + "

1 rows × 21 columns

\n", + "
" + ], + "text/plain": [ + " Id PostTypeId AcceptedAnswerId CreaionDate Score ViewCount \n", + "0 1 1 15.0 2010-07-19 19:12:12 23 1278.0 \\\n", + "\n", + " Body OwnerUserId \n", + "0

How should I elicit prior distributions fro... 8.0 \\\n", + "\n", + " LasActivityDate Title ... AnswerCount \n", + "0 2010-09-15 21:08:26 Eliciting priors from experts ... 5.0 \\\n", + "\n", + " CommentCount FavoriteCount LastEditorUserId LastEditDate \n", + "0 1 14.0 NaN NaN \\\n", + "\n", + " CommunityOwnedDate ParentId ClosedDate OwnerDisplayName \n", + "0 NaN NaN NaN NaN \\\n", + "\n", + " LastEditorDisplayName \n", + "0 NaN \n", + "\n", + "[1 rows x 21 columns]" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "posts = pd.read_csv(\"posts_table.csv\")\n", + "posts.head(1)" + ] }, { "cell_type": "markdown", @@ -65,10 +338,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 58, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "posts.rename(columns = {\"Id\":\"postId\", \"OwnerUserId\": \"userId\"}, inplace = True)" + ] }, { "cell_type": "markdown", @@ -81,10 +356,301 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 59, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "

\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
userIdReputationViewsUpVotesDownVotes
0-11050071920
121012530
2310122190
341011100
45679211456625
..................
40320557431000
40321557446100
4032255745101000
4032355746106100
40324557471000
\n", + "

40325 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " userId Reputation Views UpVotes DownVotes\n", + "0 -1 1 0 5007 1920\n", + "1 2 101 25 3 0\n", + "2 3 101 22 19 0\n", + "3 4 101 11 0 0\n", + "4 5 6792 1145 662 5\n", + "... ... ... ... ... ...\n", + "40320 55743 1 0 0 0\n", + "40321 55744 6 1 0 0\n", + "40322 55745 101 0 0 0\n", + "40323 55746 106 1 0 0\n", + "40324 55747 1 0 0 0\n", + "\n", + "[40325 rows x 5 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
postIdScoreuserIdViewCountCommentCount
01238.01278.01
122224.08198.01
235418.03613.04
341323.05224.02
458123.0NaN3
..................
3999548321019966.0NaN0
39996483223892.0NaN2
399974832312020.0NaN0
3999848324319914.0NaN0
3999948325-119968.0116.04
\n", + "

40000 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " postId Score userId ViewCount CommentCount\n", + "0 1 23 8.0 1278.0 1\n", + "1 2 22 24.0 8198.0 1\n", + "2 3 54 18.0 3613.0 4\n", + "3 4 13 23.0 5224.0 2\n", + "4 5 81 23.0 NaN 3\n", + "... ... ... ... ... ...\n", + "39995 48321 0 19966.0 NaN 0\n", + "39996 48322 3 892.0 NaN 2\n", + "39997 48323 1 2020.0 NaN 0\n", + "39998 48324 3 19914.0 NaN 0\n", + "39999 48325 -1 19968.0 116.0 4\n", + "\n", + "[40000 rows x 5 columns]" + ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "user_columns = user[[\"userId\", \"Reputation\", \"Views\",\"UpVotes\",\"DownVotes\"]]\n", + "posts_columns = posts[[\"postId\", \"Score\", \"userId\", \"ViewCount\",\"CommentCount\"]]\n", + "display(user_columns)\n", + "posts_columns" + ] }, { "cell_type": "markdown", @@ -96,10 +662,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 77, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "merged = pd.merge(user_columns,posts_columns, how=\"left\", on = \"userId\")" + ] }, { "cell_type": "markdown", @@ -110,10 +678,32 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 78, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "userId 0\n", + "Reputation 0\n", + "Views 0\n", + "UpVotes 0\n", + "DownVotes 0\n", + "postId 32187\n", + "Score 32187\n", + "ViewCount 55759\n", + "CommentCount 32187\n", + "dtype: int64" + ] + }, + "execution_count": 78, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "merged.isna().sum()" + ] }, { "cell_type": "markdown", @@ -125,10 +715,45 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 73, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "userId 0\n", + "Reputation 0\n", + "Views 0\n", + "UpVotes 0\n", + "DownVotes 0\n", + "postId 0\n", + "Score 0\n", + "ViewCount 0\n", + "CommentCount 0\n", + "dtype: int64" + ] + }, + "execution_count": 79, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#I will clean the rows without postID and Score AND comment Count and replace the nan values in viewCount with 0\n", + "merged = merged.dropna(subset=[\"postId\"])\n", + "merged.fillna(0, inplace=True)\n", + "merged.isna().sum()\n", + "\n" + ] }, { "cell_type": "markdown", @@ -139,10 +764,32 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 80, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "userId int64\n", + "Reputation int64\n", + "Views int64\n", + "UpVotes int64\n", + "DownVotes int64\n", + "postId float64\n", + "Score float64\n", + "ViewCount float64\n", + "CommentCount float64\n", + "dtype: object" + ] + }, + "execution_count": 80, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "merged.dtypes" + ] } ], "metadata": { @@ -161,7 +808,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.8" + "version": "3.11.2" } }, "nbformat": 4,