diff --git a/your-code/main.ipynb b/your-code/main.ipynb index bad6d94..1d368c3 100644 --- a/your-code/main.ipynb +++ b/your-code/main.ipynb @@ -9,10 +9,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "import pandas as pd" + ] }, { "cell_type": "markdown", @@ -23,10 +25,175 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
| \n", + " | Id | \n", + "Reputation | \n", + "CreationDate | \n", + "DisplayName | \n", + "LastAccessDate | \n", + "WebsiteUrl | \n", + "Location | \n", + "AboutMe | \n", + "Views | \n", + "UpVotes | \n", + "DownVotes | \n", + "AccountId | \n", + "Age | \n", + "ProfileImageUrl | \n", + "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", + "-1 | \n", + "1 | \n", + "2010-07-19 06:55:26 | \n", + "Community | \n", + "2010-07-19 06:55:26 | \n", + "http://meta.stackexchange.com/ | \n", + "on the server farm | \n", + "<p>Hi, I'm not really a person.</p>\\r\\n\\r\\n<p>... | \n", + "0 | \n", + "5007 | \n", + "1920 | \n", + "-1 | \n", + "NaN | \n", + "NaN | \n", + "
| 1 | \n", + "2 | \n", + "101 | \n", + "2010-07-19 14:01:36 | \n", + "Geoff Dalgas | \n", + "2013-11-12 22:07:23 | \n", + "http://stackoverflow.com | \n", + "Corvallis, OR | \n", + "<p>Developer on the StackOverflow team. Find ... | \n", + "25 | \n", + "3 | \n", + "0 | \n", + "2 | \n", + "37.0 | \n", + "NaN | \n", + "
| 2 | \n", + "3 | \n", + "101 | \n", + "2010-07-19 15:34:50 | \n", + "Jarrod Dixon | \n", + "2014-08-08 06:42:58 | \n", + "http://stackoverflow.com | \n", + "New York, NY | \n", + "<p><a href=\"http://blog.stackoverflow.com/2009... | \n", + "22 | \n", + "19 | \n", + "0 | \n", + "3 | \n", + "35.0 | \n", + "NaN | \n", + "
| 3 | \n", + "4 | \n", + "101 | \n", + "2010-07-19 19:03:27 | \n", + "Emmett | \n", + "2014-01-02 09:31:02 | \n", + "http://minesweeperonline.com | \n", + "San Francisco, CA | \n", + "<p>currently at a startup in SF</p>\\r\\n\\r\\n<p>... | \n", + "11 | \n", + "0 | \n", + "0 | \n", + "1998 | \n", + "28.0 | \n", + "http://i.stack.imgur.com/d1oHX.jpg | \n", + "
| 4 | \n", + "5 | \n", + "6792 | \n", + "2010-07-19 19:03:57 | \n", + "Shane | \n", + "2014-08-13 00:23:47 | \n", + "http://www.statalgo.com | \n", + "New York, NY | \n", + "<p>Quantitative researcher focusing on statist... | \n", + "1145 | \n", + "662 | \n", + "5 | \n", + "54503 | \n", + "35.0 | \n", + "NaN | \n", + "
Hi, I'm not really a person.
\\r\\n\\r\\n... 0 5007 \n", + "1
Developer on the StackOverflow team. Find ... 25 3 \n", + "2
\\r\\n\\r\\n... 11 0 \n", + "4
Quantitative researcher focusing on statist... 1145 662 \n", + "\n", + " DownVotes AccountId Age ProfileImageUrl \n", + "0 1920 -1 NaN NaN \n", + "1 0 2 37.0 NaN \n", + "2 0 3 35.0 NaN \n", + "3 0 1998 28.0 http://i.stack.imgur.com/d1oHX.jpg \n", + "4 5 54503 35.0 NaN " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "users_df = pd.read_csv('users_table.csv')\n", + "users_df.head()" + ] }, { "cell_type": "markdown", @@ -37,10 +204,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "users_df.rename(columns = {\"Id\" : \"userId\"}, inplace = True)" + ] }, { "cell_type": "markdown", @@ -51,10 +220,234 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
| \n", + " | Id | \n", + "PostTypeId | \n", + "AcceptedAnswerId | \n", + "CreaionDate | \n", + "Score | \n", + "ViewCount | \n", + "Body | \n", + "OwnerUserId | \n", + "LasActivityDate | \n", + "Title | \n", + "... | \n", + "AnswerCount | \n", + "CommentCount | \n", + "FavoriteCount | \n", + "LastEditorUserId | \n", + "LastEditDate | \n", + "CommunityOwnedDate | \n", + "ParentId | \n", + "ClosedDate | \n", + "OwnerDisplayName | \n", + "LastEditorDisplayName | \n", + "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", + "1 | \n", + "1 | \n", + "15.0 | \n", + "2010-07-19 19:12:12 | \n", + "23 | \n", + "1278.0 | \n", + "<p>How should I elicit prior distributions fro... | \n", + "8.0 | \n", + "2010-09-15 21:08:26 | \n", + "Eliciting priors from experts | \n", + "... | \n", + "5.0 | \n", + "1 | \n", + "14.0 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "
| 1 | \n", + "2 | \n", + "1 | \n", + "59.0 | \n", + "2010-07-19 19:12:57 | \n", + "22 | \n", + "8198.0 | \n", + "<p>In many different statistical methods there... | \n", + "24.0 | \n", + "2012-11-12 09:21:54 | \n", + "What is normality? | \n", + "... | \n", + "7.0 | \n", + "1 | \n", + "8.0 | \n", + "88.0 | \n", + "2010-08-07 17:56:44 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "
| 2 | \n", + "3 | \n", + "1 | \n", + "5.0 | \n", + "2010-07-19 19:13:28 | \n", + "54 | \n", + "3613.0 | \n", + "<p>What are some valuable Statistical Analysis... | \n", + "18.0 | \n", + "2013-05-27 14:48:36 | \n", + "What are some valuable Statistical Analysis op... | \n", + "... | \n", + "19.0 | \n", + "4 | \n", + "36.0 | \n", + "183.0 | \n", + "2011-02-12 05:50:03 | \n", + "2010-07-19 19:13:28 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "
| 3 | \n", + "4 | \n", + "1 | \n", + "135.0 | \n", + "2010-07-19 19:13:31 | \n", + "13 | \n", + "5224.0 | \n", + "<p>I have two groups of data. Each with a dif... | \n", + "23.0 | \n", + "2010-09-08 03:00:19 | \n", + "Assessing the significance of differences in d... | \n", + "... | \n", + "5.0 | \n", + "2 | \n", + "2.0 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "
| 4 | \n", + "5 | \n", + "2 | \n", + "NaN | \n", + "2010-07-19 19:14:43 | \n", + "81 | \n", + "NaN | \n", + "<p>The R-project</p>\\n\\n<p><a href=\"http://www... | \n", + "23.0 | \n", + "2010-07-19 19:21:15 | \n", + "NaN | \n", + "... | \n", + "NaN | \n", + "3 | \n", + "NaN | \n", + "23.0 | \n", + "2010-07-19 19:21:15 | \n", + "2010-07-19 19:14:43 | \n", + "3.0 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "
5 rows × 21 columns
\n", + "How should I elicit prior distributions fro... 8.0 \n", + "1
In many different statistical methods there... 24.0 \n", + "2
What are some valuable Statistical Analysis... 18.0 \n", + "3
I have two groups of data. Each with a dif... 23.0 \n", + "4
The R-project
\\n\\n| \n", + " | postId | \n", + "Score | \n", + "userId | \n", + "ViewCount | \n", + "CommentCount | \n", + "
|---|---|---|---|---|---|
| 0 | \n", + "1 | \n", + "23 | \n", + "8.0 | \n", + "1278.0 | \n", + "1 | \n", + "
| 1 | \n", + "2 | \n", + "22 | \n", + "24.0 | \n", + "8198.0 | \n", + "1 | \n", + "
| 2 | \n", + "3 | \n", + "54 | \n", + "18.0 | \n", + "3613.0 | \n", + "4 | \n", + "
| 3 | \n", + "4 | \n", + "13 | \n", + "23.0 | \n", + "5224.0 | \n", + "2 | \n", + "
| 4 | \n", + "5 | \n", + "81 | \n", + "23.0 | \n", + "NaN | \n", + "3 | \n", + "
| ... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
| 39995 | \n", + "48321 | \n", + "0 | \n", + "19966.0 | \n", + "NaN | \n", + "0 | \n", + "
| 39996 | \n", + "48322 | \n", + "3 | \n", + "892.0 | \n", + "NaN | \n", + "2 | \n", + "
| 39997 | \n", + "48323 | \n", + "1 | \n", + "2020.0 | \n", + "NaN | \n", + "0 | \n", + "
| 39998 | \n", + "48324 | \n", + "3 | \n", + "19914.0 | \n", + "NaN | \n", + "0 | \n", + "
| 39999 | \n", + "48325 | \n", + "-1 | \n", + "19968.0 | \n", + "116.0 | \n", + "4 | \n", + "
40000 rows × 5 columns
\n", + "" + ], + "text/plain": [ + " postId Score userId ViewCount CommentCount\n", + "0 1 23 8.0 1278.0 1\n", + "1 2 22 24.0 8198.0 1\n", + "2 3 54 18.0 3613.0 4\n", + "3 4 13 23.0 5224.0 2\n", + "4 5 81 23.0 NaN 3\n", + "... ... ... ... ... ...\n", + "39995 48321 0 19966.0 NaN 0\n", + "39996 48322 3 892.0 NaN 2\n", + "39997 48323 1 2020.0 NaN 0\n", + "39998 48324 3 19914.0 NaN 0\n", + "39999 48325 -1 19968.0 116.0 4\n", + "\n", + "[40000 rows x 5 columns]" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "users = users_df[[\"userId\", \"Reputation\", \"Views\", \"UpVotes\", \"DownVotes\"]].copy()\n", + "posts = posts_df[[\"postId\", \"Score\", \"userId\", \"ViewCount\", \"CommentCount\"]].copy()\n", + "print(users)\n", + "posts" + ] }, { "cell_type": "markdown", @@ -96,10 +685,481 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 66, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "| \n", + " | userId | \n", + "Reputation | \n", + "Views | \n", + "UpVotes | \n", + "DownVotes | \n", + "postId | \n", + "Score | \n", + "ViewCount | \n", + "CommentCount | \n", + "
|---|---|---|---|---|---|---|---|---|---|
| 38932 | \n", + "27581 | \n", + "1159 | \n", + "112 | \n", + "186 | \n", + "12 | \n", + "33250 | \n", + "0 | \n", + "NaN | \n", + "0 | \n", + "
| 38933 | \n", + "27581 | \n", + "1159 | \n", + "112 | \n", + "186 | \n", + "12 | \n", + "40831 | \n", + "0 | \n", + "NaN | \n", + "0 | \n", + "
| 38934 | \n", + "27581 | \n", + "1159 | \n", + "112 | \n", + "186 | \n", + "12 | \n", + "40833 | \n", + "0 | \n", + "NaN | \n", + "0 | \n", + "
| 38935 | \n", + "30893 | \n", + "116 | \n", + "2 | \n", + "0 | \n", + "0 | \n", + "19444 | \n", + "3 | \n", + "61.0 | \n", + "1 | \n", + "
| 38936 | \n", + "31017 | \n", + "160 | \n", + "6 | \n", + "6 | \n", + "0 | \n", + "46345 | \n", + "4 | \n", + "3221.0 | \n", + "1 | \n", + "
| 38937 | \n", + "31124 | \n", + "166 | \n", + "5 | \n", + "3 | \n", + "0 | \n", + "14544 | \n", + "6 | \n", + "NaN | \n", + "2 | \n", + "
| 38938 | \n", + "31388 | \n", + "11 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "38872 | \n", + "1 | \n", + "NaN | \n", + "0 | \n", + "
| 38939 | \n", + "32036 | \n", + "7227 | \n", + "991 | \n", + "2664 | \n", + "143 | \n", + "28258 | \n", + "0 | \n", + "NaN | \n", + "0 | \n", + "
| 38940 | \n", + "35234 | \n", + "171 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "23445 | \n", + "14 | \n", + "840.0 | \n", + "0 | \n", + "
| 38941 | \n", + "35705 | \n", + "16 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "19577 | \n", + "3 | \n", + "409.0 | \n", + "0 | \n", + "
| 38942 | \n", + "35921 | \n", + "113 | \n", + "2 | \n", + "1 | \n", + "0 | \n", + "16397 | \n", + "-2 | \n", + "NaN | \n", + "1 | \n", + "
| 38943 | \n", + "36602 | \n", + "121 | \n", + "0 | \n", + "2 | \n", + "0 | \n", + "27672 | \n", + "4 | \n", + "2244.0 | \n", + "1 | \n", + "
| 38944 | \n", + "37890 | \n", + "111 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "41240 | \n", + "1 | \n", + "NaN | \n", + "3 | \n", + "
| 38945 | \n", + "40342 | \n", + "121 | \n", + "1 | \n", + "1 | \n", + "0 | \n", + "32863 | \n", + "4 | \n", + "144.0 | \n", + "6 | \n", + "
| 38946 | \n", + "42201 | \n", + "131 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "10683 | \n", + "3 | \n", + "NaN | \n", + "0 | \n", + "
| 38947 | \n", + "43908 | \n", + "384 | \n", + "77 | \n", + "18 | \n", + "0 | \n", + "24072 | \n", + "2 | \n", + "2361.0 | \n", + "4 | \n", + "
| 38948 | \n", + "43908 | \n", + "384 | \n", + "77 | \n", + "18 | \n", + "0 | \n", + "24107 | \n", + "0 | \n", + "551.0 | \n", + "5 | \n", + "
| 38949 | \n", + "43908 | \n", + "384 | \n", + "77 | \n", + "18 | \n", + "0 | \n", + "25702 | \n", + "1 | \n", + "228.0 | \n", + "0 | \n", + "
| 38950 | \n", + "43908 | \n", + "384 | \n", + "77 | \n", + "18 | \n", + "0 | \n", + "25811 | \n", + "47 | \n", + "4463.0 | \n", + "11 | \n", + "
| 38951 | \n", + "43908 | \n", + "384 | \n", + "77 | \n", + "18 | \n", + "0 | \n", + "26326 | \n", + "3 | \n", + "820.0 | \n", + "6 | \n", + "
| 38952 | \n", + "43908 | \n", + "384 | \n", + "77 | \n", + "18 | \n", + "0 | \n", + "26469 | \n", + "0 | \n", + "124.0 | \n", + "0 | \n", + "
| 38953 | \n", + "43908 | \n", + "384 | \n", + "77 | \n", + "18 | \n", + "0 | \n", + "26927 | \n", + "1 | \n", + "2259.0 | \n", + "0 | \n", + "
| 38954 | \n", + "44618 | \n", + "128 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "21104 | \n", + "3 | \n", + "3650.0 | \n", + "6 | \n", + "
| 38955 | \n", + "44915 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "34012 | \n", + "0 | \n", + "81.0 | \n", + "0 | \n", + "
| 38956 | \n", + "44995 | \n", + "11 | \n", + "9 | \n", + "0 | \n", + "0 | \n", + "44474 | \n", + "1 | \n", + "446.0 | \n", + "1 | \n", + "
| 38957 | \n", + "45934 | \n", + "11 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "34003 | \n", + "1 | \n", + "115.0 | \n", + "2 | \n", + "
| 38958 | \n", + "46192 | \n", + "36 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "40667 | \n", + "5 | \n", + "326.0 | \n", + "2 | \n", + "
| 38959 | \n", + "46522 | \n", + "235 | \n", + "13 | \n", + "27 | \n", + "1 | \n", + "17461 | \n", + "3 | \n", + "166.0 | \n", + "0 | \n", + "
| 38960 | \n", + "52371 | \n", + "221 | \n", + "2 | \n", + "0 | \n", + "0 | \n", + "27237 | \n", + "24 | \n", + "3357.0 | \n", + "5 | \n", + "
| 38961 | \n", + "55226 | \n", + "119 | \n", + "2 | \n", + "3 | \n", + "0 | \n", + "16176 | \n", + "1 | \n", + "NaN | \n", + "0 | \n", + "