diff --git a/your-code/main.ipynb b/your-code/main.ipynb index bad6d94..dcab503 100644 --- a/your-code/main.ipynb +++ b/your-code/main.ipynb @@ -9,10 +9,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "import pandas as pd" + ] }, { "cell_type": "markdown", @@ -23,10 +25,325 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 55, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "users = pd.read_csv('users_table.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IdReputationCreationDateDisplayNameLastAccessDateWebsiteUrlLocationAboutMeViewsUpVotesDownVotesAccountIdAgeProfileImageUrl
0-112010-07-19 06:55:26Community2010-07-19 06:55:26http://meta.stackexchange.com/on the server farm<p>Hi, I'm not really a person.</p>\\r\\n\\r\\n<p>...050071920-1NaNNaN
121012010-07-19 14:01:36Geoff Dalgas2013-11-12 22:07:23http://stackoverflow.comCorvallis, OR<p>Developer on the StackOverflow team. Find ...2530237.0NaN
231012010-07-19 15:34:50Jarrod Dixon2014-08-08 06:42:58http://stackoverflow.comNew York, NY<p><a href=\"http://blog.stackoverflow.com/2009...22190335.0NaN
341012010-07-19 19:03:27Emmett2014-01-02 09:31:02http://minesweeperonline.comSan Francisco, CA<p>currently at a startup in SF</p>\\r\\n\\r\\n<p>...1100199828.0http://i.stack.imgur.com/d1oHX.jpg
4567922010-07-19 19:03:57Shane2014-08-13 00:23:47http://www.statalgo.comNew York, NY<p>Quantitative researcher focusing on statist...114566255450335.0NaN
.............................................
403205574312014-09-13 21:03:50AussieMeg2014-09-13 21:18:52NaNNaNNaN0005026902NaNhttp://graph.facebook.com/665821703/picture?ty...
403215574462014-09-13 21:39:30Mia Maria2014-09-13 21:39:30NaNNaNNaN1005026998NaNNaN
40322557451012014-09-13 23:45:27tronbabylove2014-09-13 23:45:27NaNUnited StatesNaN000481766NaNhttps://www.gravatar.com/avatar/faa7a3fdbd8308...
40323557461062014-09-14 00:29:41GPP2014-09-14 02:05:17NaNNaN<p>Stats noobie, product, marketing &amp; medi...100976289NaNhttps://www.gravatar.com/avatar/6d9e9fa6b783a3...
403245574712014-09-14 01:01:44Shivam Agrawal2014-09-14 01:19:04NaNIndia<p>Maths Enthusiast </p>\\r\\n0005027354NaNhttps://lh4.googleusercontent.com/-ZsXhwVaFmiY...
\n", + "

40325 rows × 14 columns

\n", + "
" + ], + "text/plain": [ + " Id Reputation CreationDate DisplayName \\\n", + "0 -1 1 2010-07-19 06:55:26 Community \n", + "1 2 101 2010-07-19 14:01:36 Geoff Dalgas \n", + "2 3 101 2010-07-19 15:34:50 Jarrod Dixon \n", + "3 4 101 2010-07-19 19:03:27 Emmett \n", + "4 5 6792 2010-07-19 19:03:57 Shane \n", + "... ... ... ... ... \n", + "40320 55743 1 2014-09-13 21:03:50 AussieMeg \n", + "40321 55744 6 2014-09-13 21:39:30 Mia Maria \n", + "40322 55745 101 2014-09-13 23:45:27 tronbabylove \n", + "40323 55746 106 2014-09-14 00:29:41 GPP \n", + "40324 55747 1 2014-09-14 01:01:44 Shivam Agrawal \n", + "\n", + " LastAccessDate WebsiteUrl \\\n", + "0 2010-07-19 06:55:26 http://meta.stackexchange.com/ \n", + "1 2013-11-12 22:07:23 http://stackoverflow.com \n", + "2 2014-08-08 06:42:58 http://stackoverflow.com \n", + "3 2014-01-02 09:31:02 http://minesweeperonline.com \n", + "4 2014-08-13 00:23:47 http://www.statalgo.com \n", + "... ... ... \n", + "40320 2014-09-13 21:18:52 NaN \n", + "40321 2014-09-13 21:39:30 NaN \n", + "40322 2014-09-13 23:45:27 NaN \n", + "40323 2014-09-14 02:05:17 NaN \n", + "40324 2014-09-14 01:19:04 NaN \n", + "\n", + " Location AboutMe \\\n", + "0 on the server farm

Hi, I'm not really a person.

\\r\\n\\r\\n

... \n", + "1 Corvallis, OR

Developer on the StackOverflow team. Find ... \n", + "2 New York, NY

currently at a startup in SF

\\r\\n\\r\\n

... \n", + "4 New York, NY

Quantitative researcher focusing on statist... \n", + "... ... ... \n", + "40320 NaN NaN \n", + "40321 NaN NaN \n", + "40322 United States NaN \n", + "40323 NaN

Stats noobie, product, marketing & medi... \n", + "40324 India

Maths Enthusiast

\\r\\n \n", + "\n", + " Views UpVotes DownVotes AccountId Age \\\n", + "0 0 5007 1920 -1 NaN \n", + "1 25 3 0 2 37.0 \n", + "2 22 19 0 3 35.0 \n", + "3 11 0 0 1998 28.0 \n", + "4 1145 662 5 54503 35.0 \n", + "... ... ... ... ... ... \n", + "40320 0 0 0 5026902 NaN \n", + "40321 1 0 0 5026998 NaN \n", + "40322 0 0 0 481766 NaN \n", + "40323 1 0 0 976289 NaN \n", + "40324 0 0 0 5027354 NaN \n", + "\n", + " ProfileImageUrl \n", + "0 NaN \n", + "1 NaN \n", + "2 NaN \n", + "3 http://i.stack.imgur.com/d1oHX.jpg \n", + "4 NaN \n", + "... ... \n", + "40320 http://graph.facebook.com/665821703/picture?ty... \n", + "40321 NaN \n", + "40322 https://www.gravatar.com/avatar/faa7a3fdbd8308... \n", + "40323 https://www.gravatar.com/avatar/6d9e9fa6b783a3... \n", + "40324 https://lh4.googleusercontent.com/-ZsXhwVaFmiY... \n", + "\n", + "[40325 rows x 14 columns]" + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "users" + ] }, { "cell_type": "markdown", @@ -37,10 +354,317 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 57, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
userIdReputationCreationDateDisplayNameLastAccessDateWebsiteUrlLocationAboutMeViewsUpVotesDownVotesAccountIdAgeProfileImageUrl
0-112010-07-19 06:55:26Community2010-07-19 06:55:26http://meta.stackexchange.com/on the server farm<p>Hi, I'm not really a person.</p>\\r\\n\\r\\n<p>...050071920-1NaNNaN
121012010-07-19 14:01:36Geoff Dalgas2013-11-12 22:07:23http://stackoverflow.comCorvallis, OR<p>Developer on the StackOverflow team. Find ...2530237.0NaN
231012010-07-19 15:34:50Jarrod Dixon2014-08-08 06:42:58http://stackoverflow.comNew York, NY<p><a href=\"http://blog.stackoverflow.com/2009...22190335.0NaN
341012010-07-19 19:03:27Emmett2014-01-02 09:31:02http://minesweeperonline.comSan Francisco, CA<p>currently at a startup in SF</p>\\r\\n\\r\\n<p>...1100199828.0http://i.stack.imgur.com/d1oHX.jpg
4567922010-07-19 19:03:57Shane2014-08-13 00:23:47http://www.statalgo.comNew York, NY<p>Quantitative researcher focusing on statist...114566255450335.0NaN
.............................................
403205574312014-09-13 21:03:50AussieMeg2014-09-13 21:18:52NaNNaNNaN0005026902NaNhttp://graph.facebook.com/665821703/picture?ty...
403215574462014-09-13 21:39:30Mia Maria2014-09-13 21:39:30NaNNaNNaN1005026998NaNNaN
40322557451012014-09-13 23:45:27tronbabylove2014-09-13 23:45:27NaNUnited StatesNaN000481766NaNhttps://www.gravatar.com/avatar/faa7a3fdbd8308...
40323557461062014-09-14 00:29:41GPP2014-09-14 02:05:17NaNNaN<p>Stats noobie, product, marketing &amp; medi...100976289NaNhttps://www.gravatar.com/avatar/6d9e9fa6b783a3...
403245574712014-09-14 01:01:44Shivam Agrawal2014-09-14 01:19:04NaNIndia<p>Maths Enthusiast </p>\\r\\n0005027354NaNhttps://lh4.googleusercontent.com/-ZsXhwVaFmiY...
\n", + "

40325 rows × 14 columns

\n", + "
" + ], + "text/plain": [ + " userId Reputation CreationDate DisplayName \\\n", + "0 -1 1 2010-07-19 06:55:26 Community \n", + "1 2 101 2010-07-19 14:01:36 Geoff Dalgas \n", + "2 3 101 2010-07-19 15:34:50 Jarrod Dixon \n", + "3 4 101 2010-07-19 19:03:27 Emmett \n", + "4 5 6792 2010-07-19 19:03:57 Shane \n", + "... ... ... ... ... \n", + "40320 55743 1 2014-09-13 21:03:50 AussieMeg \n", + "40321 55744 6 2014-09-13 21:39:30 Mia Maria \n", + "40322 55745 101 2014-09-13 23:45:27 tronbabylove \n", + "40323 55746 106 2014-09-14 00:29:41 GPP \n", + "40324 55747 1 2014-09-14 01:01:44 Shivam Agrawal \n", + "\n", + " LastAccessDate WebsiteUrl \\\n", + "0 2010-07-19 06:55:26 http://meta.stackexchange.com/ \n", + "1 2013-11-12 22:07:23 http://stackoverflow.com \n", + "2 2014-08-08 06:42:58 http://stackoverflow.com \n", + "3 2014-01-02 09:31:02 http://minesweeperonline.com \n", + "4 2014-08-13 00:23:47 http://www.statalgo.com \n", + "... ... ... \n", + "40320 2014-09-13 21:18:52 NaN \n", + "40321 2014-09-13 21:39:30 NaN \n", + "40322 2014-09-13 23:45:27 NaN \n", + "40323 2014-09-14 02:05:17 NaN \n", + "40324 2014-09-14 01:19:04 NaN \n", + "\n", + " Location AboutMe \\\n", + "0 on the server farm

Hi, I'm not really a person.

\\r\\n\\r\\n

... \n", + "1 Corvallis, OR

Developer on the StackOverflow team. Find ... \n", + "2 New York, NY

currently at a startup in SF

\\r\\n\\r\\n

... \n", + "4 New York, NY

Quantitative researcher focusing on statist... \n", + "... ... ... \n", + "40320 NaN NaN \n", + "40321 NaN NaN \n", + "40322 United States NaN \n", + "40323 NaN

Stats noobie, product, marketing & medi... \n", + "40324 India

Maths Enthusiast

\\r\\n \n", + "\n", + " Views UpVotes DownVotes AccountId Age \\\n", + "0 0 5007 1920 -1 NaN \n", + "1 25 3 0 2 37.0 \n", + "2 22 19 0 3 35.0 \n", + "3 11 0 0 1998 28.0 \n", + "4 1145 662 5 54503 35.0 \n", + "... ... ... ... ... ... \n", + "40320 0 0 0 5026902 NaN \n", + "40321 1 0 0 5026998 NaN \n", + "40322 0 0 0 481766 NaN \n", + "40323 1 0 0 976289 NaN \n", + "40324 0 0 0 5027354 NaN \n", + "\n", + " ProfileImageUrl \n", + "0 NaN \n", + "1 NaN \n", + "2 NaN \n", + "3 http://i.stack.imgur.com/d1oHX.jpg \n", + "4 NaN \n", + "... ... \n", + "40320 http://graph.facebook.com/665821703/picture?ty... \n", + "40321 NaN \n", + "40322 https://www.gravatar.com/avatar/faa7a3fdbd8308... \n", + "40323 https://www.gravatar.com/avatar/6d9e9fa6b783a3... \n", + "40324 https://lh4.googleusercontent.com/-ZsXhwVaFmiY... \n", + "\n", + "[40325 rows x 14 columns]" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "users.rename(columns={'Id': 'userId'}, inplace=True)\n", + "users\n" + ] }, { "cell_type": "markdown", @@ -51,40 +675,785 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 58, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IdPostTypeIdAcceptedAnswerIdCreaionDateScoreViewCountBodyOwnerUserIdLasActivityDateTitle...AnswerCountCommentCountFavoriteCountLastEditorUserIdLastEditDateCommunityOwnedDateParentIdClosedDateOwnerDisplayNameLastEditorDisplayName
01115.02010-07-19 19:12:12231278.0<p>How should I elicit prior distributions fro...8.02010-09-15 21:08:26Eliciting priors from experts...5.0114.0NaNNaNNaNNaNNaNNaNNaN
12159.02010-07-19 19:12:57228198.0<p>In many different statistical methods there...24.02012-11-12 09:21:54What is normality?...7.018.088.02010-08-07 17:56:44NaNNaNNaNNaNNaN
2315.02010-07-19 19:13:28543613.0<p>What are some valuable Statistical Analysis...18.02013-05-27 14:48:36What are some valuable Statistical Analysis op......19.0436.0183.02011-02-12 05:50:032010-07-19 19:13:28NaNNaNNaNNaN
341135.02010-07-19 19:13:31135224.0<p>I have two groups of data. Each with a dif...23.02010-09-08 03:00:19Assessing the significance of differences in d......5.022.0NaNNaNNaNNaNNaNNaNNaN
452NaN2010-07-19 19:14:4381NaN<p>The R-project</p>\\n\\n<p><a href=\"http://www...23.02010-07-19 19:21:15NaN...NaN3NaN23.02010-07-19 19:21:152010-07-19 19:14:433.0NaNNaNNaN
..................................................................
39995483212NaN2013-01-23 09:00:010NaN<p>you can use the matlab codes for svm and co...19966.02013-01-23 09:00:01NaN...NaN0NaNNaNNaNNaN45118.0NaNNaNNaN
39996483222NaN2013-01-23 09:09:343NaN<p>I use <a href=\"http://www.gnu.org/software/...892.02013-01-23 13:13:30NaN...NaN2NaN892.02013-01-23 13:13:30NaN48311.0NaNNaNNaN
39997483232NaN2013-01-23 09:16:441NaN<p>If I understand your question correctly, yo...2020.02013-01-23 09:16:44NaN...NaN0NaNNaNNaNNaN48247.0NaNNaNNaN
39998483242NaN2013-01-23 09:36:073NaN<p>Doesn't really help you with your question,...19914.02013-01-23 09:36:07NaN...NaN0NaNNaNNaNNaN48297.0NaNNaNNaN
39999483251NaN2013-01-23 09:44:07-1116.0<p>I have 10 vectors each having 100,000 point...19968.02013-02-22 11:23:54are data sets obtained from a Normal distribut......2.04NaNNaNNaNNaNNaNNaNNaNNaN
\n", + "

40000 rows × 21 columns

\n", + "
" + ], + "text/plain": [ + " Id PostTypeId AcceptedAnswerId CreaionDate Score \\\n", + "0 1 1 15.0 2010-07-19 19:12:12 23 \n", + "1 2 1 59.0 2010-07-19 19:12:57 22 \n", + "2 3 1 5.0 2010-07-19 19:13:28 54 \n", + "3 4 1 135.0 2010-07-19 19:13:31 13 \n", + "4 5 2 NaN 2010-07-19 19:14:43 81 \n", + "... ... ... ... ... ... \n", + "39995 48321 2 NaN 2013-01-23 09:00:01 0 \n", + "39996 48322 2 NaN 2013-01-23 09:09:34 3 \n", + "39997 48323 2 NaN 2013-01-23 09:16:44 1 \n", + "39998 48324 2 NaN 2013-01-23 09:36:07 3 \n", + "39999 48325 1 NaN 2013-01-23 09:44:07 -1 \n", + "\n", + " ViewCount Body \\\n", + "0 1278.0

How should I elicit prior distributions fro... \n", + "1 8198.0

In many different statistical methods there... \n", + "2 3613.0

What are some valuable Statistical Analysis... \n", + "3 5224.0

I have two groups of data. Each with a dif... \n", + "4 NaN

The R-project

\\n\\n

you can use the matlab codes for svm and co... \n", + "39996 NaN

I use If I understand your question correctly, yo... \n", + "39998 NaN

Doesn't really help you with your question,... \n", + "39999 116.0

I have 10 vectors each having 100,000 point... \n", + "\n", + " OwnerUserId LasActivityDate \\\n", + "0 8.0 2010-09-15 21:08:26 \n", + "1 24.0 2012-11-12 09:21:54 \n", + "2 18.0 2013-05-27 14:48:36 \n", + "3 23.0 2010-09-08 03:00:19 \n", + "4 23.0 2010-07-19 19:21:15 \n", + "... ... ... \n", + "39995 19966.0 2013-01-23 09:00:01 \n", + "39996 892.0 2013-01-23 13:13:30 \n", + "39997 2020.0 2013-01-23 09:16:44 \n", + "39998 19914.0 2013-01-23 09:36:07 \n", + "39999 19968.0 2013-02-22 11:23:54 \n", + "\n", + " Title ... AnswerCount \\\n", + "0 Eliciting priors from experts ... 5.0 \n", + "1 What is normality? ... 7.0 \n", + "2 What are some valuable Statistical Analysis op... ... 19.0 \n", + "3 Assessing the significance of differences in d... ... 5.0 \n", + "4 NaN ... NaN \n", + "... ... ... ... \n", + "39995 NaN ... NaN \n", + "39996 NaN ... NaN \n", + "39997 NaN ... NaN \n", + "39998 NaN ... NaN \n", + "39999 are data sets obtained from a Normal distribut... ... 2.0 \n", + "\n", + " CommentCount FavoriteCount LastEditorUserId LastEditDate \\\n", + "0 1 14.0 NaN NaN \n", + "1 1 8.0 88.0 2010-08-07 17:56:44 \n", + "2 4 36.0 183.0 2011-02-12 05:50:03 \n", + "3 2 2.0 NaN NaN \n", + "4 3 NaN 23.0 2010-07-19 19:21:15 \n", + "... ... ... ... ... \n", + "39995 0 NaN NaN NaN \n", + "39996 2 NaN 892.0 2013-01-23 13:13:30 \n", + "39997 0 NaN NaN NaN \n", + "39998 0 NaN NaN NaN \n", + "39999 4 NaN NaN NaN \n", + "\n", + " CommunityOwnedDate ParentId ClosedDate OwnerDisplayName \\\n", + "0 NaN NaN NaN NaN \n", + "1 NaN NaN NaN NaN \n", + "2 2010-07-19 19:13:28 NaN NaN NaN \n", + "3 NaN NaN NaN NaN \n", + "4 2010-07-19 19:14:43 3.0 NaN NaN \n", + "... ... ... ... ... \n", + "39995 NaN 45118.0 NaN NaN \n", + "39996 NaN 48311.0 NaN NaN \n", + "39997 NaN 48247.0 NaN NaN \n", + "39998 NaN 48297.0 NaN NaN \n", + "39999 NaN NaN NaN NaN \n", + "\n", + " LastEditorDisplayName \n", + "0 NaN \n", + "1 NaN \n", + "2 NaN \n", + "3 NaN \n", + "4 NaN \n", + "... ... \n", + "39995 NaN \n", + "39996 NaN \n", + "39997 NaN \n", + "39998 NaN \n", + "39999 NaN \n", + "\n", + "[40000 rows x 21 columns]" + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "posts = pd.read_csv('posts_table.csv')\n", + "posts" + ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 27, "metadata": {}, + "outputs": [], "source": [ "#### 5. Rename Id column to postId and OwnerUserId to userId" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 59, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "posts.rename(columns={'Id': 'postId', 'OwnerUserId': 'userId'}, inplace=True)\n" + ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ - "#### 6. Define new dataframes for users and posts with the following selected columns:\n", + "# 6. Define new dataframes for users and posts with the following selected columns:\n", " **users columns**: userId, Reputation,Views,UpVotes,DownVotes\n", " **posts columns**: postId, Score,userId,ViewCount,CommentCount" ] }, + { + "cell_type": "raw", + "metadata": {}, + "source": [] + }, { "cell_type": "code", - "execution_count": null, + "execution_count": 71, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "users_new = users.loc[:, ['userId', 'Reputation', 'Views', 'UpVotes', 'DownVotes']]" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": {}, + "outputs": [], + "source": [ + "posts_new = posts.loc[:, ['postId', 'Score', 'userId', 'ViewCount', 'CommentCount']]" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "

\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
userIdReputationViewsUpVotesDownVotes
0-11050071920
121012530
2310122190
341011100
45679211456625
..................
40320557431000
40321557446100
4032255745101000
4032355746106100
40324557471000
\n", + "

40325 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " userId Reputation Views UpVotes DownVotes\n", + "0 -1 1 0 5007 1920\n", + "1 2 101 25 3 0\n", + "2 3 101 22 19 0\n", + "3 4 101 11 0 0\n", + "4 5 6792 1145 662 5\n", + "... ... ... ... ... ...\n", + "40320 55743 1 0 0 0\n", + "40321 55744 6 1 0 0\n", + "40322 55745 101 0 0 0\n", + "40323 55746 106 1 0 0\n", + "40324 55747 1 0 0 0\n", + "\n", + "[40325 rows x 5 columns]" + ] + }, + "execution_count": 75, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "users_new" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
postIdScoreuserIdViewCountCommentCount
01238.01278.01
122224.08198.01
235418.03613.04
341323.05224.02
458123.0NaN3
..................
3999548321019966.0NaN0
39996483223892.0NaN2
399974832312020.0NaN0
3999848324319914.0NaN0
3999948325-119968.0116.04
\n", + "

40000 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " postId Score userId ViewCount CommentCount\n", + "0 1 23 8.0 1278.0 1\n", + "1 2 22 24.0 8198.0 1\n", + "2 3 54 18.0 3613.0 4\n", + "3 4 13 23.0 5224.0 2\n", + "4 5 81 23.0 NaN 3\n", + "... ... ... ... ... ...\n", + "39995 48321 0 19966.0 NaN 0\n", + "39996 48322 3 892.0 NaN 2\n", + "39997 48323 1 2020.0 NaN 0\n", + "39998 48324 3 19914.0 NaN 0\n", + "39999 48325 -1 19968.0 116.0 4\n", + "\n", + "[40000 rows x 5 columns]" + ] + }, + "execution_count": 76, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "posts_new" + ] }, { "cell_type": "markdown", @@ -96,10 +1465,429 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 83, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
userIdReputationCreationDateDisplayNameLastAccessDateWebsiteUrlLocationAboutMeViewsUpVotes...AnswerCountCommentCountFavoriteCountLastEditorUserIdLastEditDateCommunityOwnedDateParentIdClosedDateOwnerDisplayNameLastEditorDisplayName
0-112010-07-19 06:55:26Community2010-07-19 06:55:26http://meta.stackexchange.com/on the server farm<p>Hi, I'm not really a person.</p>\\r\\n\\r\\n<p>...05007...NaN0NaN-1.02014-04-23 13:43:43NaNNaNNaNNaNNaN
123567922010-07-19 19:03:57Shane2014-08-13 00:23:47http://www.statalgo.comNew York, NY<p>Quantitative researcher focusing on statist...1145662...15.05137.022047.02013-06-07 06:38:102010-08-09 13:05:50NaNNaNNaNNaN
24064572010-07-19 19:04:07Harlan2014-08-07 19:49:44http://www.harlan.harris.nameDistrict of Columbia<ul>\\r\\n<li>PhD in CS/AI/Machine Learning/Cogn...11447...NaN0NaNNaNNaNNaN1.0NaNNaNNaN
25274292010-07-19 19:04:37Vince2014-09-10 21:14:12http://bioinformatics.ucdavis.eduDavis, CAI'm a recent graduate of UC Davis in Economics...5620...7.027.088.02010-10-19 06:42:192011-06-02 11:09:11NaNNaNNaNNaN
254867642010-07-19 19:04:52csgillespie2014-09-09 21:15:08http://www.mas.ncl.ac.uk/~ncsg3/Newcastle, United Kingdom<p>I'm a statistics lecturer at Newcastle Univ...1089604...5.0114.0NaNNaNNaNNaNNaNNaNNaN
..................................................................
3895745934112014-05-21 17:13:23jasonweiyi2014-06-08 13:36:47NaNNaNNaN10...1.02NaN805.02013-05-13 04:41:41NaNNaNNaNjasonweiyiNaN
3895846192362014-05-26 15:29:30user17387532014-09-10 19:52:34NaNNaNNaN10...1.021.0930.02012-10-18 15:07:40NaNNaNNaNuser1738753NaN
38959465222352014-06-01 17:25:18Andy Blankertz2014-09-13 18:03:07NaNNaN<p>Actuary for a life insurance company.</p>\\r\\n1327...3.00NaNNaNNaNNaNNaNNaNuser1009703NaN
38960523712212014-07-19 13:36:58Karel Petranek2014-07-20 09:32:16NaNNaNNaN20...7.058.0NaNNaN2014-07-30 20:09:14NaNNaNdark_charlieNaN
38961552261192014-09-04 07:34:48Klas Lindbäck2014-09-08 11:07:33http://N/ASweden<p>Working with a wide range of languages, but...23...NaN0NaNNaNNaNNaN16174.0NaNKlas LindbäckNaN
\n", + "

8138 rows × 34 columns

\n", + "
" + ], + "text/plain": [ + " userId Reputation CreationDate DisplayName \\\n", + "0 -1 1 2010-07-19 06:55:26 Community \n", + "123 5 6792 2010-07-19 19:03:57 Shane \n", + "240 6 457 2010-07-19 19:04:07 Harlan \n", + "252 7 429 2010-07-19 19:04:37 Vince \n", + "254 8 6764 2010-07-19 19:04:52 csgillespie \n", + "... ... ... ... ... \n", + "38957 45934 11 2014-05-21 17:13:23 jasonweiyi \n", + "38958 46192 36 2014-05-26 15:29:30 user1738753 \n", + "38959 46522 235 2014-06-01 17:25:18 Andy Blankertz \n", + "38960 52371 221 2014-07-19 13:36:58 Karel Petranek \n", + "38961 55226 119 2014-09-04 07:34:48 Klas Lindbäck \n", + "\n", + " LastAccessDate WebsiteUrl \\\n", + "0 2010-07-19 06:55:26 http://meta.stackexchange.com/ \n", + "123 2014-08-13 00:23:47 http://www.statalgo.com \n", + "240 2014-08-07 19:49:44 http://www.harlan.harris.name \n", + "252 2014-09-10 21:14:12 http://bioinformatics.ucdavis.edu \n", + "254 2014-09-09 21:15:08 http://www.mas.ncl.ac.uk/~ncsg3/ \n", + "... ... ... \n", + "38957 2014-06-08 13:36:47 NaN \n", + "38958 2014-09-10 19:52:34 NaN \n", + "38959 2014-09-13 18:03:07 NaN \n", + "38960 2014-07-20 09:32:16 NaN \n", + "38961 2014-09-08 11:07:33 http://N/A \n", + "\n", + " Location \\\n", + "0 on the server farm \n", + "123 New York, NY \n", + "240 District of Columbia \n", + "252 Davis, CA \n", + "254 Newcastle, United Kingdom \n", + "... ... \n", + "38957 NaN \n", + "38958 NaN \n", + "38959 NaN \n", + "38960 NaN \n", + "38961 Sweden \n", + "\n", + " AboutMe Views UpVotes ... \\\n", + "0

Hi, I'm not really a person.

\\r\\n\\r\\n

... 0 5007 ... \n", + "123

Quantitative researcher focusing on statist... 1145 662 ... \n", + "240