diff --git a/your-code/.ipynb_checkpoints/main-checkpoint.ipynb b/your-code/.ipynb_checkpoints/main-checkpoint.ipynb index 31724c5..d4d08bd 100644 --- a/your-code/.ipynb_checkpoints/main-checkpoint.ipynb +++ b/your-code/.ipynb_checkpoints/main-checkpoint.ipynb @@ -9,10 +9,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "import pandas as pd" + ] }, { "cell_type": "markdown", @@ -23,10 +25,13 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "import pymysql\n", + "from sqlalchemy import create_engine" + ] }, { "cell_type": "markdown", @@ -37,10 +42,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "engine = create_engine('mysql+pymysql://guest:relational@relational.fit.cvut.cz')" + ] }, { "cell_type": "markdown", @@ -51,10 +58,327 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "#user_tab = pd.read_sql_query('SEELECT * FROM stats.users', engine)\n", + "user_tab = pd.read_sql_query('SELECT * FROM stats.users', \n", + " engine)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
| \n", + " | Id | \n", + "Reputation | \n", + "CreationDate | \n", + "DisplayName | \n", + "LastAccessDate | \n", + "WebsiteUrl | \n", + "Location | \n", + "AboutMe | \n", + "Views | \n", + "UpVotes | \n", + "DownVotes | \n", + "AccountId | \n", + "Age | \n", + "ProfileImageUrl | \n", + "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", + "-1 | \n", + "1 | \n", + "2010-07-19 06:55:26 | \n", + "Community | \n", + "2010-07-19 06:55:26 | \n", + "http://meta.stackexchange.com/ | \n", + "on the server farm | \n", + "<p>Hi, I'm not really a person.</p>\\n\\n<p>I'm ... | \n", + "0 | \n", + "5007 | \n", + "1920 | \n", + "-1 | \n", + "NaN | \n", + "None | \n", + "
| 1 | \n", + "2 | \n", + "101 | \n", + "2010-07-19 14:01:36 | \n", + "Geoff Dalgas | \n", + "2013-11-12 22:07:23 | \n", + "http://stackoverflow.com | \n", + "Corvallis, OR | \n", + "<p>Developer on the StackOverflow team. Find ... | \n", + "25 | \n", + "3 | \n", + "0 | \n", + "2 | \n", + "37.0 | \n", + "None | \n", + "
| 2 | \n", + "3 | \n", + "101 | \n", + "2010-07-19 15:34:50 | \n", + "Jarrod Dixon | \n", + "2014-08-08 06:42:58 | \n", + "http://stackoverflow.com | \n", + "New York, NY | \n", + "<p><a href=\"http://blog.stackoverflow.com/2009... | \n", + "22 | \n", + "19 | \n", + "0 | \n", + "3 | \n", + "35.0 | \n", + "None | \n", + "
| 3 | \n", + "4 | \n", + "101 | \n", + "2010-07-19 19:03:27 | \n", + "Emmett | \n", + "2014-01-02 09:31:02 | \n", + "http://minesweeperonline.com | \n", + "San Francisco, CA | \n", + "<p>currently at a startup in SF</p>\\n\\n<p>form... | \n", + "11 | \n", + "0 | \n", + "0 | \n", + "1998 | \n", + "28.0 | \n", + "http://i.stack.imgur.com/d1oHX.jpg | \n", + "
| 4 | \n", + "5 | \n", + "6792 | \n", + "2010-07-19 19:03:57 | \n", + "Shane | \n", + "2014-08-13 00:23:47 | \n", + "http://www.statalgo.com | \n", + "New York, NY | \n", + "<p>Quantitative researcher focusing on statist... | \n", + "1145 | \n", + "662 | \n", + "5 | \n", + "54503 | \n", + "35.0 | \n", + "None | \n", + "
| ... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
| 40320 | \n", + "55743 | \n", + "1 | \n", + "2014-09-13 21:03:50 | \n", + "AussieMeg | \n", + "2014-09-13 21:18:52 | \n", + "None | \n", + "None | \n", + "None | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "5026902 | \n", + "NaN | \n", + "http://graph.facebook.com/665821703/picture?ty... | \n", + "
| 40321 | \n", + "55744 | \n", + "6 | \n", + "2014-09-13 21:39:30 | \n", + "Mia Maria | \n", + "2014-09-13 21:39:30 | \n", + "None | \n", + "None | \n", + "None | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "5026998 | \n", + "NaN | \n", + "None | \n", + "
| 40322 | \n", + "55745 | \n", + "101 | \n", + "2014-09-13 23:45:27 | \n", + "tronbabylove | \n", + "2014-09-13 23:45:27 | \n", + "None | \n", + "United States | \n", + "None | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "481766 | \n", + "NaN | \n", + "https://www.gravatar.com/avatar/faa7a3fdbd8308... | \n", + "
| 40323 | \n", + "55746 | \n", + "106 | \n", + "2014-09-14 00:29:41 | \n", + "GPP | \n", + "2014-09-14 02:05:17 | \n", + "None | \n", + "None | \n", + "<p>Stats noobie, product, marketing & medi... | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "976289 | \n", + "NaN | \n", + "https://www.gravatar.com/avatar/6d9e9fa6b783a3... | \n", + "
| 40324 | \n", + "55747 | \n", + "1 | \n", + "2014-09-14 01:01:44 | \n", + "Shivam Agrawal | \n", + "2014-09-14 01:19:04 | \n", + "None | \n", + "India | \n", + "<p>Maths Enthusiast </p>\\n | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "5027354 | \n", + "NaN | \n", + "https://lh4.googleusercontent.com/-ZsXhwVaFmiY... | \n", + "
40325 rows × 14 columns
\n", + "Hi, I'm not really a person.
\\n\\nI'm ... 0 5007 \n", + "1
Developer on the StackOverflow team. Find ... 25 3 \n", + "2
\\n\\nform... 11 0 \n", + "4
Quantitative researcher focusing on statist... 1145 662 \n", + "... ... ... ... \n", + "40320 None 0 0 \n", + "40321 None 1 0 \n", + "40322 None 0 0 \n", + "40323
Stats noobie, product, marketing & medi... 1 0 \n", + "40324
Maths Enthusiast
\\n 0 0 \n", + "\n", + " DownVotes AccountId Age \\\n", + "0 1920 -1 NaN \n", + "1 0 2 37.0 \n", + "2 0 3 35.0 \n", + "3 0 1998 28.0 \n", + "4 5 54503 35.0 \n", + "... ... ... ... \n", + "40320 0 5026902 NaN \n", + "40321 0 5026998 NaN \n", + "40322 0 481766 NaN \n", + "40323 0 976289 NaN \n", + "40324 0 5027354 NaN \n", + "\n", + " ProfileImageUrl \n", + "0 None \n", + "1 None \n", + "2 None \n", + "3 http://i.stack.imgur.com/d1oHX.jpg \n", + "4 None \n", + "... ... \n", + "40320 http://graph.facebook.com/665821703/picture?ty... \n", + "40321 None \n", + "40322 https://www.gravatar.com/avatar/faa7a3fdbd8308... \n", + "40323 https://www.gravatar.com/avatar/6d9e9fa6b783a3... \n", + "40324 https://lh4.googleusercontent.com/-ZsXhwVaFmiY... \n", + "\n", + "[40325 rows x 14 columns]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "user_tab" + ] }, { "cell_type": "markdown", @@ -65,10 +389,317 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "| \n", + " | userId | \n", + "Reputation | \n", + "CreationDate | \n", + "DisplayName | \n", + "LastAccessDate | \n", + "WebsiteUrl | \n", + "Location | \n", + "AboutMe | \n", + "Views | \n", + "UpVotes | \n", + "DownVotes | \n", + "AccountId | \n", + "Age | \n", + "ProfileImageUrl | \n", + "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", + "-1 | \n", + "1 | \n", + "2010-07-19 06:55:26 | \n", + "Community | \n", + "2010-07-19 06:55:26 | \n", + "http://meta.stackexchange.com/ | \n", + "on the server farm | \n", + "<p>Hi, I'm not really a person.</p>\\n\\n<p>I'm ... | \n", + "0 | \n", + "5007 | \n", + "1920 | \n", + "-1 | \n", + "NaN | \n", + "None | \n", + "
| 1 | \n", + "2 | \n", + "101 | \n", + "2010-07-19 14:01:36 | \n", + "Geoff Dalgas | \n", + "2013-11-12 22:07:23 | \n", + "http://stackoverflow.com | \n", + "Corvallis, OR | \n", + "<p>Developer on the StackOverflow team. Find ... | \n", + "25 | \n", + "3 | \n", + "0 | \n", + "2 | \n", + "37.0 | \n", + "None | \n", + "
| 2 | \n", + "3 | \n", + "101 | \n", + "2010-07-19 15:34:50 | \n", + "Jarrod Dixon | \n", + "2014-08-08 06:42:58 | \n", + "http://stackoverflow.com | \n", + "New York, NY | \n", + "<p><a href=\"http://blog.stackoverflow.com/2009... | \n", + "22 | \n", + "19 | \n", + "0 | \n", + "3 | \n", + "35.0 | \n", + "None | \n", + "
| 3 | \n", + "4 | \n", + "101 | \n", + "2010-07-19 19:03:27 | \n", + "Emmett | \n", + "2014-01-02 09:31:02 | \n", + "http://minesweeperonline.com | \n", + "San Francisco, CA | \n", + "<p>currently at a startup in SF</p>\\n\\n<p>form... | \n", + "11 | \n", + "0 | \n", + "0 | \n", + "1998 | \n", + "28.0 | \n", + "http://i.stack.imgur.com/d1oHX.jpg | \n", + "
| 4 | \n", + "5 | \n", + "6792 | \n", + "2010-07-19 19:03:57 | \n", + "Shane | \n", + "2014-08-13 00:23:47 | \n", + "http://www.statalgo.com | \n", + "New York, NY | \n", + "<p>Quantitative researcher focusing on statist... | \n", + "1145 | \n", + "662 | \n", + "5 | \n", + "54503 | \n", + "35.0 | \n", + "None | \n", + "
| ... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
| 40320 | \n", + "55743 | \n", + "1 | \n", + "2014-09-13 21:03:50 | \n", + "AussieMeg | \n", + "2014-09-13 21:18:52 | \n", + "None | \n", + "None | \n", + "None | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "5026902 | \n", + "NaN | \n", + "http://graph.facebook.com/665821703/picture?ty... | \n", + "
| 40321 | \n", + "55744 | \n", + "6 | \n", + "2014-09-13 21:39:30 | \n", + "Mia Maria | \n", + "2014-09-13 21:39:30 | \n", + "None | \n", + "None | \n", + "None | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "5026998 | \n", + "NaN | \n", + "None | \n", + "
| 40322 | \n", + "55745 | \n", + "101 | \n", + "2014-09-13 23:45:27 | \n", + "tronbabylove | \n", + "2014-09-13 23:45:27 | \n", + "None | \n", + "United States | \n", + "None | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "481766 | \n", + "NaN | \n", + "https://www.gravatar.com/avatar/faa7a3fdbd8308... | \n", + "
| 40323 | \n", + "55746 | \n", + "106 | \n", + "2014-09-14 00:29:41 | \n", + "GPP | \n", + "2014-09-14 02:05:17 | \n", + "None | \n", + "None | \n", + "<p>Stats noobie, product, marketing & medi... | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "976289 | \n", + "NaN | \n", + "https://www.gravatar.com/avatar/6d9e9fa6b783a3... | \n", + "
| 40324 | \n", + "55747 | \n", + "1 | \n", + "2014-09-14 01:01:44 | \n", + "Shivam Agrawal | \n", + "2014-09-14 01:19:04 | \n", + "None | \n", + "India | \n", + "<p>Maths Enthusiast </p>\\n | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "5027354 | \n", + "NaN | \n", + "https://lh4.googleusercontent.com/-ZsXhwVaFmiY... | \n", + "
40325 rows × 14 columns
\n", + "Hi, I'm not really a person.
\\n\\nI'm ... 0 5007 \n", + "1
Developer on the StackOverflow team. Find ... 25 3 \n", + "2
\\n\\nform... 11 0 \n", + "4
Quantitative researcher focusing on statist... 1145 662 \n", + "... ... ... ... \n", + "40320 None 0 0 \n", + "40321 None 1 0 \n", + "40322 None 0 0 \n", + "40323
Stats noobie, product, marketing & medi... 1 0 \n", + "40324
Maths Enthusiast
\\n 0 0 \n", + "\n", + " DownVotes AccountId Age \\\n", + "0 1920 -1 NaN \n", + "1 0 2 37.0 \n", + "2 0 3 35.0 \n", + "3 0 1998 28.0 \n", + "4 5 54503 35.0 \n", + "... ... ... ... \n", + "40320 0 5026902 NaN \n", + "40321 0 5026998 NaN \n", + "40322 0 481766 NaN \n", + "40323 0 976289 NaN \n", + "40324 0 5027354 NaN \n", + "\n", + " ProfileImageUrl \n", + "0 None \n", + "1 None \n", + "2 None \n", + "3 http://i.stack.imgur.com/d1oHX.jpg \n", + "4 None \n", + "... ... \n", + "40320 http://graph.facebook.com/665821703/picture?ty... \n", + "40321 None \n", + "40322 https://www.gravatar.com/avatar/faa7a3fdbd8308... \n", + "40323 https://www.gravatar.com/avatar/6d9e9fa6b783a3... \n", + "40324 https://lh4.googleusercontent.com/-ZsXhwVaFmiY... \n", + "\n", + "[40325 rows x 14 columns]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "user_tab = user_tab.rename(columns={'Id':'userId'})\n", + "user_tab" + ] }, { "cell_type": "markdown", @@ -79,10 +710,428 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "| \n", + " | Id | \n", + "PostTypeId | \n", + "AcceptedAnswerId | \n", + "CreaionDate | \n", + "Score | \n", + "ViewCount | \n", + "Body | \n", + "OwnerUserId | \n", + "LasActivityDate | \n", + "Title | \n", + "... | \n", + "AnswerCount | \n", + "CommentCount | \n", + "FavoriteCount | \n", + "LastEditorUserId | \n", + "LastEditDate | \n", + "CommunityOwnedDate | \n", + "ParentId | \n", + "ClosedDate | \n", + "OwnerDisplayName | \n", + "LastEditorDisplayName | \n", + "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", + "1 | \n", + "1 | \n", + "15.0 | \n", + "2010-07-19 19:12:12 | \n", + "23 | \n", + "1278.0 | \n", + "<p>How should I elicit prior distributions fro... | \n", + "8.0 | \n", + "2010-09-15 21:08:26 | \n", + "Eliciting priors from experts | \n", + "... | \n", + "5.0 | \n", + "1 | \n", + "14.0 | \n", + "NaN | \n", + "NaT | \n", + "NaT | \n", + "NaN | \n", + "NaT | \n", + "None | \n", + "None | \n", + "
| 1 | \n", + "2 | \n", + "1 | \n", + "59.0 | \n", + "2010-07-19 19:12:57 | \n", + "22 | \n", + "8198.0 | \n", + "<p>In many different statistical methods there... | \n", + "24.0 | \n", + "2012-11-12 09:21:54 | \n", + "What is normality? | \n", + "... | \n", + "7.0 | \n", + "1 | \n", + "8.0 | \n", + "88.0 | \n", + "2010-08-07 17:56:44 | \n", + "NaT | \n", + "NaN | \n", + "NaT | \n", + "None | \n", + "None | \n", + "
| 2 | \n", + "3 | \n", + "1 | \n", + "5.0 | \n", + "2010-07-19 19:13:28 | \n", + "54 | \n", + "3613.0 | \n", + "<p>What are some valuable Statistical Analysis... | \n", + "18.0 | \n", + "2013-05-27 14:48:36 | \n", + "What are some valuable Statistical Analysis op... | \n", + "... | \n", + "19.0 | \n", + "4 | \n", + "36.0 | \n", + "183.0 | \n", + "2011-02-12 05:50:03 | \n", + "2010-07-19 19:13:28 | \n", + "NaN | \n", + "NaT | \n", + "None | \n", + "None | \n", + "
| 3 | \n", + "4 | \n", + "1 | \n", + "135.0 | \n", + "2010-07-19 19:13:31 | \n", + "13 | \n", + "5224.0 | \n", + "<p>I have two groups of data. Each with a dif... | \n", + "23.0 | \n", + "2010-09-08 03:00:19 | \n", + "Assessing the significance of differences in d... | \n", + "... | \n", + "5.0 | \n", + "2 | \n", + "2.0 | \n", + "NaN | \n", + "NaT | \n", + "NaT | \n", + "NaN | \n", + "NaT | \n", + "None | \n", + "None | \n", + "
| 4 | \n", + "5 | \n", + "2 | \n", + "NaN | \n", + "2010-07-19 19:14:43 | \n", + "81 | \n", + "NaN | \n", + "<p>The R-project</p>\\n\\n<p><a href=\"http://www... | \n", + "23.0 | \n", + "2010-07-19 19:21:15 | \n", + "None | \n", + "... | \n", + "NaN | \n", + "3 | \n", + "NaN | \n", + "23.0 | \n", + "2010-07-19 19:21:15 | \n", + "2010-07-19 19:14:43 | \n", + "3.0 | \n", + "NaT | \n", + "None | \n", + "None | \n", + "
| ... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
| 91971 | \n", + "115374 | \n", + "2 | \n", + "NaN | \n", + "2014-09-13 23:45:39 | \n", + "2 | \n", + "NaN | \n", + "<p>This grew too long for a comment, but I thi... | \n", + "805.0 | \n", + "2014-09-14 02:05:41 | \n", + "None | \n", + "... | \n", + "NaN | \n", + "2 | \n", + "NaN | \n", + "805.0 | \n", + "2014-09-14 02:05:41 | \n", + "NaT | \n", + "115367.0 | \n", + "NaT | \n", + "None | \n", + "None | \n", + "
| 91972 | \n", + "115375 | \n", + "1 | \n", + "NaN | \n", + "2014-09-13 23:46:05 | \n", + "0 | \n", + "9.0 | \n", + "<p>Assume a classification problem where there... | \n", + "49365.0 | \n", + "2014-09-14 02:09:23 | \n", + "Detecting a consistent pattern in a dataset vi... | \n", + "... | \n", + "1.0 | \n", + "0 | \n", + "NaN | \n", + "NaN | \n", + "NaT | \n", + "NaT | \n", + "NaN | \n", + "NaT | \n", + "None | \n", + "None | \n", + "
| 91973 | \n", + "115376 | \n", + "1 | \n", + "NaN | \n", + "2014-09-14 01:27:54 | \n", + "1 | \n", + "5.0 | \n", + "<p>My goal is to create a formula that can giv... | \n", + "55746.0 | \n", + "2014-09-14 01:40:55 | \n", + "How to project video viewcount based on histor... | \n", + "... | \n", + "0.0 | \n", + "2 | \n", + "NaN | \n", + "7290.0 | \n", + "2014-09-14 01:40:55 | \n", + "NaT | \n", + "NaN | \n", + "NaT | \n", + "None | \n", + "None | \n", + "
| 91974 | \n", + "115377 | \n", + "2 | \n", + "NaN | \n", + "2014-09-14 02:03:28 | \n", + "0 | \n", + "NaN | \n", + "<p>As a practical answer to the real questions... | \n", + "805.0 | \n", + "2014-09-14 02:54:13 | \n", + "None | \n", + "... | \n", + "NaN | \n", + "0 | \n", + "NaN | \n", + "805.0 | \n", + "2014-09-14 02:54:13 | \n", + "NaT | \n", + "115358.0 | \n", + "NaT | \n", + "None | \n", + "None | \n", + "
| 91975 | \n", + "115378 | \n", + "2 | \n", + "NaN | \n", + "2014-09-14 02:09:23 | \n", + "0 | \n", + "NaN | \n", + "<p>Decision trees are notoriously <strong>unst... | \n", + "7250.0 | \n", + "2014-09-14 02:09:23 | \n", + "None | \n", + "... | \n", + "NaN | \n", + "0 | \n", + "NaN | \n", + "NaN | \n", + "NaT | \n", + "NaT | \n", + "115375.0 | \n", + "NaT | \n", + "None | \n", + "None | \n", + "
91976 rows × 21 columns
\n", + "How should I elicit prior distributions fro... \n", + "1 8198.0
In many different statistical methods there... \n", + "2 3613.0
What are some valuable Statistical Analysis... \n", + "3 5224.0
I have two groups of data. Each with a dif... \n", + "4 NaN
The R-project
\\n\\nThis grew too long for a comment, but I thi... \n",
+ "91972 9.0 Assume a classification problem where there... \n",
+ "91973 5.0 My goal is to create a formula that can giv... \n",
+ "91974 NaN As a practical answer to the real questions... \n",
+ "91975 NaN Decision trees are notoriously unst... \n",
+ "\n",
+ " OwnerUserId LasActivityDate \\\n",
+ "0 8.0 2010-09-15 21:08:26 \n",
+ "1 24.0 2012-11-12 09:21:54 \n",
+ "2 18.0 2013-05-27 14:48:36 \n",
+ "3 23.0 2010-09-08 03:00:19 \n",
+ "4 23.0 2010-07-19 19:21:15 \n",
+ "... ... ... \n",
+ "91971 805.0 2014-09-14 02:05:41 \n",
+ "91972 49365.0 2014-09-14 02:09:23 \n",
+ "91973 55746.0 2014-09-14 01:40:55 \n",
+ "91974 805.0 2014-09-14 02:54:13 \n",
+ "91975 7250.0 2014-09-14 02:09:23 \n",
+ "\n",
+ " Title ... AnswerCount \\\n",
+ "0 Eliciting priors from experts ... 5.0 \n",
+ "1 What is normality? ... 7.0 \n",
+ "2 What are some valuable Statistical Analysis op... ... 19.0 \n",
+ "3 Assessing the significance of differences in d... ... 5.0 \n",
+ "4 None ... NaN \n",
+ "... ... ... ... \n",
+ "91971 None ... NaN \n",
+ "91972 Detecting a consistent pattern in a dataset vi... ... 1.0 \n",
+ "91973 How to project video viewcount based on histor... ... 0.0 \n",
+ "91974 None ... NaN \n",
+ "91975 None ... NaN \n",
+ "\n",
+ " CommentCount FavoriteCount LastEditorUserId LastEditDate \\\n",
+ "0 1 14.0 NaN NaT \n",
+ "1 1 8.0 88.0 2010-08-07 17:56:44 \n",
+ "2 4 36.0 183.0 2011-02-12 05:50:03 \n",
+ "3 2 2.0 NaN NaT \n",
+ "4 3 NaN 23.0 2010-07-19 19:21:15 \n",
+ "... ... ... ... ... \n",
+ "91971 2 NaN 805.0 2014-09-14 02:05:41 \n",
+ "91972 0 NaN NaN NaT \n",
+ "91973 2 NaN 7290.0 2014-09-14 01:40:55 \n",
+ "91974 0 NaN 805.0 2014-09-14 02:54:13 \n",
+ "91975 0 NaN NaN NaT \n",
+ "\n",
+ " CommunityOwnedDate ParentId ClosedDate OwnerDisplayName \\\n",
+ "0 NaT NaN NaT None \n",
+ "1 NaT NaN NaT None \n",
+ "2 2010-07-19 19:13:28 NaN NaT None \n",
+ "3 NaT NaN NaT None \n",
+ "4 2010-07-19 19:14:43 3.0 NaT None \n",
+ "... ... ... ... ... \n",
+ "91971 NaT 115367.0 NaT None \n",
+ "91972 NaT NaN NaT None \n",
+ "91973 NaT NaN NaT None \n",
+ "91974 NaT 115358.0 NaT None \n",
+ "91975 NaT 115375.0 NaT None \n",
+ "\n",
+ " LastEditorDisplayName \n",
+ "0 None \n",
+ "1 None \n",
+ "2 None \n",
+ "3 None \n",
+ "4 None \n",
+ "... ... \n",
+ "91971 None \n",
+ "91972 None \n",
+ "91973 None \n",
+ "91974 None \n",
+ "91975 None \n",
+ "\n",
+ "[91976 rows x 21 columns]"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "posts_tab = pd.read_sql_query('SELECT * FROM stats.posts', \n",
+ " engine)\n",
+ "posts_tab"
+ ]
},
{
"cell_type": "markdown",
@@ -93,10 +1142,414 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 9,
"metadata": {},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ " 91976 rows × 21 columns How should I elicit prior distributions fro... 8.0 \n",
+ "1 8198.0 In many different statistical methods there... 24.0 \n",
+ "2 3613.0 What are some valuable Statistical Analysis... 18.0 \n",
+ "3 5224.0 I have two groups of data. Each with a dif... 23.0 \n",
+ "4 NaN The R-project This grew too long for a comment, but I thi... 805.0 \n",
+ "91972 9.0 Assume a classification problem where there... 49365.0 \n",
+ "91973 5.0 My goal is to create a formula that can giv... 55746.0 \n",
+ "91974 NaN As a practical answer to the real questions... 805.0 \n",
+ "91975 NaN Decision trees are notoriously unst... 7250.0 \n",
+ "\n",
+ " LasActivityDate Title \\\n",
+ "0 2010-09-15 21:08:26 Eliciting priors from experts \n",
+ "1 2012-11-12 09:21:54 What is normality? \n",
+ "2 2013-05-27 14:48:36 What are some valuable Statistical Analysis op... \n",
+ "3 2010-09-08 03:00:19 Assessing the significance of differences in d... \n",
+ "4 2010-07-19 19:21:15 None \n",
+ "... ... ... \n",
+ "91971 2014-09-14 02:05:41 None \n",
+ "91972 2014-09-14 02:09:23 Detecting a consistent pattern in a dataset vi... \n",
+ "91973 2014-09-14 01:40:55 How to project video viewcount based on histor... \n",
+ "91974 2014-09-14 02:54:13 None \n",
+ "91975 2014-09-14 02:09:23 None \n",
+ "\n",
+ " ... AnswerCount CommentCount FavoriteCount LastEditorUserId \\\n",
+ "0 ... 5.0 1 14.0 NaN \n",
+ "1 ... 7.0 1 8.0 88.0 \n",
+ "2 ... 19.0 4 36.0 183.0 \n",
+ "3 ... 5.0 2 2.0 NaN \n",
+ "4 ... NaN 3 NaN 23.0 \n",
+ "... ... ... ... ... ... \n",
+ "91971 ... NaN 2 NaN 805.0 \n",
+ "91972 ... 1.0 0 NaN NaN \n",
+ "91973 ... 0.0 2 NaN 7290.0 \n",
+ "91974 ... NaN 0 NaN 805.0 \n",
+ "91975 ... NaN 0 NaN NaN \n",
+ "\n",
+ " LastEditDate CommunityOwnedDate ParentId ClosedDate \\\n",
+ "0 NaT NaT NaN NaT \n",
+ "1 2010-08-07 17:56:44 NaT NaN NaT \n",
+ "2 2011-02-12 05:50:03 2010-07-19 19:13:28 NaN NaT \n",
+ "3 NaT NaT NaN NaT \n",
+ "4 2010-07-19 19:21:15 2010-07-19 19:14:43 3.0 NaT \n",
+ "... ... ... ... ... \n",
+ "91971 2014-09-14 02:05:41 NaT 115367.0 NaT \n",
+ "91972 NaT NaT NaN NaT \n",
+ "91973 2014-09-14 01:40:55 NaT NaN NaT \n",
+ "91974 2014-09-14 02:54:13 NaT 115358.0 NaT \n",
+ "91975 NaT NaT 115375.0 NaT \n",
+ "\n",
+ " OwnerDisplayName LastEditorDisplayName \n",
+ "0 None None \n",
+ "1 None None \n",
+ "2 None None \n",
+ "3 None None \n",
+ "4 None None \n",
+ "... ... ... \n",
+ "91971 None None \n",
+ "91972 None None \n",
+ "91973 None None \n",
+ "91974 None None \n",
+ "91975 None None \n",
+ "\n",
+ "[91976 rows x 21 columns]"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "posts_tab.rename(columns={'Id':'postId', 'OwnerUserId':'userId'}, inplace=True)\n",
+ "posts_tab"
+ ]
},
{
"cell_type": "markdown",
@@ -109,10 +1562,13 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 10,
"metadata": {},
"outputs": [],
- "source": []
+ "source": [
+ "users_columns = user_tab[['userId', 'Reputation', 'Views', 'UpVotes', 'DownVotes']]\n",
+ "posts_columns = posts_tab[['postId', 'Score', 'userId', 'ViewCount', 'CommentCount']]"
+ ]
},
{
"cell_type": "markdown",
@@ -126,8 +1582,217 @@
"cell_type": "code",
"execution_count": 11,
"metadata": {},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ " 90584 rows × 9 columns 40325 rows × 14 columns Hi, I'm not really a person. I'm ... 0 5007 \n",
+ "1 Developer on the StackOverflow team. Find ... 25 3 \n",
+ "2 form... 11 0 \n",
+ "4 Quantitative researcher focusing on statist... 1145 662 \n",
+ "... ... ... ... \n",
+ "40320 None 0 0 \n",
+ "40321 None 1 0 \n",
+ "40322 None 0 0 \n",
+ "40323 Stats noobie, product, marketing & medi... 1 0 \n",
+ "40324 Maths Enthusiast 40325 rows × 14 columns Hi, I'm not really a person. I'm ... 0 5007 \n",
+ "1 Developer on the StackOverflow team. Find ... 25 3 \n",
+ "2 form... 11 0 \n",
+ "4 Quantitative researcher focusing on statist... 1145 662 \n",
+ "... ... ... ... \n",
+ "40320 None 0 0 \n",
+ "40321 None 1 0 \n",
+ "40322 None 0 0 \n",
+ "40323 Stats noobie, product, marketing & medi... 1 0 \n",
+ "40324 Maths Enthusiast 91976 rows × 21 columns How should I elicit prior distributions fro... \n",
+ "1 8198.0 In many different statistical methods there... \n",
+ "2 3613.0 What are some valuable Statistical Analysis... \n",
+ "3 5224.0 I have two groups of data. Each with a dif... \n",
+ "4 NaN The R-project\n",
+ " \n",
+ "
\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " postId \n",
+ " PostTypeId \n",
+ " AcceptedAnswerId \n",
+ " CreaionDate \n",
+ " Score \n",
+ " ViewCount \n",
+ " Body \n",
+ " userId \n",
+ " LasActivityDate \n",
+ " Title \n",
+ " ... \n",
+ " AnswerCount \n",
+ " CommentCount \n",
+ " FavoriteCount \n",
+ " LastEditorUserId \n",
+ " LastEditDate \n",
+ " CommunityOwnedDate \n",
+ " ParentId \n",
+ " ClosedDate \n",
+ " OwnerDisplayName \n",
+ " LastEditorDisplayName \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 1 \n",
+ " 1 \n",
+ " 15.0 \n",
+ " 2010-07-19 19:12:12 \n",
+ " 23 \n",
+ " 1278.0 \n",
+ " <p>How should I elicit prior distributions fro... \n",
+ " 8.0 \n",
+ " 2010-09-15 21:08:26 \n",
+ " Eliciting priors from experts \n",
+ " ... \n",
+ " 5.0 \n",
+ " 1 \n",
+ " 14.0 \n",
+ " NaN \n",
+ " NaT \n",
+ " NaT \n",
+ " NaN \n",
+ " NaT \n",
+ " None \n",
+ " None \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 2 \n",
+ " 1 \n",
+ " 59.0 \n",
+ " 2010-07-19 19:12:57 \n",
+ " 22 \n",
+ " 8198.0 \n",
+ " <p>In many different statistical methods there... \n",
+ " 24.0 \n",
+ " 2012-11-12 09:21:54 \n",
+ " What is normality? \n",
+ " ... \n",
+ " 7.0 \n",
+ " 1 \n",
+ " 8.0 \n",
+ " 88.0 \n",
+ " 2010-08-07 17:56:44 \n",
+ " NaT \n",
+ " NaN \n",
+ " NaT \n",
+ " None \n",
+ " None \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " 3 \n",
+ " 1 \n",
+ " 5.0 \n",
+ " 2010-07-19 19:13:28 \n",
+ " 54 \n",
+ " 3613.0 \n",
+ " <p>What are some valuable Statistical Analysis... \n",
+ " 18.0 \n",
+ " 2013-05-27 14:48:36 \n",
+ " What are some valuable Statistical Analysis op... \n",
+ " ... \n",
+ " 19.0 \n",
+ " 4 \n",
+ " 36.0 \n",
+ " 183.0 \n",
+ " 2011-02-12 05:50:03 \n",
+ " 2010-07-19 19:13:28 \n",
+ " NaN \n",
+ " NaT \n",
+ " None \n",
+ " None \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 4 \n",
+ " 1 \n",
+ " 135.0 \n",
+ " 2010-07-19 19:13:31 \n",
+ " 13 \n",
+ " 5224.0 \n",
+ " <p>I have two groups of data. Each with a dif... \n",
+ " 23.0 \n",
+ " 2010-09-08 03:00:19 \n",
+ " Assessing the significance of differences in d... \n",
+ " ... \n",
+ " 5.0 \n",
+ " 2 \n",
+ " 2.0 \n",
+ " NaN \n",
+ " NaT \n",
+ " NaT \n",
+ " NaN \n",
+ " NaT \n",
+ " None \n",
+ " None \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " 5 \n",
+ " 2 \n",
+ " NaN \n",
+ " 2010-07-19 19:14:43 \n",
+ " 81 \n",
+ " NaN \n",
+ " <p>The R-project</p>\\n\\n<p><a href=\"http://www... \n",
+ " 23.0 \n",
+ " 2010-07-19 19:21:15 \n",
+ " None \n",
+ " ... \n",
+ " NaN \n",
+ " 3 \n",
+ " NaN \n",
+ " 23.0 \n",
+ " 2010-07-19 19:21:15 \n",
+ " 2010-07-19 19:14:43 \n",
+ " 3.0 \n",
+ " NaT \n",
+ " None \n",
+ " None \n",
+ " \n",
+ " \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " \n",
+ " \n",
+ " 91971 \n",
+ " 115374 \n",
+ " 2 \n",
+ " NaN \n",
+ " 2014-09-13 23:45:39 \n",
+ " 2 \n",
+ " NaN \n",
+ " <p>This grew too long for a comment, but I thi... \n",
+ " 805.0 \n",
+ " 2014-09-14 02:05:41 \n",
+ " None \n",
+ " ... \n",
+ " NaN \n",
+ " 2 \n",
+ " NaN \n",
+ " 805.0 \n",
+ " 2014-09-14 02:05:41 \n",
+ " NaT \n",
+ " 115367.0 \n",
+ " NaT \n",
+ " None \n",
+ " None \n",
+ " \n",
+ " \n",
+ " 91972 \n",
+ " 115375 \n",
+ " 1 \n",
+ " NaN \n",
+ " 2014-09-13 23:46:05 \n",
+ " 0 \n",
+ " 9.0 \n",
+ " <p>Assume a classification problem where there... \n",
+ " 49365.0 \n",
+ " 2014-09-14 02:09:23 \n",
+ " Detecting a consistent pattern in a dataset vi... \n",
+ " ... \n",
+ " 1.0 \n",
+ " 0 \n",
+ " NaN \n",
+ " NaN \n",
+ " NaT \n",
+ " NaT \n",
+ " NaN \n",
+ " NaT \n",
+ " None \n",
+ " None \n",
+ " \n",
+ " \n",
+ " 91973 \n",
+ " 115376 \n",
+ " 1 \n",
+ " NaN \n",
+ " 2014-09-14 01:27:54 \n",
+ " 1 \n",
+ " 5.0 \n",
+ " <p>My goal is to create a formula that can giv... \n",
+ " 55746.0 \n",
+ " 2014-09-14 01:40:55 \n",
+ " How to project video viewcount based on histor... \n",
+ " ... \n",
+ " 0.0 \n",
+ " 2 \n",
+ " NaN \n",
+ " 7290.0 \n",
+ " 2014-09-14 01:40:55 \n",
+ " NaT \n",
+ " NaN \n",
+ " NaT \n",
+ " None \n",
+ " None \n",
+ " \n",
+ " \n",
+ " 91974 \n",
+ " 115377 \n",
+ " 2 \n",
+ " NaN \n",
+ " 2014-09-14 02:03:28 \n",
+ " 0 \n",
+ " NaN \n",
+ " <p>As a practical answer to the real questions... \n",
+ " 805.0 \n",
+ " 2014-09-14 02:54:13 \n",
+ " None \n",
+ " ... \n",
+ " NaN \n",
+ " 0 \n",
+ " NaN \n",
+ " 805.0 \n",
+ " 2014-09-14 02:54:13 \n",
+ " NaT \n",
+ " 115358.0 \n",
+ " NaT \n",
+ " None \n",
+ " None \n",
+ " \n",
+ " \n",
+ " \n",
+ "91975 \n",
+ " 115378 \n",
+ " 2 \n",
+ " NaN \n",
+ " 2014-09-14 02:09:23 \n",
+ " 0 \n",
+ " NaN \n",
+ " <p>Decision trees are notoriously <strong>unst... \n",
+ " 7250.0 \n",
+ " 2014-09-14 02:09:23 \n",
+ " None \n",
+ " ... \n",
+ " NaN \n",
+ " 0 \n",
+ " NaN \n",
+ " NaN \n",
+ " NaT \n",
+ " NaT \n",
+ " 115375.0 \n",
+ " NaT \n",
+ " None \n",
+ " None \n",
+ " \n",
+ " \n",
+ "
\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " userId \n",
+ " Reputation \n",
+ " Views \n",
+ " UpVotes \n",
+ " DownVotes \n",
+ " postId \n",
+ " Score \n",
+ " ViewCount \n",
+ " CommentCount \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " -1 \n",
+ " 1 \n",
+ " 0 \n",
+ " 5007 \n",
+ " 1920 \n",
+ " 2175 \n",
+ " 0 \n",
+ " NaN \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " -1 \n",
+ " 1 \n",
+ " 0 \n",
+ " 5007 \n",
+ " 1920 \n",
+ " 8576 \n",
+ " 0 \n",
+ " NaN \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " -1 \n",
+ " 1 \n",
+ " 0 \n",
+ " 5007 \n",
+ " 1920 \n",
+ " 8578 \n",
+ " 0 \n",
+ " NaN \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " -1 \n",
+ " 1 \n",
+ " 0 \n",
+ " 5007 \n",
+ " 1920 \n",
+ " 8981 \n",
+ " 0 \n",
+ " NaN \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " -1 \n",
+ " 1 \n",
+ " 0 \n",
+ " 5007 \n",
+ " 1920 \n",
+ " 8982 \n",
+ " 0 \n",
+ " NaN \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " \n",
+ " \n",
+ " 90579 \n",
+ " 55734 \n",
+ " 1 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 115352 \n",
+ " 0 \n",
+ " 16.0 \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " 90580 \n",
+ " 55738 \n",
+ " 11 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 115360 \n",
+ " 2 \n",
+ " 40.0 \n",
+ " 4 \n",
+ " \n",
+ " \n",
+ " 90581 \n",
+ " 55742 \n",
+ " 6 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 115366 \n",
+ " 1 \n",
+ " 17.0 \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " 90582 \n",
+ " 55744 \n",
+ " 6 \n",
+ " 1 \n",
+ " 0 \n",
+ " 0 \n",
+ " 115370 \n",
+ " 1 \n",
+ " 13.0 \n",
+ " 2 \n",
+ " \n",
+ " \n",
+ " \n",
+ "90583 \n",
+ " 55746 \n",
+ " 106 \n",
+ " 1 \n",
+ " 0 \n",
+ " 0 \n",
+ " 115376 \n",
+ " 1 \n",
+ " 5.0 \n",
+ " 2 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " Id \n",
+ " Reputation \n",
+ " CreationDate \n",
+ " DisplayName \n",
+ " LastAccessDate \n",
+ " WebsiteUrl \n",
+ " Location \n",
+ " AboutMe \n",
+ " Views \n",
+ " UpVotes \n",
+ " DownVotes \n",
+ " AccountId \n",
+ " Age \n",
+ " ProfileImageUrl \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " -1 \n",
+ " 1 \n",
+ " 2010-07-19 06:55:26 \n",
+ " Community \n",
+ " 2010-07-19 06:55:26 \n",
+ " http://meta.stackexchange.com/ \n",
+ " on the server farm \n",
+ " <p>Hi, I'm not really a person.</p>\\n\\n<p>I'm ... \n",
+ " 0 \n",
+ " 5007 \n",
+ " 1920 \n",
+ " -1 \n",
+ " NaN \n",
+ " None \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 2 \n",
+ " 101 \n",
+ " 2010-07-19 14:01:36 \n",
+ " Geoff Dalgas \n",
+ " 2013-11-12 22:07:23 \n",
+ " http://stackoverflow.com \n",
+ " Corvallis, OR \n",
+ " <p>Developer on the StackOverflow team. Find ... \n",
+ " 25 \n",
+ " 3 \n",
+ " 0 \n",
+ " 2 \n",
+ " 37.0 \n",
+ " None \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " 3 \n",
+ " 101 \n",
+ " 2010-07-19 15:34:50 \n",
+ " Jarrod Dixon \n",
+ " 2014-08-08 06:42:58 \n",
+ " http://stackoverflow.com \n",
+ " New York, NY \n",
+ " <p><a href=\"http://blog.stackoverflow.com/2009... \n",
+ " 22 \n",
+ " 19 \n",
+ " 0 \n",
+ " 3 \n",
+ " 35.0 \n",
+ " None \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 4 \n",
+ " 101 \n",
+ " 2010-07-19 19:03:27 \n",
+ " Emmett \n",
+ " 2014-01-02 09:31:02 \n",
+ " http://minesweeperonline.com \n",
+ " San Francisco, CA \n",
+ " <p>currently at a startup in SF</p>\\n\\n<p>form... \n",
+ " 11 \n",
+ " 0 \n",
+ " 0 \n",
+ " 1998 \n",
+ " 28.0 \n",
+ " http://i.stack.imgur.com/d1oHX.jpg \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " 5 \n",
+ " 6792 \n",
+ " 2010-07-19 19:03:57 \n",
+ " Shane \n",
+ " 2014-08-13 00:23:47 \n",
+ " http://www.statalgo.com \n",
+ " New York, NY \n",
+ " <p>Quantitative researcher focusing on statist... \n",
+ " 1145 \n",
+ " 662 \n",
+ " 5 \n",
+ " 54503 \n",
+ " 35.0 \n",
+ " None \n",
+ " \n",
+ " \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " \n",
+ " \n",
+ " 40320 \n",
+ " 55743 \n",
+ " 1 \n",
+ " 2014-09-13 21:03:50 \n",
+ " AussieMeg \n",
+ " 2014-09-13 21:18:52 \n",
+ " None \n",
+ " None \n",
+ " None \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 5026902 \n",
+ " NaN \n",
+ " http://graph.facebook.com/665821703/picture?ty... \n",
+ " \n",
+ " \n",
+ " 40321 \n",
+ " 55744 \n",
+ " 6 \n",
+ " 2014-09-13 21:39:30 \n",
+ " Mia Maria \n",
+ " 2014-09-13 21:39:30 \n",
+ " None \n",
+ " None \n",
+ " None \n",
+ " 1 \n",
+ " 0 \n",
+ " 0 \n",
+ " 5026998 \n",
+ " NaN \n",
+ " None \n",
+ " \n",
+ " \n",
+ " 40322 \n",
+ " 55745 \n",
+ " 101 \n",
+ " 2014-09-13 23:45:27 \n",
+ " tronbabylove \n",
+ " 2014-09-13 23:45:27 \n",
+ " None \n",
+ " United States \n",
+ " None \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 481766 \n",
+ " NaN \n",
+ " https://www.gravatar.com/avatar/faa7a3fdbd8308... \n",
+ " \n",
+ " \n",
+ " 40323 \n",
+ " 55746 \n",
+ " 106 \n",
+ " 2014-09-14 00:29:41 \n",
+ " GPP \n",
+ " 2014-09-14 02:05:17 \n",
+ " None \n",
+ " None \n",
+ " <p>Stats noobie, product, marketing & medi... \n",
+ " 1 \n",
+ " 0 \n",
+ " 0 \n",
+ " 976289 \n",
+ " NaN \n",
+ " https://www.gravatar.com/avatar/6d9e9fa6b783a3... \n",
+ " \n",
+ " \n",
+ " \n",
+ "40324 \n",
+ " 55747 \n",
+ " 1 \n",
+ " 2014-09-14 01:01:44 \n",
+ " Shivam Agrawal \n",
+ " 2014-09-14 01:19:04 \n",
+ " None \n",
+ " India \n",
+ " <p>Maths Enthusiast </p>\\n \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 5027354 \n",
+ " NaN \n",
+ " https://lh4.googleusercontent.com/-ZsXhwVaFmiY... \n",
+ " \n",
+ " \n",
+ "
\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " userId \n",
+ " Reputation \n",
+ " CreationDate \n",
+ " DisplayName \n",
+ " LastAccessDate \n",
+ " WebsiteUrl \n",
+ " Location \n",
+ " AboutMe \n",
+ " Views \n",
+ " UpVotes \n",
+ " DownVotes \n",
+ " AccountId \n",
+ " Age \n",
+ " ProfileImageUrl \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " -1 \n",
+ " 1 \n",
+ " 2010-07-19 06:55:26 \n",
+ " Community \n",
+ " 2010-07-19 06:55:26 \n",
+ " http://meta.stackexchange.com/ \n",
+ " on the server farm \n",
+ " <p>Hi, I'm not really a person.</p>\\n\\n<p>I'm ... \n",
+ " 0 \n",
+ " 5007 \n",
+ " 1920 \n",
+ " -1 \n",
+ " NaN \n",
+ " None \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 2 \n",
+ " 101 \n",
+ " 2010-07-19 14:01:36 \n",
+ " Geoff Dalgas \n",
+ " 2013-11-12 22:07:23 \n",
+ " http://stackoverflow.com \n",
+ " Corvallis, OR \n",
+ " <p>Developer on the StackOverflow team. Find ... \n",
+ " 25 \n",
+ " 3 \n",
+ " 0 \n",
+ " 2 \n",
+ " 37.0 \n",
+ " None \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " 3 \n",
+ " 101 \n",
+ " 2010-07-19 15:34:50 \n",
+ " Jarrod Dixon \n",
+ " 2014-08-08 06:42:58 \n",
+ " http://stackoverflow.com \n",
+ " New York, NY \n",
+ " <p><a href=\"http://blog.stackoverflow.com/2009... \n",
+ " 22 \n",
+ " 19 \n",
+ " 0 \n",
+ " 3 \n",
+ " 35.0 \n",
+ " None \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 4 \n",
+ " 101 \n",
+ " 2010-07-19 19:03:27 \n",
+ " Emmett \n",
+ " 2014-01-02 09:31:02 \n",
+ " http://minesweeperonline.com \n",
+ " San Francisco, CA \n",
+ " <p>currently at a startup in SF</p>\\n\\n<p>form... \n",
+ " 11 \n",
+ " 0 \n",
+ " 0 \n",
+ " 1998 \n",
+ " 28.0 \n",
+ " http://i.stack.imgur.com/d1oHX.jpg \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " 5 \n",
+ " 6792 \n",
+ " 2010-07-19 19:03:57 \n",
+ " Shane \n",
+ " 2014-08-13 00:23:47 \n",
+ " http://www.statalgo.com \n",
+ " New York, NY \n",
+ " <p>Quantitative researcher focusing on statist... \n",
+ " 1145 \n",
+ " 662 \n",
+ " 5 \n",
+ " 54503 \n",
+ " 35.0 \n",
+ " None \n",
+ " \n",
+ " \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " \n",
+ " \n",
+ " 40320 \n",
+ " 55743 \n",
+ " 1 \n",
+ " 2014-09-13 21:03:50 \n",
+ " AussieMeg \n",
+ " 2014-09-13 21:18:52 \n",
+ " None \n",
+ " None \n",
+ " None \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 5026902 \n",
+ " NaN \n",
+ " http://graph.facebook.com/665821703/picture?ty... \n",
+ " \n",
+ " \n",
+ " 40321 \n",
+ " 55744 \n",
+ " 6 \n",
+ " 2014-09-13 21:39:30 \n",
+ " Mia Maria \n",
+ " 2014-09-13 21:39:30 \n",
+ " None \n",
+ " None \n",
+ " None \n",
+ " 1 \n",
+ " 0 \n",
+ " 0 \n",
+ " 5026998 \n",
+ " NaN \n",
+ " None \n",
+ " \n",
+ " \n",
+ " 40322 \n",
+ " 55745 \n",
+ " 101 \n",
+ " 2014-09-13 23:45:27 \n",
+ " tronbabylove \n",
+ " 2014-09-13 23:45:27 \n",
+ " None \n",
+ " United States \n",
+ " None \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 481766 \n",
+ " NaN \n",
+ " https://www.gravatar.com/avatar/faa7a3fdbd8308... \n",
+ " \n",
+ " \n",
+ " 40323 \n",
+ " 55746 \n",
+ " 106 \n",
+ " 2014-09-14 00:29:41 \n",
+ " GPP \n",
+ " 2014-09-14 02:05:17 \n",
+ " None \n",
+ " None \n",
+ " <p>Stats noobie, product, marketing & medi... \n",
+ " 1 \n",
+ " 0 \n",
+ " 0 \n",
+ " 976289 \n",
+ " NaN \n",
+ " https://www.gravatar.com/avatar/6d9e9fa6b783a3... \n",
+ " \n",
+ " \n",
+ " \n",
+ "40324 \n",
+ " 55747 \n",
+ " 1 \n",
+ " 2014-09-14 01:01:44 \n",
+ " Shivam Agrawal \n",
+ " 2014-09-14 01:19:04 \n",
+ " None \n",
+ " India \n",
+ " <p>Maths Enthusiast </p>\\n \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 5027354 \n",
+ " NaN \n",
+ " https://lh4.googleusercontent.com/-ZsXhwVaFmiY... \n",
+ " \n",
+ " \n",
+ "
\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " Id \n",
+ " PostTypeId \n",
+ " AcceptedAnswerId \n",
+ " CreaionDate \n",
+ " Score \n",
+ " ViewCount \n",
+ " Body \n",
+ " OwnerUserId \n",
+ " LasActivityDate \n",
+ " Title \n",
+ " ... \n",
+ " AnswerCount \n",
+ " CommentCount \n",
+ " FavoriteCount \n",
+ " LastEditorUserId \n",
+ " LastEditDate \n",
+ " CommunityOwnedDate \n",
+ " ParentId \n",
+ " ClosedDate \n",
+ " OwnerDisplayName \n",
+ " LastEditorDisplayName \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 1 \n",
+ " 1 \n",
+ " 15.0 \n",
+ " 2010-07-19 19:12:12 \n",
+ " 23 \n",
+ " 1278.0 \n",
+ " <p>How should I elicit prior distributions fro... \n",
+ " 8.0 \n",
+ " 2010-09-15 21:08:26 \n",
+ " Eliciting priors from experts \n",
+ " ... \n",
+ " 5.0 \n",
+ " 1 \n",
+ " 14.0 \n",
+ " NaN \n",
+ " NaT \n",
+ " NaT \n",
+ " NaN \n",
+ " NaT \n",
+ " None \n",
+ " None \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 2 \n",
+ " 1 \n",
+ " 59.0 \n",
+ " 2010-07-19 19:12:57 \n",
+ " 22 \n",
+ " 8198.0 \n",
+ " <p>In many different statistical methods there... \n",
+ " 24.0 \n",
+ " 2012-11-12 09:21:54 \n",
+ " What is normality? \n",
+ " ... \n",
+ " 7.0 \n",
+ " 1 \n",
+ " 8.0 \n",
+ " 88.0 \n",
+ " 2010-08-07 17:56:44 \n",
+ " NaT \n",
+ " NaN \n",
+ " NaT \n",
+ " None \n",
+ " None \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " 3 \n",
+ " 1 \n",
+ " 5.0 \n",
+ " 2010-07-19 19:13:28 \n",
+ " 54 \n",
+ " 3613.0 \n",
+ " <p>What are some valuable Statistical Analysis... \n",
+ " 18.0 \n",
+ " 2013-05-27 14:48:36 \n",
+ " What are some valuable Statistical Analysis op... \n",
+ " ... \n",
+ " 19.0 \n",
+ " 4 \n",
+ " 36.0 \n",
+ " 183.0 \n",
+ " 2011-02-12 05:50:03 \n",
+ " 2010-07-19 19:13:28 \n",
+ " NaN \n",
+ " NaT \n",
+ " None \n",
+ " None \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 4 \n",
+ " 1 \n",
+ " 135.0 \n",
+ " 2010-07-19 19:13:31 \n",
+ " 13 \n",
+ " 5224.0 \n",
+ " <p>I have two groups of data. Each with a dif... \n",
+ " 23.0 \n",
+ " 2010-09-08 03:00:19 \n",
+ " Assessing the significance of differences in d... \n",
+ " ... \n",
+ " 5.0 \n",
+ " 2 \n",
+ " 2.0 \n",
+ " NaN \n",
+ " NaT \n",
+ " NaT \n",
+ " NaN \n",
+ " NaT \n",
+ " None \n",
+ " None \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " 5 \n",
+ " 2 \n",
+ " NaN \n",
+ " 2010-07-19 19:14:43 \n",
+ " 81 \n",
+ " NaN \n",
+ " <p>The R-project</p>\\n\\n<p><a href=\"http://www... \n",
+ " 23.0 \n",
+ " 2010-07-19 19:21:15 \n",
+ " None \n",
+ " ... \n",
+ " NaN \n",
+ " 3 \n",
+ " NaN \n",
+ " 23.0 \n",
+ " 2010-07-19 19:21:15 \n",
+ " 2010-07-19 19:14:43 \n",
+ " 3.0 \n",
+ " NaT \n",
+ " None \n",
+ " None \n",
+ " \n",
+ " \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " ... \n",
+ " \n",
+ " \n",
+ " 91971 \n",
+ " 115374 \n",
+ " 2 \n",
+ " NaN \n",
+ " 2014-09-13 23:45:39 \n",
+ " 2 \n",
+ " NaN \n",
+ " <p>This grew too long for a comment, but I thi... \n",
+ " 805.0 \n",
+ " 2014-09-14 02:05:41 \n",
+ " None \n",
+ " ... \n",
+ " NaN \n",
+ " 2 \n",
+ " NaN \n",
+ " 805.0 \n",
+ " 2014-09-14 02:05:41 \n",
+ " NaT \n",
+ " 115367.0 \n",
+ " NaT \n",
+ " None \n",
+ " None \n",
+ " \n",
+ " \n",
+ " 91972 \n",
+ " 115375 \n",
+ " 1 \n",
+ " NaN \n",
+ " 2014-09-13 23:46:05 \n",
+ " 0 \n",
+ " 9.0 \n",
+ " <p>Assume a classification problem where there... \n",
+ " 49365.0 \n",
+ " 2014-09-14 02:09:23 \n",
+ " Detecting a consistent pattern in a dataset vi... \n",
+ " ... \n",
+ " 1.0 \n",
+ " 0 \n",
+ " NaN \n",
+ " NaN \n",
+ " NaT \n",
+ " NaT \n",
+ " NaN \n",
+ " NaT \n",
+ " None \n",
+ " None \n",
+ " \n",
+ " \n",
+ " 91973 \n",
+ " 115376 \n",
+ " 1 \n",
+ " NaN \n",
+ " 2014-09-14 01:27:54 \n",
+ " 1 \n",
+ " 5.0 \n",
+ " <p>My goal is to create a formula that can giv... \n",
+ " 55746.0 \n",
+ " 2014-09-14 01:40:55 \n",
+ " How to project video viewcount based on histor... \n",
+ " ... \n",
+ " 0.0 \n",
+ " 2 \n",
+ " NaN \n",
+ " 7290.0 \n",
+ " 2014-09-14 01:40:55 \n",
+ " NaT \n",
+ " NaN \n",
+ " NaT \n",
+ " None \n",
+ " None \n",
+ " \n",
+ " \n",
+ " 91974 \n",
+ " 115377 \n",
+ " 2 \n",
+ " NaN \n",
+ " 2014-09-14 02:03:28 \n",
+ " 0 \n",
+ " NaN \n",
+ " <p>As a practical answer to the real questions... \n",
+ " 805.0 \n",
+ " 2014-09-14 02:54:13 \n",
+ " None \n",
+ " ... \n",
+ " NaN \n",
+ " 0 \n",
+ " NaN \n",
+ " 805.0 \n",
+ " 2014-09-14 02:54:13 \n",
+ " NaT \n",
+ " 115358.0 \n",
+ " NaT \n",
+ " None \n",
+ " None \n",
+ " \n",
+ " \n",
+ " \n",
+ "91975 \n",
+ " 115378 \n",
+ " 2 \n",
+ " NaN \n",
+ " 2014-09-14 02:09:23 \n",
+ " 0 \n",
+ " NaN \n",
+ " <p>Decision trees are notoriously <strong>unst... \n",
+ " 7250.0 \n",
+ " 2014-09-14 02:09:23 \n",
+ " None \n",
+ " ... \n",
+ " NaN \n",
+ " 0 \n",
+ " NaN \n",
+ " NaN \n",
+ " NaT \n",
+ " NaT \n",
+ " 115375.0 \n",
+ " NaT \n",
+ " None \n",
+ " None \n",
+ "