diff --git a/your-code/main.ipynb b/your-code/main.ipynb index bad6d94..9ed2015 100644 --- a/your-code/main.ipynb +++ b/your-code/main.ipynb @@ -9,10 +9,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "import pandas as pd" + ] }, { "cell_type": "markdown", @@ -23,10 +25,112 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
| \n", + " | Id | \n", + "Reputation | \n", + "CreationDate | \n", + "DisplayName | \n", + "LastAccessDate | \n", + "WebsiteUrl | \n", + "Location | \n", + "AboutMe | \n", + "Views | \n", + "UpVotes | \n", + "DownVotes | \n", + "AccountId | \n", + "Age | \n", + "ProfileImageUrl | \n", + "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", + "-1 | \n", + "1 | \n", + "2010-07-19 06:55:26 | \n", + "Community | \n", + "2010-07-19 06:55:26 | \n", + "http://meta.stackexchange.com/ | \n", + "on the server farm | \n", + "<p>Hi, I'm not really a person.</p>\\r\\n\\r\\n<p>... | \n", + "0 | \n", + "5007 | \n", + "1920 | \n", + "-1 | \n", + "NaN | \n", + "NaN | \n", + "
| 1 | \n", + "2 | \n", + "101 | \n", + "2010-07-19 14:01:36 | \n", + "Geoff Dalgas | \n", + "2013-11-12 22:07:23 | \n", + "http://stackoverflow.com | \n", + "Corvallis, OR | \n", + "<p>Developer on the StackOverflow team. Find ... | \n", + "25 | \n", + "3 | \n", + "0 | \n", + "2 | \n", + "37.0 | \n", + "NaN | \n", + "
Hi, I'm not really a person.
\\r\\n\\r\\n... 0 5007 \n", + "1
Developer on the StackOverflow team. Find ... 25 3 \n", + "\n", + " DownVotes AccountId Age ProfileImageUrl \n", + "0 1920 -1 NaN NaN \n", + "1 0 2 37.0 NaN " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "users= pd.read_csv(\"C:/Users/milena.xavier/Desktop/Iron/Labs/week2/lab-data-cleaning/your-code/users_table.csv\")\n", + "users.head(2)" + ] }, { "cell_type": "markdown", @@ -37,10 +141,91 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
| \n", + " | userId | \n", + "Reputation | \n", + "CreationDate | \n", + "DisplayName | \n", + "LastAccessDate | \n", + "WebsiteUrl | \n", + "Location | \n", + "AboutMe | \n", + "Views | \n", + "UpVotes | \n", + "DownVotes | \n", + "AccountId | \n", + "Age | \n", + "ProfileImageUrl | \n", + "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", + "-1 | \n", + "1 | \n", + "2010-07-19 06:55:26 | \n", + "Community | \n", + "2010-07-19 06:55:26 | \n", + "http://meta.stackexchange.com/ | \n", + "on the server farm | \n", + "<p>Hi, I'm not really a person.</p>\\r\\n\\r\\n<p>... | \n", + "0 | \n", + "5007 | \n", + "1920 | \n", + "-1 | \n", + "NaN | \n", + "NaN | \n", + "
Hi, I'm not really a person.
\\r\\n\\r\\n... 0 5007 \n", + "\n", + " DownVotes AccountId Age ProfileImageUrl \n", + "0 1920 -1 NaN NaN " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "users.rename(columns = {'Id':'userId'}, inplace = True)\n", + "users.head(1)" + ] }, { "cell_type": "markdown", @@ -51,10 +236,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "posts=pd.read_csv(\"C:/Users/milena.xavier/Desktop/Iron/Labs/week2/lab-data-cleaning/your-code/posts_table.csv\")" + ] }, { "cell_type": "markdown", @@ -65,10 +252,114 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
| \n", + " | postId | \n", + "PostTypeId | \n", + "AcceptedAnswerId | \n", + "CreaionDate | \n", + "Score | \n", + "ViewCount | \n", + "Body | \n", + "userId | \n", + "LasActivityDate | \n", + "Title | \n", + "... | \n", + "AnswerCount | \n", + "CommentCount | \n", + "FavoriteCount | \n", + "LastEditorUserId | \n", + "LastEditDate | \n", + "CommunityOwnedDate | \n", + "ParentId | \n", + "ClosedDate | \n", + "OwnerDisplayName | \n", + "LastEditorDisplayName | \n", + "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", + "1 | \n", + "1 | \n", + "15.0 | \n", + "2010-07-19 19:12:12 | \n", + "23 | \n", + "1278.0 | \n", + "<p>How should I elicit prior distributions fro... | \n", + "8.0 | \n", + "2010-09-15 21:08:26 | \n", + "Eliciting priors from experts | \n", + "... | \n", + "5.0 | \n", + "1 | \n", + "14.0 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "
1 rows × 21 columns
\n", + "How should I elicit prior distributions fro... 8.0 \n", + "\n", + " LasActivityDate Title ... AnswerCount \\\n", + "0 2010-09-15 21:08:26 Eliciting priors from experts ... 5.0 \n", + "\n", + " CommentCount FavoriteCount LastEditorUserId LastEditDate \\\n", + "0 1 14.0 NaN NaN \n", + "\n", + " CommunityOwnedDate ParentId ClosedDate OwnerDisplayName \\\n", + "0 NaN NaN NaN NaN \n", + "\n", + " LastEditorDisplayName \n", + "0 NaN \n", + "\n", + "[1 rows x 21 columns]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "posts.rename(columns = {'Id':'postId', 'OwnerUserId': 'userId'}, inplace = True)\n", + "posts.head(1)" + ] }, { "cell_type": "markdown", @@ -81,10 +372,135 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
| \n", + " | userId | \n", + "Reputation | \n", + "Views | \n", + "UpVotes | \n", + "DownVotes | \n", + "postId | \n", + "Score | \n", + "ViewCount | \n", + "CommentCount | \n", + "
|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", + "-1 | \n", + "1 | \n", + "0 | \n", + "5007 | \n", + "1920 | \n", + "2175 | \n", + "0 | \n", + "NaN | \n", + "0 | \n", + "
| 1 | \n", + "-1 | \n", + "1 | \n", + "0 | \n", + "5007 | \n", + "1920 | \n", + "8576 | \n", + "0 | \n", + "NaN | \n", + "0 | \n", + "
| 2 | \n", + "-1 | \n", + "1 | \n", + "0 | \n", + "5007 | \n", + "1920 | \n", + "8578 | \n", + "0 | \n", + "NaN | \n", + "0 | \n", + "
| 3 | \n", + "-1 | \n", + "1 | \n", + "0 | \n", + "5007 | \n", + "1920 | \n", + "8981 | \n", + "0 | \n", + "NaN | \n", + "0 | \n", + "
| 4 | \n", + "-1 | \n", + "1 | \n", + "0 | \n", + "5007 | \n", + "1920 | \n", + "8982 | \n", + "0 | \n", + "NaN | \n", + "0 | \n", + "