From 48898026b350a3cb330a3bb8aec42a3b310b732b Mon Sep 17 00:00:00 2001 From: Anna Date: Wed, 1 May 2024 22:31:37 +0200 Subject: [PATCH] Completed lab --- your-code/main.ipynb | 2270 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 2101 insertions(+), 169 deletions(-) diff --git a/your-code/main.ipynb b/your-code/main.ipynb index 7900997..8c3ea8b 100644 --- a/your-code/main.ipynb +++ b/your-code/main.ipynb @@ -1,169 +1,2101 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 1. Import pandas library" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 2. Import users table:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 3. Rename Id column to userId" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 4. Import posts table:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 5. Rename Id column to postId and OwnerUserId to userId" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 6. Define new dataframes for users and posts with the following selected columns:\n", - " **users columns**: userId, Reputation,Views,UpVotes,DownVotes\n", - " **posts columns**: postId, Score,userId,ViewCount,CommentCount" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 7. Merge both dataframes, users and posts. \n", - "You will need to make a [merge](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.merge.html) of posts and users dataframes." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 8. How many missing values do you have in your merged dataframe? On which columns?" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 9. You will need to make something with missing values. Will you clean or filling them? Explain. \n", - "**Remember** to check the results of your code before passing to the next step" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 10. Adjust the data types in order to avoid future issues. Which ones should be changed? " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.8" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 1. Import pandas library" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 2. Import users table:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IdReputationCreationDateDisplayNameLastAccessDateWebsiteUrlLocationAboutMeViewsUpVotesDownVotesAccountIdAgeProfileImageUrl
0-112010-07-19 06:55:26Community2010-07-19 06:55:26http://meta.stackexchange.com/on the server farm<p>Hi, I'm not really a person.</p>\\r\\n\\r\\n<p>...050071920-1NaNNaN
121012010-07-19 14:01:36Geoff Dalgas2013-11-12 22:07:23http://stackoverflow.comCorvallis, OR<p>Developer on the StackOverflow team. Find ...2530237.0NaN
231012010-07-19 15:34:50Jarrod Dixon2014-08-08 06:42:58http://stackoverflow.comNew York, NY<p><a href=\"http://blog.stackoverflow.com/2009...22190335.0NaN
341012010-07-19 19:03:27Emmett2014-01-02 09:31:02http://minesweeperonline.comSan Francisco, CA<p>currently at a startup in SF</p>\\r\\n\\r\\n<p>...1100199828.0http://i.stack.imgur.com/d1oHX.jpg
4567922010-07-19 19:03:57Shane2014-08-13 00:23:47http://www.statalgo.comNew York, NY<p>Quantitative researcher focusing on statist...114566255450335.0NaN
\n", + "
" + ], + "text/plain": [ + " Id Reputation CreationDate DisplayName LastAccessDate \\\n", + "0 -1 1 2010-07-19 06:55:26 Community 2010-07-19 06:55:26 \n", + "1 2 101 2010-07-19 14:01:36 Geoff Dalgas 2013-11-12 22:07:23 \n", + "2 3 101 2010-07-19 15:34:50 Jarrod Dixon 2014-08-08 06:42:58 \n", + "3 4 101 2010-07-19 19:03:27 Emmett 2014-01-02 09:31:02 \n", + "4 5 6792 2010-07-19 19:03:57 Shane 2014-08-13 00:23:47 \n", + "\n", + " WebsiteUrl Location \\\n", + "0 http://meta.stackexchange.com/ on the server farm \n", + "1 http://stackoverflow.com Corvallis, OR \n", + "2 http://stackoverflow.com New York, NY \n", + "3 http://minesweeperonline.com San Francisco, CA \n", + "4 http://www.statalgo.com New York, NY \n", + "\n", + " AboutMe Views UpVotes \\\n", + "0

Hi, I'm not really a person.

\\r\\n\\r\\n

... 0 5007 \n", + "1

Developer on the StackOverflow team. Find ... 25 3 \n", + "2

currently at a startup in SF

\\r\\n\\r\\n

... 11 0 \n", + "4

Quantitative researcher focusing on statist... 1145 662 \n", + "\n", + " DownVotes AccountId Age ProfileImageUrl \n", + "0 1920 -1 NaN NaN \n", + "1 0 2 37.0 NaN \n", + "2 0 3 35.0 NaN \n", + "3 0 1998 28.0 http://i.stack.imgur.com/d1oHX.jpg \n", + "4 5 54503 35.0 NaN " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "users_table = pd.read_csv('/Users/anna/iron_hack/lab-data-cleaning/your-code/users_table.csv')\n", + "users_table.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 3. Rename Id column to userId" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "

\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
userIdReputationCreationDateDisplayNameLastAccessDateWebsiteUrlLocationAboutMeViewsUpVotesDownVotesAccountIdAgeProfileImageUrl
0-112010-07-19 06:55:26Community2010-07-19 06:55:26http://meta.stackexchange.com/on the server farm<p>Hi, I'm not really a person.</p>\\r\\n\\r\\n<p>...050071920-1NaNNaN
121012010-07-19 14:01:36Geoff Dalgas2013-11-12 22:07:23http://stackoverflow.comCorvallis, OR<p>Developer on the StackOverflow team. Find ...2530237.0NaN
231012010-07-19 15:34:50Jarrod Dixon2014-08-08 06:42:58http://stackoverflow.comNew York, NY<p><a href=\"http://blog.stackoverflow.com/2009...22190335.0NaN
341012010-07-19 19:03:27Emmett2014-01-02 09:31:02http://minesweeperonline.comSan Francisco, CA<p>currently at a startup in SF</p>\\r\\n\\r\\n<p>...1100199828.0http://i.stack.imgur.com/d1oHX.jpg
4567922010-07-19 19:03:57Shane2014-08-13 00:23:47http://www.statalgo.comNew York, NY<p>Quantitative researcher focusing on statist...114566255450335.0NaN
\n", + "
" + ], + "text/plain": [ + " userId Reputation CreationDate DisplayName LastAccessDate \\\n", + "0 -1 1 2010-07-19 06:55:26 Community 2010-07-19 06:55:26 \n", + "1 2 101 2010-07-19 14:01:36 Geoff Dalgas 2013-11-12 22:07:23 \n", + "2 3 101 2010-07-19 15:34:50 Jarrod Dixon 2014-08-08 06:42:58 \n", + "3 4 101 2010-07-19 19:03:27 Emmett 2014-01-02 09:31:02 \n", + "4 5 6792 2010-07-19 19:03:57 Shane 2014-08-13 00:23:47 \n", + "\n", + " WebsiteUrl Location \\\n", + "0 http://meta.stackexchange.com/ on the server farm \n", + "1 http://stackoverflow.com Corvallis, OR \n", + "2 http://stackoverflow.com New York, NY \n", + "3 http://minesweeperonline.com San Francisco, CA \n", + "4 http://www.statalgo.com New York, NY \n", + "\n", + " AboutMe Views UpVotes \\\n", + "0

Hi, I'm not really a person.

\\r\\n\\r\\n

... 0 5007 \n", + "1

Developer on the StackOverflow team. Find ... 25 3 \n", + "2

currently at a startup in SF

\\r\\n\\r\\n

... 11 0 \n", + "4

Quantitative researcher focusing on statist... 1145 662 \n", + "\n", + " DownVotes AccountId Age ProfileImageUrl \n", + "0 1920 -1 NaN NaN \n", + "1 0 2 37.0 NaN \n", + "2 0 3 35.0 NaN \n", + "3 0 1998 28.0 http://i.stack.imgur.com/d1oHX.jpg \n", + "4 5 54503 35.0 NaN " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "users_table.rename(columns={'Id': 'userId'}, inplace=True)\n", + "users_table.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 4. Import posts table:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "

\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IdPostTypeIdAcceptedAnswerIdCreaionDateScoreViewCountBodyOwnerUserIdLasActivityDateTitle...AnswerCountCommentCountFavoriteCountLastEditorUserIdLastEditDateCommunityOwnedDateParentIdClosedDateOwnerDisplayNameLastEditorDisplayName
01115.02010-07-19 19:12:12231278.0<p>How should I elicit prior distributions fro...8.02010-09-15 21:08:26Eliciting priors from experts...5.0114.0NaNNaNNaNNaNNaNNaNNaN
12159.02010-07-19 19:12:57228198.0<p>In many different statistical methods there...24.02012-11-12 09:21:54What is normality?...7.018.088.02010-08-07 17:56:44NaNNaNNaNNaNNaN
2315.02010-07-19 19:13:28543613.0<p>What are some valuable Statistical Analysis...18.02013-05-27 14:48:36What are some valuable Statistical Analysis op......19.0436.0183.02011-02-12 05:50:032010-07-19 19:13:28NaNNaNNaNNaN
341135.02010-07-19 19:13:31135224.0<p>I have two groups of data. Each with a dif...23.02010-09-08 03:00:19Assessing the significance of differences in d......5.022.0NaNNaNNaNNaNNaNNaNNaN
452NaN2010-07-19 19:14:4381NaN<p>The R-project</p>\\n\\n<p><a href=\"http://www...23.02010-07-19 19:21:15NaN...NaN3NaN23.02010-07-19 19:21:152010-07-19 19:14:433.0NaNNaNNaN
\n", + "

5 rows × 21 columns

\n", + "
" + ], + "text/plain": [ + " Id PostTypeId AcceptedAnswerId CreaionDate Score ViewCount \\\n", + "0 1 1 15.0 2010-07-19 19:12:12 23 1278.0 \n", + "1 2 1 59.0 2010-07-19 19:12:57 22 8198.0 \n", + "2 3 1 5.0 2010-07-19 19:13:28 54 3613.0 \n", + "3 4 1 135.0 2010-07-19 19:13:31 13 5224.0 \n", + "4 5 2 NaN 2010-07-19 19:14:43 81 NaN \n", + "\n", + " Body OwnerUserId \\\n", + "0

How should I elicit prior distributions fro... 8.0 \n", + "1

In many different statistical methods there... 24.0 \n", + "2

What are some valuable Statistical Analysis... 18.0 \n", + "3

I have two groups of data. Each with a dif... 23.0 \n", + "4

The R-project

\\n\\n

\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
postIdPostTypeIdAcceptedAnswerIdCreaionDateScoreViewCountBodyuserIdLasActivityDateTitle...AnswerCountCommentCountFavoriteCountLastEditorUserIdLastEditDateCommunityOwnedDateParentIdClosedDateOwnerDisplayNameLastEditorDisplayName
01115.02010-07-19 19:12:12231278.0<p>How should I elicit prior distributions fro...8.02010-09-15 21:08:26Eliciting priors from experts...5.0114.0NaNNaNNaNNaNNaNNaNNaN
12159.02010-07-19 19:12:57228198.0<p>In many different statistical methods there...24.02012-11-12 09:21:54What is normality?...7.018.088.02010-08-07 17:56:44NaNNaNNaNNaNNaN
2315.02010-07-19 19:13:28543613.0<p>What are some valuable Statistical Analysis...18.02013-05-27 14:48:36What are some valuable Statistical Analysis op......19.0436.0183.02011-02-12 05:50:032010-07-19 19:13:28NaNNaNNaNNaN
341135.02010-07-19 19:13:31135224.0<p>I have two groups of data. Each with a dif...23.02010-09-08 03:00:19Assessing the significance of differences in d......5.022.0NaNNaNNaNNaNNaNNaNNaN
452NaN2010-07-19 19:14:4381NaN<p>The R-project</p>\\n\\n<p><a href=\"http://www...23.02010-07-19 19:21:15NaN...NaN3NaN23.02010-07-19 19:21:152010-07-19 19:14:433.0NaNNaNNaN
\n", + "

5 rows × 21 columns

\n", + "" + ], + "text/plain": [ + " postId PostTypeId AcceptedAnswerId CreaionDate Score \\\n", + "0 1 1 15.0 2010-07-19 19:12:12 23 \n", + "1 2 1 59.0 2010-07-19 19:12:57 22 \n", + "2 3 1 5.0 2010-07-19 19:13:28 54 \n", + "3 4 1 135.0 2010-07-19 19:13:31 13 \n", + "4 5 2 NaN 2010-07-19 19:14:43 81 \n", + "\n", + " ViewCount Body userId \\\n", + "0 1278.0

How should I elicit prior distributions fro... 8.0 \n", + "1 8198.0

In many different statistical methods there... 24.0 \n", + "2 3613.0

What are some valuable Statistical Analysis... 18.0 \n", + "3 5224.0

I have two groups of data. Each with a dif... 23.0 \n", + "4 NaN

The R-project

\\n\\n

\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
userIdReputationViewsUpVotesDownVotes
0-11050071920
121012530
2310122190
341011100
45679211456625
..................
40320557431000
40321557446100
4032255745101000
4032355746106100
40324557471000
\n", + "

40325 rows × 5 columns

\n", + "" + ], + "text/plain": [ + " userId Reputation Views UpVotes DownVotes\n", + "0 -1 1 0 5007 1920\n", + "1 2 101 25 3 0\n", + "2 3 101 22 19 0\n", + "3 4 101 11 0 0\n", + "4 5 6792 1145 662 5\n", + "... ... ... ... ... ...\n", + "40320 55743 1 0 0 0\n", + "40321 55744 6 1 0 0\n", + "40322 55745 101 0 0 0\n", + "40323 55746 106 1 0 0\n", + "40324 55747 1 0 0 0\n", + "\n", + "[40325 rows x 5 columns]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "users_table_new = users_table[['userId', 'Reputation', 'Views' ,'UpVotes', 'DownVotes']]\n", + "users_table_new" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
postIdScoreuserIdViewCountCommentCount
01238.01278.01
122224.08198.01
235418.03613.04
341323.05224.02
458123.0NaN3
..................
3999548321019966.0NaN0
39996483223892.0NaN2
399974832312020.0NaN0
3999848324319914.0NaN0
3999948325-119968.0116.04
\n", + "

40000 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " postId Score userId ViewCount CommentCount\n", + "0 1 23 8.0 1278.0 1\n", + "1 2 22 24.0 8198.0 1\n", + "2 3 54 18.0 3613.0 4\n", + "3 4 13 23.0 5224.0 2\n", + "4 5 81 23.0 NaN 3\n", + "... ... ... ... ... ...\n", + "39995 48321 0 19966.0 NaN 0\n", + "39996 48322 3 892.0 NaN 2\n", + "39997 48323 1 2020.0 NaN 0\n", + "39998 48324 3 19914.0 NaN 0\n", + "39999 48325 -1 19968.0 116.0 4\n", + "\n", + "[40000 rows x 5 columns]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "posts_table_new = posts_table[['postId', 'Score', 'userId' ,'ViewCount', 'CommentCount']]\n", + "posts_table_new" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 7. Merge both dataframes, users and posts. \n", + "You will need to make a [merge](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.merge.html) of posts and users dataframes." + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
userIdReputationViewsUpVotesDownVotespostIdScoreViewCountCommentCount
0-1105007192021750NaN0
1-1105007192085760NaN0
2-1105007192085780NaN0
3-1105007192089810NaN0
4-1105007192089820NaN0
..............................
389574593411100340031115.02
389584619236100406675326.02
389594652223513271174613166.00
389605237122120027237243357.05
3896155226119230161761NaN0
\n", + "

38962 rows × 9 columns

\n", + "
" + ], + "text/plain": [ + " userId Reputation Views UpVotes DownVotes postId Score \\\n", + "0 -1 1 0 5007 1920 2175 0 \n", + "1 -1 1 0 5007 1920 8576 0 \n", + "2 -1 1 0 5007 1920 8578 0 \n", + "3 -1 1 0 5007 1920 8981 0 \n", + "4 -1 1 0 5007 1920 8982 0 \n", + "... ... ... ... ... ... ... ... \n", + "38957 45934 11 1 0 0 34003 1 \n", + "38958 46192 36 1 0 0 40667 5 \n", + "38959 46522 235 13 27 1 17461 3 \n", + "38960 52371 221 2 0 0 27237 24 \n", + "38961 55226 119 2 3 0 16176 1 \n", + "\n", + " ViewCount CommentCount \n", + "0 NaN 0 \n", + "1 NaN 0 \n", + "2 NaN 0 \n", + "3 NaN 0 \n", + "4 NaN 0 \n", + "... ... ... \n", + "38957 115.0 2 \n", + "38958 326.0 2 \n", + "38959 166.0 0 \n", + "38960 3357.0 5 \n", + "38961 NaN 0 \n", + "\n", + "[38962 rows x 9 columns]" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "merged_df = users_table_new.merge(posts_table_new,\n", + " how = 'inner',\n", + " on='userId')\n", + "\n", + "merged_df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 8. How many missing values do you have in your merged dataframe? On which columns?" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "userId 0\n", + "Reputation 0\n", + "Views 0\n", + "UpVotes 0\n", + "DownVotes 0\n", + "postId 0\n", + "Score 0\n", + "ViewCount 23572\n", + "CommentCount 0\n", + "dtype: int64" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "missing_values = merged_df.isnull().sum()\n", + "missing_values" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "userId 38962\n", + "Reputation 38962\n", + "Views 38962\n", + "UpVotes 38962\n", + "DownVotes 38962\n", + "postId 38962\n", + "Score 38962\n", + "ViewCount 15390\n", + "CommentCount 38962\n", + "dtype: int64" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "merged_df.count()" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ViewCount has 60.50 % of missing values\n" + ] + } + ], + "source": [ + "print(f\"ViewCount has {missing_values['ViewCount']/(missing_values['ViewCount'] + merged_df.count()['ViewCount'])*100:.2f} % of missing values\")" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
userIdReputationViewsUpVotesDownVotespostIdScoreViewCountCommentCount
count38962.00000038962.00000038962.00000038962.00000038962.00000038962.00000038962.00000015390.00000038962.000000
mean6079.0630877281.0916791400.648016914.79967743.84105022960.7996514.0830811196.2259912.014630
std5224.89643515164.5277143423.8868872296.527060161.79707913696.9324716.5618433742.9528152.674018
min-1.0000001.0000000.0000000.0000000.0000001.000000-19.00000014.0000000.000000
25%1317.000000147.00000016.0000004.0000000.00000011325.2500001.000000173.0000000.000000
50%4856.000000909.000000124.00000065.0000001.00000022373.5000002.000000385.0000001.000000
75%9651.0000007931.0000001050.000000582.00000016.00000033688.5000005.000000967.0000003.000000
max55226.00000087393.00000020932.00000011442.0000001920.00000048325.000000192.000000175495.00000045.000000
\n", + "
" + ], + "text/plain": [ + " userId Reputation Views UpVotes DownVotes \\\n", + "count 38962.000000 38962.000000 38962.000000 38962.000000 38962.000000 \n", + "mean 6079.063087 7281.091679 1400.648016 914.799677 43.841050 \n", + "std 5224.896435 15164.527714 3423.886887 2296.527060 161.797079 \n", + "min -1.000000 1.000000 0.000000 0.000000 0.000000 \n", + "25% 1317.000000 147.000000 16.000000 4.000000 0.000000 \n", + "50% 4856.000000 909.000000 124.000000 65.000000 1.000000 \n", + "75% 9651.000000 7931.000000 1050.000000 582.000000 16.000000 \n", + "max 55226.000000 87393.000000 20932.000000 11442.000000 1920.000000 \n", + "\n", + " postId Score ViewCount CommentCount \n", + "count 38962.000000 38962.000000 15390.000000 38962.000000 \n", + "mean 22960.799651 4.083081 1196.225991 2.014630 \n", + "std 13696.932471 6.561843 3742.952815 2.674018 \n", + "min 1.000000 -19.000000 14.000000 0.000000 \n", + "25% 11325.250000 1.000000 173.000000 0.000000 \n", + "50% 22373.500000 2.000000 385.000000 1.000000 \n", + "75% 33688.500000 5.000000 967.000000 3.000000 \n", + "max 48325.000000 192.000000 175495.000000 45.000000 " + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "merged_df.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAkUAAAHHCAYAAACx7iyPAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/NK7nSAAAACXBIWXMAAA9hAAAPYQGoP6dpAABCr0lEQVR4nO3de3zP9f//8fvb7GhmZjNWY8ghhwijk1M5S3SQUI2EakKjg0/fj0MHlEKfWtHnE6tPiXSgPqIYsqgQI5bzItEQM5vYe9vz94fL3j9v22x7e2/vbe/b9XLZhffz9Xy/Xo/n8/3C3ev0thhjjAAAANxcJVcXAAAAUBYQigAAAEQoAgAAkEQoAgAAkEQoAgAAkEQoAgAAkEQoAgAAkEQoAgAAkEQoAgAAkEQoApwuIiJCQ4cOdXUZFd7MmTNVv359eXh4qFWrVk5Z59ChQxUREeGUdSF//PlAWUYoAq4gLi5OFotFW7ZsyXd5586d1bx586veztdff60pU6Zc9XrcxbfffqtnnnlGt956qxYsWKBp06bl6WO1WhUcHKzbbrutwPUYYxQeHq7WrVuXZLmFSktL09SpU9WyZUv5+/vL19dXzZs317PPPqujR4+6tLZcrt5Hjx49qilTpigxMdFlNaDiq+zqAoCKZs+ePapUqXj/3/j6668VGxtLMCqiNWvWqFKlSnrvvffk5eWVbx9PT08NGDBA8+bN06FDh1S3bt08fdavX68jR47oqaeekiT9+9//Vk5OTonWfrmDBw+qa9euOnz4sAYMGKCRI0fKy8tLO3bs0HvvvacvvvhCe/fuLdWa8uPqffTo0aOaOnWqIiIinHZkELgcoQhwMm9vb1eXUGwZGRmqUqWKq8sosuPHj8vX17fAQJRryJAhmjt3rj7++GM999xzeZYvXLhQlSpV0gMPPCDpYpAqTVlZWbrnnnuUkpKidevW5Tmq9fLLL+uVV14p1ZoAd8bpM8DJLr9mwmq1aurUqWrYsKF8fHxUo0YN3XbbbVq1apWki9exxMbGSpIsFovtJ1dGRobGjx+v8PBweXt7q3HjxnrttddkjLHb7t9//60xY8YoODhYVatW1V133aU//vhDFovF7n/3U6ZMkcViUVJSkgYPHqzq1avb/jHesWOHhg4dqvr168vHx0e1atXSI488or/++stuW7nr2Lt3rx588EFVq1ZNISEh+uc//yljjH7//Xf169dPAQEBqlWrll5//fUizV1WVpZefPFFNWjQQN7e3oqIiNA//vEPXbhwwdbHYrFowYIFysjIsM1VXFxcvuu79dZbFRERoYULF+ZZZrVa9emnn6pLly4KCwuzfRaXX1OUk5OjOXPmqFmzZvLx8VFoaKhGjRql06dP2/rExMSoRo0adp/Jk08+KYvFon/961+2tpSUFFksFr3zzjuSpM8++0zbt2/X888/n+9pvoCAAL388st2bUuWLFGbNm3k6+ur4OBgPfjgg/rjjz/s+nTu3FmdO3fOs77Lx/fbb7/JYrHotdde07vvvmub98jISG3evNnufVfaR/NjjNFLL72ka6+9Vn5+furSpYt27dqVp9+pU6c0YcIEtWjRQv7+/goICFCvXr20fft2W59169YpMjJSkjRs2LA8n3tCQoIGDBigOnXqyNvbW+Hh4Xrqqaf0999/X7FG4HIcKQKK4MyZMzp58mSedqvVWuh7p0yZounTp+vRRx9Vu3btlJaWpi1btmjr1q3q1q2bRo0apaNHj2rVqlX673//a/deY4zuuusurV27VsOHD1erVq30zTff6Omnn9Yff/yh2bNn2/oOHTpUn3zyiR566CHddNNN+u6779SnT58C6xowYIAaNmyoadOm2f4xX7VqlQ4ePKhhw4apVq1a2rVrl959913t2rVLP/74Y55/CAcOHKjrr79eM2bM0PLly/XSSy8pKChI8+bN0+23365XXnlFH330kSZMmKDIyEh17NjxinP16KOP6v3339d9992n8ePH66efftL06dP166+/6osvvpAk/fe//9W7776rTZs26T//+Y8k6ZZbbsl3fRaLRYMHD9a0adO0a9cuNWvWzLZs5cqVOnXqlIYMGXLFmkaNGqW4uDgNGzZMY8aMUXJyst566y1t27ZNGzZskKenpzp06KDZs2dr165dtmvMEhISVKlSJSUkJGjMmDG2Nkm2efjyyy8lSQ899NAVa8iVW0dkZKSmT5+ulJQUvfHGG9qwYYO2bdumwMDAIq3ncgsXLtTZs2c1atQoWSwWvfrqq7rnnnt08OBBeXp6XnEfLcikSZP00ksvqXfv3urdu7e2bt2q7t27KzMz067fwYMHtXTpUg0YMED16tVTSkqK5s2bp06dOikpKUlhYWG6/vrr9cILL2jSpEkaOXKkOnToIOn/f+5LlizRuXPn9Pjjj6tGjRratGmT3nzzTR05ckRLlixxaE7gpgyAAi1YsMBIuuJPs2bN7N5Tt25dExUVZXvdsmVL06dPnytuJzo62uT3x3Hp0qVGknnppZfs2u+77z5jsVjM/v37jTHG/Pzzz0aSGTdunF2/oUOHGklm8uTJtrbJkycbSWbQoEF5tnfu3Lk8bR9//LGRZNavX59nHSNHjrS1ZWVlmWuvvdZYLBYzY8YMW/vp06eNr6+v3ZzkJzEx0Ugyjz76qF37hAkTjCSzZs0aW1tUVJSpUqXKFdeXa9euXUaSmThxol37Aw88YHx8fMyZM2fs1lu3bl3b64SEBCPJfPTRR3bvXblypV378ePHjSTz9ttvG2OMSU1NNZUqVTIDBgwwoaGhtveNGTPGBAUFmZycHGOMMTfeeKOpVq1akcaRmZlpatasaZo3b27+/vtvW/v//vc/I8lMmjTJ1tapUyfTqVOnPOu4fHzJyclGkqlRo4Y5deqUrX3ZsmVGkvnqq69sbQXto/k5fvy48fLyMn369LGN1Rhj/vGPfxhJdvvC+fPnTXZ2tt37k5OTjbe3t3nhhRdsbZs3bzaSzIIFC/JsL7/9dvr06cZisZhDhw4VqWbAGGM4fQYUQWxsrFatWpXn54Ybbij0vYGBgdq1a5f27dtX7O1+/fXX8vDwsB1pyDV+/HgZY7RixQpJF496SNITTzxh1+/JJ58scN2PPfZYnjZfX1/b78+fP6+TJ0/qpptukiRt3bo1T/9HH33U9nsPDw+1bdtWxhgNHz7c1h4YGKjGjRvr4MGDBdYiXRyrdPFU1KXGjx8vSVq+fPkV31+Qpk2b6sYbb9SiRYtsbRkZGfryyy915513KiAgoMD3LlmyRNWqVVO3bt108uRJ20+bNm3k7++vtWvXSpJCQkLUpEkTrV+/XpK0YcMGeXh46Omnn1ZKSorts09ISNBtt91mO+KWlpamqlWrFmkcW7Zs0fHjx/XEE0/Ix8fH1t6nTx81adLE4fmRLh7xq169uu117pGYwj6zgqxevVqZmZm2U4i5xo0bl6evt7e37caE7Oxs/fXXX/L391fjxo3z3efyc+l+m5GRoZMnT+qWW26RMUbbtm1zaAxwT4QioAjatWunrl275vm59B+SgrzwwgtKTU1Vo0aN1KJFCz399NPasWNHkbZ76NAhhYWF5fmH8/rrr7ctz/21UqVKqlevnl2/6667rsB1X95Xunh9x9ixYxUaGipfX1+FhITY+p05cyZP/zp16ti9rlatmnx8fBQcHJyn/dJrcPKTO4bLa65Vq5YCAwNtY3XEkCFDlJycrI0bN0qSli5dqnPnzhV66mzfvn06c+aMatasqZCQELuf9PR0HT9+3Na3Q4cOttNjCQkJatu2rdq2baugoCAlJCQoLS1N27dvtwUO6eI1Q2fPni3SGHLH37hx4zzLmjRpclXzc/nnmLtfF/aZFSS3loYNG9q1h4SE5Pkzk5OTo9mzZ6thw4by9vZWcHCwQkJCtGPHjnz3ufwcPnxYQ4cOVVBQkPz9/RUSEqJOnTpJyn+/BQrCNUVACevYsaMOHDigZcuW6dtvv9V//vMfzZ49W3PnzrU70lLaLv3fda77779fGzdu1NNPP61WrVrJ399fOTk56tmzZ763qnt4eBSpTVKeC8MLUtgFvI4YNGiQnnnmGS1cuFC33HKLFi5cqOrVq6t3795XfF9OTo5q1qypjz76KN/lISEhtt/fdttt+ve//62DBw8qISFBHTp0kMVi0W233aaEhASFhYUpJyfHLhQ1adJE27Zt0++//67w8HDnDFYX5zC/+c7Ozs63/9V+Zldj2rRp+uc//6lHHnlEL774ooKCglSpUiWNGzeuSI9HyM7OVrdu3XTq1Ck9++yzatKkiapUqaI//vhDQ4cOLfVHLKB8IxQBpSAoKEjDhg3TsGHDlJ6ero4dO2rKlCm2UFRQEKhbt65Wr16ts2fP2h0t2r17t2157q85OTlKTk62+9/5/v37i1zj6dOnFR8fr6lTp2rSpEm2dkdO+zkidwz79u2zHQmTLt6xlZqamu9zhooqLCxMXbp00ZIlS/TPf/5Tq1at0tChQwu9pb9BgwZavXq1br311nxD5KVyw86qVau0efNm2yMAOnbsqHfeeUdhYWGqUqWK2rRpY3tP37599fHHH+vDDz/UxIkTr7j+3PHv2bNHt99+u92yPXv22M1P9erV8z31dTVHk4oTVnNr2bdvn+rXr29rP3HiRJ6jT7l3AL733nt27ampqXZHHAva/i+//KK9e/fq/fff18MPP2xrz727EygOTp8BJezy29n9/f113XXX2d1mnvuMoNTUVLu+vXv3VnZ2tt566y279tmzZ8tisahXr16SpB49ekiS3n77bbt+b775ZpHrzD1acPnRgTlz5hR5HVcj96jN5dubNWuWJF3xTrqiGDJkiI4fP65Ro0bJarUWeupMunjkLDs7Wy+++GKeZVlZWXafV7169XTNNddo9uzZslqtuvXWWyVdDEsHDhzQp59+qptuukmVK////4ved999atGihV5++WX98MMPebZx9uxZPf/885Kktm3bqmbNmpo7d67dvrNixQr9+uuvdvPToEED7d69WydOnLC1bd++XRs2bCh0zAUpaB/NT9euXeXp6ak333zTbn/Kb1/y8PDIs88tWbIkz2MGCtp+fvutMUZvvPFGoXUCl+NIEVDCmjZtqs6dO6tNmzYKCgrSli1b9Omnn2r06NG2PrlHD8aMGaMePXrIw8NDDzzwgPr27asuXbro+eef12+//aaWLVvq22+/1bJlyzRu3Dg1aNDA9v57771Xc+bM0V9//WW7JT/3SchF+V9+QECAOnbsqFdffVVWq1XXXHONvv32WyUnJ5fArOTVsmVLRUVF6d1331Vqaqo6deqkTZs26f3331f//v3VpUuXq1r/vffeqyeeeELLli1TeHh4oY8HkKROnTpp1KhRmj59uhITE9W9e3d5enpq3759WrJkid544w3dd999tv4dOnTQokWL1KJFC9u1M61bt1aVKlW0d+9eDR482G79np6e+vzzz9W1a1d17NhR999/v2699VZ5enpq165dttN8L7/8sjw9PfXKK69o2LBh6tSpkwYNGmS7JT8iIsL2VG5JeuSRRzRr1iz16NFDw4cP1/HjxzV37lw1a9ZMaWlpDs1fQftofkJCQjRhwgRNnz5dd955p3r37q1t27ZpxYoVea43u/POO/XCCy9o2LBhuuWWW/TLL7/oo48+sjvCJF0MeoGBgZo7d66qVq2qKlWqqH379mrSpIkaNGigCRMm6I8//lBAQIA+++wzh6+Hgptz0V1vQLmQe0v+5s2b813eqVOnQm/Jf+mll0y7du1MYGCg8fX1NU2aNDEvv/yyyczMtPXJysoyTz75pAkJCTEWi8Xu1uezZ8+ap556yoSFhRlPT0/TsGFDM3PmTLtbnY0xJiMjw0RHR5ugoCDj7+9v+vfvb/bs2WMk2d0in3s7/YkTJ/KM58iRI+buu+82gYGBplq1ambAgAHm6NGjBd7Wf/k6CrpVPr95yo/VajVTp0419erVM56eniY8PNxMnDjRnD9/vkjbKcyAAQOMJPPMM8/ku/zyW9Zzvfvuu6ZNmzbG19fXVK1a1bRo0cI888wz5ujRo3b9YmNjjSTz+OOP27V37drVSDLx8fH5bvf06dNm0qRJpkWLFsbPz8/4+PiY5s2bm4kTJ5pjx47Z9V28eLG58cYbjbe3twkKCjJDhgwxR44cybPODz/80NSvX994eXmZVq1amW+++abAW/JnzpyZ5/2Xf+ZX2kfzk52dbaZOnWpq165tfH19TefOnc3OnTvz/Pk4f/68GT9+vK3frbfean744Yd8HyuwbNky07RpU1O5cmW72/OTkpJM165djb+/vwkODjYjRoww27dvL/AWfqAgFmNK4Uo6AC6RmJioG2+8UR9++GGRThcBgDvjmiKggsjvKw3mzJmjSpUqFelUEQC4O64pAiqIV199VT///LO6dOmiypUra8WKFVqxYoVGjhzp1Nu9AaCi4vQZUEGsWrVKU6dOVVJSktLT01WnTh099NBDev755+3ueAIA5I9QBAAAIK4pAgAAkEQoAgAAkMSF1oXKycnR0aNHVbVq1RL5TiYAAOB8xhidPXtWYWFhqlSpaMeACEWFOHr0KHfuAABQTv3++++69tpri9SXUFSI3C/h/P333xUQEOC09VqtVn377be2rw1wR+4+B4yf8TN+9x2/xByU9PjT0tIUHh5u92XahSEUFSL3lFlAQIDTQ5Gfn58CAgLc8g+DxBwwfsbP+N13/BJzUFrjL86lL1xoDQAAIEJRgWJjY9W0aVNFRka6uhQAAFAKCEUFiI6OVlJSkjZv3uzqUgAAQCkgFAEAAIhQBAAAIIlQBAAAIIlQBAAAIIlQVCDuPgMAwL0QigrA3WcAALgXQhEAAIAIRQAAAJIIRQAAAJIIRQAAAJIIRQXi7jMAANxLZVcXUFZFR0crOjpaaWlpqlatWoltZ9yKccqyZNlez+s7r8S2BQAACsaRIgAAABGKAAAAJBGKAAAAJBGKAAAAJBGKAAAAJBGKAAAAJBGKCsRzigAAcC+EogJER0crKSlJmzdvdnUpAACgFBCKAAAARCgCAACQRCgCAACQRCgCAACQRCgCAACQRCgCAACQRCgCAACQRCgCAACQRCgqEE+0BgDAvRCKCsATrQEAcC+EIgAAABGKAAAAJBGKAAAAJBGKAAAAJBGKAAAAJBGKAAAAJBGKAAAAJBGKAAAAJBGKAAAAJBGKAAAAJBGKAAAAJBGKAAAAJBGKAAAAJBGKChQbG6umTZsqMjLS1aUAAIBSQCgqQHR0tJKSkrR582ZXlwIAAEoBoQgAAECEIgAAAEmEIgAAAEmEIgAAAEmEIgAAAEmEIgAAAEmEIgAAAEmEIgAAAEmEIgAAAEmEIgAAAEmEIgAAAEmEIgAAAEmEIgAAAEmEIgAAAEmEIgAAAEmEIgAAAEluFIrOnTununXrasKECa4uBQAAlEFuE4pefvll3XTTTa4uAwAAlFFuEYr27dun3bt3q1evXq4uBQAAlFEuD0Xr169X3759FRYWJovFoqVLl+bpExsbq4iICPn4+Kh9+/batGlTsbYxYcIETZ8+3UkVAwCAisjloSgjI0MtW7ZUbGxsvssXL16smJgYTZ48WVu3blXLli3Vo0cPHT9+3NanVatWat68eZ6fo0ePatmyZWrUqJEaNWpUWkMCAADlUGVXF9CrV68rntaaNWuWRowYoWHDhkmS5s6dq+XLl2v+/Pl67rnnJEmJiYkFvv/HH3/UokWLtGTJEqWnp8tqtSogIECTJk3Kt/+FCxd04cIF2+u0tDRJktVqldVqLe7wCpS7Lg/jkW+7O8gdqzuN+VKMn/Ff+qu7cffxS8xBSY/fkfVajDGmBGpxiMVi0RdffKH+/ftLkjIzM+Xn56dPP/3U1iZJUVFRSk1N1bJly4q1/ri4OO3cuVOvvfZagX2mTJmiqVOn5mlfuHCh/Pz8irU9AADgGufOndPgwYN15swZBQQEFOk9Lj9SdCUnT55Udna2QkND7dpDQ0O1e/fuEtnmxIkTFRMTY3udlpam8PBwde/evciTWhRWq1WrVq1SvOKVbcm2tc/pNcdp2yjrcuegW7du8vT0dHU5pY7xM37G777jl5iDkh5/7pme4ijTocjZhg4dWmgfb29veXt752n39PQskQ8t25KtLEuW3XbcTUnNbXnB+Bk/43ff8UvMQUmN35F1uvxC6ysJDg6Wh4eHUlJS7NpTUlJUq1YtF1UFAAAqojIdiry8vNSmTRvFx8fb2nJychQfH6+bb765RLcdGxurpk2bKjIyskS3AwAAygaXnz5LT0/X/v37ba+Tk5OVmJiooKAg1alTRzExMYqKilLbtm3Vrl07zZkzRxkZGba70UpKdHS0oqOjlZaWpmrVqpXotgAAgOu5PBRt2bJFXbp0sb3Ovcg5KipKcXFxGjhwoE6cOKFJkybpzz//VKtWrbRy5co8F18DAABcDZeHos6dO6uwpwKMHj1ao0ePLqWKAACAOyrT1xS5EtcUAQDgXghFBYiOjlZSUpI2b97s6lIAAEApIBQBAACIUAQAACCJUAQAACCJUFQgLrQGAMC9EIoKwIXWAAC4F0IRAACACEUAAACSCEUAAACSCEUF4kJrAADcC6GoAFxoDQCAeyEUAQAAiFAEAAAgiVAEAAAgiVAEAAAgiVAEAAAgiVBUIG7JBwDAvRCKCsAt+QAAuBdCEQAAgKTKri4A9kZ9Ncru9by+81xUCQAA7oUjRQAAACIUAQAASCIUAQAASCIUAQAASCIUFYjnFAEA4F4IRQXgOUUAALgXQhEAAIAIRQAAAJIIRQAAAJIIRQAAAJIIRQAAAJIIRQAAAJIIRQAAAJIIRQAAAJIIRQXiidYAALgXQlEBeKI1AADuhVAEAAAgQhEAAIAkQhEAAIAkQhEAAIAkQhEAAIAkQhEAAIAkQhEAAIAkQhEAAIAkQhEAAIAkQhEAAIAkQhEAAIAkQhEAAIAkQlGBYmNj1bRpU0VGRrq6FAAAUAoIRQWIjo5WUlKSNm/e7OpSAABAKSAUAQAAiFAEAAAgiVAEAAAgiVAEAAAgiVAEAAAgiVAEAAAgiVAEAAAgSars6gJwZaO+GpWnbV7feS6oBACAio0jRQAAACIUAQAASCIUAQAASCIUAQAASCIUAQAASCIUAQAASCIUAQAASCIUAQAASCIUAQAASHKTJ1pHREQoICBAlSpVUvXq1bV27VpXlwQAAMoYtwhFkrRx40b5+/u7ugwAAFBGcfoMAABADoaigwcPOq2A9evXq2/fvgoLC5PFYtHSpUvz9ImNjVVERIR8fHzUvn17bdq0qVjbsFgs6tSpkyIjI/XRRx85qXIAAFCROBSKrrvuOnXp0kUffvihzp8/f1UFZGRkqGXLloqNjc13+eLFixUTE6PJkydr69atatmypXr06KHjx4/b+rRq1UrNmzfP83P06FFJ0vfff6+ff/5ZX375paZNm6YdO3ZcVc0AAKDiceiaoq1bt2rBggWKiYnR6NGjNXDgQA0fPlzt2rUr9rp69eqlXr16Fbh81qxZGjFihIYNGyZJmjt3rpYvX6758+frueeekyQlJiZecRvXXHONJKl27drq3bu3tm7dqhtuuCHfvhcuXNCFCxdsr9PS0iRJVqtVVqu1yOMqTO66PIyHw+8t73LHUVHGU1yMn/Ff+qu7cffxS8xBSY/fkfVajDHG0Q1mZWXpyy+/VFxcnFauXKlGjRrpkUce0UMPPaSQkJDiF2Ox6IsvvlD//v0lSZmZmfLz89Onn35qa5OkqKgopaamatmyZYWuMyMjQzk5OapatarS09PVqVMnzZ07V5GRkfn2nzJliqZOnZqnfeHChfLz8yv2mAAAQOk7d+6cBg8erDNnziggIKBI77mqUJTrwoULevvttzVx4kRlZmbKy8tL999/v1555RXVrl27yOu5PBQdPXpU11xzjTZu3Kibb77Z1u+ZZ57Rd999p59++qnQdR48eFB33323JCk7O1sjRozQ2LFjrziWy48UhYeH6+TJk0We1KKwWq1atWqV4hWvbEt2sd47p9ccp9XhSrlz0K1bN3l6erq6nFLH+Bk/43ff8UvMQUmPPy0tTcHBwcUKRVd1S/6WLVs0f/58LVq0SFWqVNGECRM0fPhwHTlyRFOnTlW/fv2KfVG0s9WvX1/bt28vcn9vb295e3vnaff09CyRDy3bkq0sS1ax3lPR/vCU1NyWF4yf8TN+9x2/xByU1PgdWadDoWjWrFlasGCB9uzZo969e+uDDz5Q7969VanSxeu269Wrp7i4OEVERDiyepvg4GB5eHgoJSXFrj0lJUW1atW6qnUXJjY2VrGxscrOLt5RHAAAUD45dPfZO++8o8GDB+vQoUNaunSp7rzzTlsgylWzZk299957V1Wcl5eX2rRpo/j4eFtbTk6O4uPj7U6nlYTo6GglJSVp8+bNJbodAABQNjh0pGjfvn2F9vHy8lJUVFSh/dLT07V//37b6+TkZCUmJiooKEh16tRRTEyMoqKi1LZtW7Vr105z5sxRRkaG7W40AAAAZ3AoFC1YsED+/v4aMGCAXfuSJUt07ty5IoWhXFu2bFGXLl1sr2NiYiRdvMMsLi5OAwcO1IkTJzRp0iT9+eefatWqlVauXKnQ0FBHSgcAAMiXQ6fPpk+fruDg4DztNWvW1LRp04q1rs6dO8sYk+cnLi7O1mf06NE6dOiQLly4oJ9++knt27d3pGwAAIACORSKDh8+rHr16uVpr1u3rg4fPnzVRZUFsbGxatq0aYHPMwIAABWLQ6GoZs2a+X5Vxvbt21WjRo2rLqos4EJrAADci0OhaNCgQRozZozWrl2r7OxsZWdna82aNRo7dqweeOABZ9cIAABQ4hy60PrFF1/Ub7/9pjvuuEOVK19cRU5Ojh5++OFiX1MEAABQFjgUiry8vLR48WK9+OKL2r59u3x9fdWiRQvVrVvX2fW5DA9vBADAvVzV13w0atRIjRo1clYtZUp0dLSio6OVlpamatWqubocAABQwhwKRdnZ2YqLi1N8fLyOHz+unJwcu+Vr1qxxSnEAAAClxaFQNHbsWMXFxalPnz5q3ry5LBaLs+sCAAAoVQ6FokWLFumTTz5R7969nV0PAACASzh8ofV1113n7FpQRKO+GmX3el7feS6qBACAisOh5xSNHz9eb7zxhowxzq6nzOCJ1gAAuBeHjhR9//33Wrt2rVasWKFmzZrJ09PTbvnnn3/ulOJcibvPAABwLw6FosDAQN19993OrgUAAMBlHApFCxYscHYdAAAALuXQNUWSlJWVpdWrV2vevHk6e/asJOno0aNKT093WnEAAAClxaEjRYcOHVLPnj11+PBhXbhwQd26dVPVqlX1yiuv6MKFC5o7d66z6wQAAChRDh0pGjt2rNq2bavTp0/L19fX1n733XcrPj7eacW5EnefAQDgXhw6UpSQkKCNGzfKy8vLrj0iIkJ//PGHUwpzNe4+AwDAvTgUinJycvL99vgjR46oatWqV10UiufyhzlKPNARAIDicuj0Wffu3TVnzhzba4vFovT0dE2ePJmv/gAAAOWSQ0eKXn/9dfXo0UNNmzbV+fPnNXjwYO3bt0/BwcH6+OOPnV0jAABAiXMoFF177bXavn27Fi1apB07dig9PV3Dhw/XkCFD7C68BgAAKC8cCkWSVLlyZT344IPOrAUAAMBlHApFH3zwwRWXP/zwww4VAwAA4CoOhaKxY8favbZarTp37py8vLzk5+dXIUJRbGysYmNj873LDgAAVDwO3X12+vRpu5/09HTt2bNHt912W4W50Do6OlpJSUnavHmzq0sBAAClwOHvPrtcw4YNNWPGjDxHkQAAAMoDp4Ui6eLF10ePHnXmKgEAAEqFQ9cUffnll3avjTE6duyY3nrrLd16661OKQwAAKA0ORSK+vfvb/faYrEoJCREt99+u15//XVn1AUAAFCqHP7uMwAAgIrEqdcUAQAAlFcOHSmKiYkpct9Zs2Y5sgkAAIBS5VAo2rZtm7Zt2yar1arGjRtLkvbu3SsPDw+1bt3a1s9isTinSgAAgBLmUCjq27evqlatqvfff1/Vq1eXdPGBjsOGDVOHDh00fvx4pxbpCjzRGgAA9+LQNUWvv/66pk+fbgtEklS9enW99NJLFebuM55oDQCAe3EoFKWlpenEiRN52k+cOKGzZ89edVEAAAClzaFQdPfdd2vYsGH6/PPPdeTIER05ckSfffaZhg8frnvuucfZNQIAAJQ4h64pmjt3riZMmKDBgwfLarVeXFHlyho+fLhmzpzp1AIBAABKg0OhyM/PT2+//bZmzpypAwcOSJIaNGigKlWqOLU4AACA0nJVD288duyYjh07poYNG6pKlSoyxjirLgAAgFLlUCj666+/dMcdd6hRo0bq3bu3jh07JkkaPnx4hbgdHwAAuB+HTp899dRT8vT01OHDh3X99dfb2gcOHKiYmJgKc1t+eTbqq1F2r+f1neeiSgAAKB8cCkXffvutvvnmG1177bV27Q0bNtShQ4ecUhgAAEBpcuj0WUZGhvz8/PK0nzp1St7e3lddFAAAQGlzKBR16NBBH3zwge21xWJRTk6OXn31VXXp0sVpxQEAAJQWh06fvfrqq7rjjju0ZcsWZWZm6plnntGuXbt06tQpbdiwwdk1AgAAlDiHjhQ1b95ce/fu1W233aZ+/fopIyND99xzj7Zt26YGDRo4u0YAAIASV+wjRVarVT179tTcuXP1/PPPl0RNAAAApa7YR4o8PT21Y8eOkqilTImNjVXTpk0VGRnp6lIAAEApcOj02YMPPqj33nvP2bWUKdHR0UpKStLmzZtdXQoAACgFDl1onZWVpfnz52v16tVq06ZNnu88mzVrllOKAwAAKC3FCkUHDx5URESEdu7cqdatW0uS9u7da9fHYrE4rzoAAIBSUqxQ1LBhQx07dkxr166VdPFrPf71r38pNDS0RIoDAAAoLcW6psgYY/d6xYoVysjIcGpBAAAAruDQhda5Lg9JAAAA5VWxQpHFYslzzRDXEAEAgIqgWNcUGWM0dOhQ25e+nj9/Xo899lieu88+//xz51UIAABQCooViqKiouxeP/jgg04tBgAAwFWKFYoWLFhQUnUAAAC41FVdaA0AAFBROPREa5Q/o74aladtXt95LqgEAICyiSNFAAAAIhQBAABIIhQBAABIIhQBAABIIhQBAABIcpO7z5KTk/XII48oJSVFHh4e+vHHH/M8hRvcoQYAcG9uEYqGDh2ql156SR06dNCpU6dsX1MCAACQq8KHol27dsnT01MdOnSQJAUFBbm4IgAAUBa5/Jqi9evXq2/fvgoLC5PFYtHSpUvz9ImNjVVERIR8fHzUvn17bdq0qcjr37dvn/z9/dW3b1+1bt1a06ZNc2L1AACgonD5kaKMjAy1bNlSjzzyiO655548yxcvXqyYmBjNnTtX7du315w5c9SjRw/t2bNHNWvWlCS1atVKWVlZed777bffKisrSwkJCUpMTFTNmjXVs2dPRUZGqlu3biU+NgAAUH64PBT16tVLvXr1KnD5rFmzNGLECA0bNkySNHfuXC1fvlzz58/Xc889J0lKTEws8P3XXHON2rZtq/DwcElS7969lZiYWGAounDhgi5cuGB7nZaWJkmyWq2yWq3FGtuV5K7Lw3g4bZ2O1pCrssm7OzhzzAWtuyS3UZYxfsZ/6a/uxt3HLzEHJT1+R9ZrMcaYEqjFIRaLRV988YX69+8vScrMzJSfn58+/fRTW5skRUVFKTU1VcuWLSt0nVlZWYqMjNSaNWtUrVo19evXT6NGjdKdd96Zb/8pU6Zo6tSpedoXLlwoPz8/h8YFAABK17lz5zR48GCdOXNGAQEBRXqPy48UXcnJkyeVnZ2t0NBQu/bQ0FDt3r27SOuoXLmypk2bpo4dO8oYo+7duxcYiCRp4sSJiomJsb1OS0tTeHi4unfvXuRJLQqr1apVq1YpXvHKtmQ7bb3FMafXHLvX41aMK7SPM+XOQbdu3eTp6Vli2ymrGD/jZ/zuO36JOSjp8eee6SmOMh2KnKWwU3SX8vb2zveWfU9PzxL50LIt2cqy5L0eqjRcPp786iiNP6glNbflBeNn/IzffccvMQclNX5H1lmmQ1FwcLA8PDyUkpJi156SkqJatWq5qCr3cvkDHXmYIwCgoirTocjLy0tt2rRRfHy87ZqinJwcxcfHa/To0SW67djYWMXGxio72zWntkpDfk+wBgDAXbk8FKWnp2v//v2218nJyUpMTFRQUJDq1KmjmJgYRUVFqW3btmrXrp3mzJmjjIwM291oJSU6OlrR0dFKS0tTtWrVSnRbAADA9VweirZs2aIuXbrYXude5BwVFaW4uDgNHDhQJ06c0KRJk/Tnn3+qVatWWrlyZZ6LrwEAAK6Gy0NR586dVdhTAUaPHl3ip8sAAIB7c/nXfJRVsbGxatq0qSIjI11dCgAAKAWEogJER0crKSlJmzdvdnUpAACgFBCKAAAARCgCAACQRCgCAACQRCgqEBdaAwDgXghFBeBCawAA3AuhCAAAQIQiAAAASYQiAAAASYSiAnGhNQAA7oVQVAAutAYAwL0QigAAAEQoAgAAkEQoAgAAkEQoAgAAkEQoAgAAkEQoKhC35AMA4F4IRQXglnwAANwLoQgAAECEIgAAAEmEIgAAAEmEIgAAAEmEIgAAAEmEIgAAAEmEogLxnCIAANwLoagAPKcIAAD3UtnVBcA9jPpqlN3reX3nFfs9RX0fAACO4EgRAACACEUAAACSCEUAAACSuKYIJSC/a4EAACjrOFIEAAAgQhEAAIAkQhEAAIAkrikqUGxsrGJjY5Wdne3qUiqkUV+NUmVTWd3VXeNWjFOWJYtnEAEAXIojRQXgidYAALgXQhEAAIAIRQAAAJIIRQAAAJK40BrFxIMZAQAVFUeKAAAARCgCAACQRCgCAACQRCgCAACQxIXWqIDyuxicp2UDAArDkSIAAAARigAAACQRigAAACQRigoUGxurpk2bKjIy0tWlAACAUkAoKkB0dLSSkpK0efNmV5cCAABKAaEIAABAhCIAAABJhCIAAABJhCIAAABJhCIAAABJhCIAAABJhCIAAABJhCIAAABJhCIAAABJUmVXFwC4wqivRhXaZ17feaVQCQCgrOBIEQAAgAhFAAAAkghFAAAAkghFAAAAkghFAAAAkghFAAAAkghFAAAAktwgFO3Zs0etWrWy/fj6+mrp0qWuLgsAAJQxFf7hjY0bN1ZiYqIkKT09XREREerWrZtriwIAAGVOhQ9Fl/ryyy91xx13qEqVKq4uBfkoylOmy+K63cXlc8gTvwFUNC4/fbZ+/Xr17dtXYWFhslgs+Z7aio2NVUREhHx8fNS+fXtt2rTJoW198sknGjhw4FVWDAAAKiKXh6KMjAy1bNlSsbGx+S5fvHixYmJiNHnyZG3dulUtW7ZUjx49dPz4cVufVq1aqXnz5nl+jh49auuTlpamjRs3qnfv3iU+JgAAUP64/PRZr1691KtXrwKXz5o1SyNGjNCwYcMkSXPnztXy5cs1f/58Pffcc5Jku2boSpYtW6bu3bvLx8fniv0uXLigCxcu2F6npaVJkqxWq6xWa6HbKarcdXkYD6ets7zJHXtx5qAon0Fl45zd2pmf95XWX9LbcZbL5/Vq6y5v43c2xu/e45eYg5IevyPrtRhjTAnU4hCLxaIvvvhC/fv3lyRlZmbKz89Pn376qa1NkqKiopSamqply5YVed19+/bVyJEj1bdv3yv2mzJliqZOnZqnfeHChfLz8yvy9gAAgOucO3dOgwcP1pkzZxQQEFCk97j8SNGVnDx5UtnZ2QoNDbVrDw0N1e7du4u8njNnzmjTpk367LPPCu07ceJExcTE2F6npaUpPDxc3bt3L/KkFoXVatWqVasUr3hlW7Kdtt7yxMN46A7dUaw5mNNrTqF9xq0Yd3WFFWNbVyN3H+jWrZs8PT1LdFvOcPm8Xu38lLfxOxvjd+/xS8xBSY8/90xPcZTpUOQs1apVU0pKSpH6ent7y9vbO0+7p6dniXxo2ZZsZVmynL7ecsMUbw6K8hk4az5L6y+pktq3nO3yeXVWzeVl/CWF8bv3+CXmoKTG78g6XX6h9ZUEBwfLw8MjT6BJSUlRrVq1SnTbsbGxatq0qSIjI0t0OwAAoGwo06HIy8tLbdq0UXx8vK0tJydH8fHxuvnmm0t029HR0UpKStLmzZtLdDsAAKBscPnps/T0dO3fv9/2Ojk5WYmJiQoKClKdOnUUExOjqKgotW3bVu3atdOcOXOUkZFhuxsNAADAGVweirZs2aIuXbrYXude5BwVFaW4uDgNHDhQJ06c0KRJk/Tnn3+qVatWWrlyZZ6LrwEAAK6Gy0NR586dVdhTAUaPHq3Ro0eXUkUAAMAdlelrilyJC60BAHAvhKICcKE1AADuxeWnz4DiKM1var98WyW9PZRdpbnfAXAdjhQBAACIUFQgrikCAMC9EIoKwDVFAAC4F0IRAACACEUAAACSCEUAAACSCEUAAACSCEUF4u4zAADcC6GoANx9BgCAeyEUAQAAiFAEAAAgiVAEAAAgiVAEAAAgiVBUIO4+AwDAvRCKCsDdZwAAuBdCEQAAgAhFAAAAkghFAAAAkghFAAAAkghFAAAAkghFAAAAkghFBeI5RQAAuBdCUQF4ThEAAO6FUAQAACCpsqsLAK7GqK9Glbntzes7r0jrqWwqq7u6a9yKcYq9K7ZEt1XYe5w1j8XZVu74Xa0oNTtjvc5cN1CelbW/Ay7FkSIAAAARigAAACQRigAAACQRigAAACQRigAAACQRigAAACQRigrEE60BAHAvhKIC8ERrAADcC6EIAABAhCIAAABJhCIAAABJhCIAAABJhCIAAABJhCIAAABJhCIAAABJUmVXF1DWGWMkSWlpaU5dr9Vq1blz55RpyVSWJcup6y4vckyOzpmKNwdF2Vcyz2XajT+/92Sey3Tatgp7T1G2VZRtF2dbueNPS0uTp6dnsbfvLI7MjyPzfvn7cv8OcPX4XcXdxy+57xyU1t8BuX/ecv8dLwqLKU5vN3TkyBGFh4e7ugwAAOCA33//Xddee22R+hKKCpGTk6OjR4+qatWqslgsTltvWlqawsPD9fvvvysgIMBp6y1P3H0OGD/jZ/zuO36JOSjp8RtjdPbsWYWFhalSpaJdLcTps0JUqlSpyAnTEQEBAW75h+FS7j4HjJ/xM373Hb/EHJTk+KtVq1as/lxoDQAAIEIRAACAJEKRy3h7e2vy5Mny9vZ2dSku4+5zwPgZP+N33/FLzEFZHD8XWgMAAIgjRQAAAJIIRQAAAJIIRQAAAJIIRQAAAJIIRS4TGxuriIgI+fj4qH379tq0aZOrSyrU9OnTFRkZqapVq6pmzZrq37+/9uzZY9enc+fOslgsdj+PPfaYXZ/Dhw+rT58+8vPzU82aNfX0008rK8v+u8/WrVun1q1by9vbW9ddd53i4uLy1FPaczhlypQ8Y2vSpIlt+fnz5xUdHa0aNWrI399f9957r1JSUuzWUV7HLkkRERF5xm+xWBQdHS2p4n3269evV9++fRUWFiaLxaKlS5faLTfGaNKkSapdu7Z8fX3VtWtX7du3z67PqVOnNGTIEAUEBCgwMFDDhw9Xenq6XZ8dO3aoQ4cO8vHxUXh4uF599dU8tSxZskRNmjSRj4+PWrRooa+//rrYtTh7DqxWq5599lm1aNFCVapUUVhYmB5++GEdPXrUbh357TczZswoF3NQ2D4wdOjQPGPr2bOnXZ/yvA8UNv78/j6wWCyaOXOmrU+5+/wNSt2iRYuMl5eXmT9/vtm1a5cZMWKECQwMNCkpKa4u7Yp69OhhFixYYHbu3GkSExNN7969TZ06dUx6erqtT6dOncyIESPMsWPHbD9nzpyxLc/KyjLNmzc3Xbt2Ndu2bTNff/21CQ4ONhMnTrT1OXjwoPHz8zMxMTEmKSnJvPnmm8bDw8OsXLnS1scVczh58mTTrFkzu7GdOHHCtvyxxx4z4eHhJj4+3mzZssXcdNNN5pZbbqkQYzfGmOPHj9uNfdWqVUaSWbt2rTGm4n32X3/9tXn++efN559/biSZL774wm75jBkzTLVq1czSpUvN9u3bzV133WXq1atn/v77b1ufnj17mpYtW5off/zRJCQkmOuuu84MGjTItvzMmTMmNDTUDBkyxOzcudN8/PHHxtfX18ybN8/WZ8OGDcbDw8O8+uqrJikpyfzf//2f8fT0NL/88kuxanH2HKSmppquXbuaxYsXm927d5sffvjBtGvXzrRp08ZuHXXr1jUvvPCC3X5x6d8ZZXkOCtsHoqKiTM+ePe3GdurUKbs+5XkfKGz8l4772LFjZv78+cZisZgDBw7Y+pS3z59Q5ALt2rUz0dHRttfZ2dkmLCzMTJ8+3YVVFd/x48eNJPPdd9/Z2jp16mTGjh1b4Hu+/vprU6lSJfPnn3/a2t555x0TEBBgLly4YIwx5plnnjHNmjWze9/AgQNNjx49bK9dMYeTJ082LVu2zHdZamqq8fT0NEuWLLG1/frrr0aS+eGHH4wx5Xvs+Rk7dqxp0KCBycnJMcZU7M/+8n8QcnJyTK1atczMmTNtbampqcbb29t8/PHHxhhjkpKSjCSzefNmW58VK1YYi8Vi/vjjD2OMMW+//bapXr26bfzGGPPss8+axo0b217ff//9pk+fPnb1tG/f3owaNarItThDfv8oXm7Tpk1Gkjl06JCtrW7dumb27NkFvqe8zEFBoahfv34Fvqci7QNF+fz79etnbr/9dru28vb5c/qslGVmZurnn39W165dbW2VKlVS165d9cMPP7iwsuI7c+aMJCkoKMiu/aOPPlJwcLCaN2+uiRMn6ty5c7ZlP/zwg1q0aKHQ0FBbW48ePZSWlqZdu3bZ+lw6P7l9cufHlXO4b98+hYWFqX79+hoyZIgOHz4sSfr5559ltVrtamrSpInq1Kljq6m8j/1SmZmZ+vDDD/XII4/YfVFyRf7sL5WcnKw///zTro5q1aqpffv2dp93YGCg2rZta+vTtWtXVapUST/99JOtT8eOHeXl5WXr06NHD+3Zs0enT5+29bnSnBSlltJy5swZWSwWBQYG2rXPmDFDNWrU0I033qiZM2fanTIt73Owbt061axZU40bN9bjjz+uv/76y7bMnfaBlJQULV++XMOHD8+zrDx9/nwhbCk7efKksrOz7f5hkKTQ0FDt3r3bRVUVX05OjsaNG6dbb71VzZs3t7UPHjxYdevWVVhYmHbs2KFnn31We/bs0eeffy5J+vPPP/Mde+6yK/VJS0vT33//rdOnT7tkDtu3b6+4uDg1btxYx44d09SpU9WhQwft3LlTf/75p7y8vPL8YxAaGlrouHKXXamPq8d+uaVLlyo1NVVDhw61tVXkz/5yufXmV8elY6lZs6bd8sqVKysoKMiuT7169fKsI3dZ9erVC5yTS9dRWC2l4fz583r22Wc1aNAguy/3HDNmjFq3bq2goCBt3LhREydO1LFjxzRr1ixb/eV1Dnr27Kl77rlH9erV04EDB/SPf/xDvXr10g8//CAPDw+32gfef/99Va1aVffcc49de3n7/AlFcEh0dLR27typ77//3q595MiRtt+3aNFCtWvX1h133KEDBw6oQYMGpV2mU/Xq1cv2+xtuuEHt27dX3bp19cknn8jX19eFlZW+9957T7169VJYWJitrSJ/9rgyq9Wq+++/X8YYvfPOO3bLYmJibL+/4YYb5OXlpVGjRmn69Oll6usdHPHAAw/Yft+iRQvdcMMNatCggdatW6c77rjDhZWVvvnz52vIkCHy8fGxay9vnz+nz0pZcHCwPDw88tyVlJKSolq1armoquIZPXq0/ve//2nt2rW69tprr9i3ffv2kqT9+/dLkmrVqpXv2HOXXalPQECAfH19y8wcBgYGqlGjRtq/f79q1aqlzMxMpaamFlhTRRn7oUOHtHr1aj366KNX7FeRP/vcbV2pjlq1aun48eN2y7OysnTq1Cmn7BOXLi+slpKUG4gOHTqkVatW2R0lyk/79u2VlZWl3377TVLFmINc9evXV3BwsN0+7w77QEJCgvbs2VPo3wlS2f/8CUWlzMvLS23atFF8fLytLScnR/Hx8br55ptdWFnhjDEaPXq0vvjiC61ZsybPIc/8JCYmSpJq164tSbr55pv1yy+/2P1FkfsXadOmTW19Lp2f3D6581NW5jA9PV0HDhxQ7dq11aZNG3l6etrVtGfPHh0+fNhWU0UZ+4IFC1SzZk316dPniv0q8mdfr1491apVy66OtLQ0/fTTT3afd2pqqn7++WdbnzVr1ignJ8cWGG+++WatX79eVqvV1mfVqlVq3LixqlevbutzpTkpSi0lJTcQ7du3T6tXr1aNGjUKfU9iYqIqVapkO61U3ufgUkeOHNFff/1lt89X9H1AunjkuE2bNmrZsmWhfcv851+sy7LhFIsWLTLe3t4mLi7OJCUlmZEjR5rAwEC7u3LKoscff9xUq1bNrFu3zu72ynPnzhljjNm/f7954YUXzJYtW0xycrJZtmyZqV+/vunYsaNtHbm3ZXfv3t0kJiaalStXmpCQkHxvy3766afNr7/+amJjY/O9Lbu053D8+PFm3bp1Jjk52WzYsMF07drVBAcHm+PHjxtjLt6SX6dOHbNmzRqzZcsWc/PNN5ubb765Qow9V3Z2tqlTp4559tln7dor4md/9uxZs23bNrNt2zYjycyaNcts27bNdmfVjBkzTGBgoFm2bJnZsWOH6devX7635N94443mp59+Mt9//71p2LCh3e3YqampJjQ01Dz00ENm586dZtGiRcbPzy/P7ciVK1c2r732mvn111/N5MmT870dubBanD0HmZmZ5q677jLXXnutSUxMtPs7IfdOoo0bN5rZs2ebxMREc+DAAfPhhx+akJAQ8/DDD5eLObjS+M+ePWsmTJhgfvjhB5OcnGxWr15tWrdubRo2bGjOnz9vW0d53gcK+zNgzMVb6v38/Mw777yT5/3l8fMnFLnIm2++aerUqWO8vLxMu3btzI8//ujqkgolKd+fBQsWGGOMOXz4sOnYsaMJCgoy3t7e5rrrrjNPP/203bNqjDHmt99+M7169TK+vr4mODjYjB8/3litVrs+a9euNa1atTJeXl6mfv36tm1cqrTncODAgaZ27drGy8vLXHPNNWbgwIFm//79tuV///23eeKJJ0z16tWNn5+fufvuu82xY8fs1lFex57rm2++MZLMnj177Nor4me/du3afPf3qKgoY8zF24D/+c9/mtDQUOPt7W3uuOOOPPPy119/mUGDBhl/f38TEBBghg0bZs6ePWvXZ/v27ea2224z3t7e5pprrjEzZszIU8snn3xiGjVqZLy8vEyzZs3M8uXL7ZYXpRZnz0FycnKBfyfkPrvq559/Nu3btzfVqlUzPj4+5vrrrzfTpk2zCw1leQ6uNP5z586Z7t27m5CQEOPp6Wnq1q1rRowYkSecl+d9oLA/A8YYM2/ePOPr62tSU1PzvL88fv4WY4wp3rElAACAiodrigAAAEQoAgAAkEQoAgAAkEQoAgAAkEQoAgAAkEQoAgAAkEQoAgAAkEQoAuCmOnfurHHjxrm6DABlCKEIQLnTt29f9ezZM99lCQkJslgs2rFjRylXBaC8IxQBKHeGDx+uVatW6ciRI3mWLViwQG3bttUNN9zggsoAlGeEIgDlzp133qmQkBDFxcXZtaenp2vJkiXq37+/Bg0apGuuuUZ+fn5q0aKFPv744yuu02KxaOnSpXZtgYGBdtv4/fffdf/99yswMFBBQUHq16+ffvvtN+cMCoDLEYoAlDuVK1fWww8/rLi4OF369Y1LlixRdna2HnzwQbVp00bLly/Xzp07NXLkSD300EPatGmTw9u0Wq3q0aOHqlatqoSEBG3YsEH+/v7q2bOnMjMznTEsAC5GKAJQLj3yyCM6cOCAvvvuO1vbggULdO+996pu3bqaMGGCWrVqpfr16+vJJ59Uz5499cknnzi8vcWLFysnJ0f/+c9/1KJFC11//fVasGCBDh8+rHXr1jlhRABcjVAEoFxq0qSJbrnlFs2fP1+StH//fiUkJGj48OHKzs7Wiy++qBYtWigoKEj+/v765ptvdPjwYYe3t337du3fv19Vq1aVv7+//P39FRQUpPPnz+vAgQPOGhYAF6rs6gIAwFHDhw/Xk08+qdjYWC1YsEANGjRQp06d9Morr+iNN97QnDlz1KJFC1WpUkXjxo274mkui8VidypOunjKLFd6erratGmjjz76KM97Q0JCnDcoAC5DKAJQbt1///0aO3asFi5cqA8++ECPP/64LBaLNmzYoH79+unBBx+UJOXk5Gjv3r1q2rRpgesKCQnRsWPHbK/37dunc+fO2V63bt1aixcvVs2aNRUQEFBygwLgMpw+A1Bu+fv7a+DAgZo4caKOHTumoUOHSpIaNmyoVatWaePGjfr11181atQopaSkXHFdt99+u9566y1t27ZNW7Zs0WOPPSZPT0/b8iFDhig4OFj9+vVTQkKCkpOTtW7dOo0ZMybfRwMAKH8IRQDKteHDh+v06dPq0aOHwsLCJEn/93//p9atW6tHjx7q3LmzatWqpf79+19xPa+//rrCw8PVoUMHDR48WBMmTJCfn59tuZ+fn9avX686deronnvu0fXXX6/hw4fr/PnzHDkCKgiLufwkOgAAgBviSBEAAIAIRQAAAJIIRQAAAJIIRQAAAJIIRQAAAJIIRQAAAJIIRQAAAJIIRQAAAJIIRQAAAJIIRQAAAJIIRQAAAJIIRQAAAJKk/wdWaqOTP+YkmgAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "\n", + "plt.hist(merged_df['ViewCount'], bins=100, density=True, alpha=0.6, color='g')\n", + "plt.xlabel('Value')\n", + "plt.yscale('log') # Set y-axis to logarithmic scale\n", + "plt.ylabel('Frequency')\n", + "plt.title('Histogram of ViewCount data')\n", + "plt.grid(True)\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "385.0" + ] + }, + "execution_count": 60, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "merged_df['ViewCount'].median()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 9. You will need to make something with missing values. Will you clean or filling them? Explain. \n", + "**Remember** to check the results of your code before passing to the next step" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [], + "source": [ + "median_viewcount = merged_df['ViewCount'].median()\n", + "merged_df.loc[merged_df['ViewCount'].isnull(), 'ViewCount'] = median_viewcount" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
userIdReputationViewsUpVotesDownVotespostIdScoreViewCountCommentCount
0-1105007192021750385.00
1-1105007192085760385.00
2-1105007192085780385.00
3-1105007192089810385.00
4-1105007192089820385.00
..............................
389574593411100340031115.02
389584619236100406675326.02
389594652223513271174613166.00
389605237122120027237243357.05
3896155226119230161761385.00
\n", + "

38962 rows × 9 columns

\n", + "
" + ], + "text/plain": [ + " userId Reputation Views UpVotes DownVotes postId Score \\\n", + "0 -1 1 0 5007 1920 2175 0 \n", + "1 -1 1 0 5007 1920 8576 0 \n", + "2 -1 1 0 5007 1920 8578 0 \n", + "3 -1 1 0 5007 1920 8981 0 \n", + "4 -1 1 0 5007 1920 8982 0 \n", + "... ... ... ... ... ... ... ... \n", + "38957 45934 11 1 0 0 34003 1 \n", + "38958 46192 36 1 0 0 40667 5 \n", + "38959 46522 235 13 27 1 17461 3 \n", + "38960 52371 221 2 0 0 27237 24 \n", + "38961 55226 119 2 3 0 16176 1 \n", + "\n", + " ViewCount CommentCount \n", + "0 385.0 0 \n", + "1 385.0 0 \n", + "2 385.0 0 \n", + "3 385.0 0 \n", + "4 385.0 0 \n", + "... ... ... \n", + "38957 115.0 2 \n", + "38958 326.0 2 \n", + "38959 166.0 0 \n", + "38960 3357.0 5 \n", + "38961 385.0 0 \n", + "\n", + "[38962 rows x 9 columns]" + ] + }, + "execution_count": 68, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "merged_df" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "userId 38962\n", + "Reputation 38962\n", + "Views 38962\n", + "UpVotes 38962\n", + "DownVotes 38962\n", + "postId 38962\n", + "Score 38962\n", + "ViewCount 38962\n", + "CommentCount 38962\n", + "dtype: int64" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "merged_df.count()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 10. Adjust the data types in order to avoid future issues. Which ones should be changed? " + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "userId int64\n", + "Reputation int64\n", + "Views int64\n", + "UpVotes int64\n", + "DownVotes int64\n", + "postId int64\n", + "Score int64\n", + "ViewCount float64\n", + "CommentCount int64\n", + "dtype: object" + ] + }, + "execution_count": 75, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "merged_df.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [], + "source": [ + "merged_df = merged_df.astype('int64')" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "userId int64\n", + "Reputation int64\n", + "Views int64\n", + "UpVotes int64\n", + "DownVotes int64\n", + "postId int64\n", + "Score int64\n", + "ViewCount int64\n", + "CommentCount int64\n", + "dtype: object" + ] + }, + "execution_count": 79, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "merged_df.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}