From 13ec4258962fba028cf4249f2d87c8f32ececfef Mon Sep 17 00:00:00 2001 From: Alberto Rodriguez Date: Sat, 7 Mar 2020 13:58:19 +0100 Subject: [PATCH 1/5] Pull request --- module-3/lab-supervised-learning-feature-extraction/pull | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 module-3/lab-supervised-learning-feature-extraction/pull diff --git a/module-3/lab-supervised-learning-feature-extraction/pull b/module-3/lab-supervised-learning-feature-extraction/pull new file mode 100644 index 00000000..e69de29b From 198802bd41fcd2a6100a52f2151aa20621214c77 Mon Sep 17 00:00:00 2001 From: Alberto Rodriguez Date: Sat, 7 Mar 2020 14:20:24 +0100 Subject: [PATCH 2/5] Pandas finished --- .../pull | 0 .../your-code/Pandas-concat-merge-join.ipynb | 318 +++++++++++++++++- 2 files changed, 302 insertions(+), 16 deletions(-) delete mode 100644 module-3/lab-supervised-learning-feature-extraction/pull diff --git a/module-3/lab-supervised-learning-feature-extraction/pull b/module-3/lab-supervised-learning-feature-extraction/pull deleted file mode 100644 index e69de29b..00000000 diff --git a/module-3/lab-supervised-learning-feature-extraction/your-code/Pandas-concat-merge-join.ipynb b/module-3/lab-supervised-learning-feature-extraction/your-code/Pandas-concat-merge-join.ipynb index 61cf9b9f..73eb6eda 100644 --- a/module-3/lab-supervised-learning-feature-extraction/your-code/Pandas-concat-merge-join.ipynb +++ b/module-3/lab-supervised-learning-feature-extraction/your-code/Pandas-concat-merge-join.ipynb @@ -48,7 +48,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": { "scrolled": true }, @@ -112,7 +112,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -192,7 +192,7 @@ "5 a5 b5 c5" ] }, - "execution_count": 3, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -203,7 +203,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -283,7 +283,7 @@ "5 d5 e5 f5" ] }, - "execution_count": 4, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -305,7 +305,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -466,7 +466,7 @@ "5 NaN NaN NaN d5 e5 f5" ] }, - "execution_count": 5, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -491,20 +491,185 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABCDEF
0a0b0c0NaNNaNNaN
1a1b1c1NaNNaNNaN
2a2b2c2NaNNaNNaN
3a3b3c3NaNNaNNaN
4a4b4c4NaNNaNNaN
5a5b5c5NaNNaNNaN
6NaNNaNNaNd0e0f0
7NaNNaNNaNd1e1f1
8NaNNaNNaNd2e2f2
9NaNNaNNaNd3e3f3
10NaNNaNNaNd4e4f4
11NaNNaNNaNd5e5f5
\n", + "
" + ], + "text/plain": [ + " A B C D E F\n", + "0 a0 b0 c0 NaN NaN NaN\n", + "1 a1 b1 c1 NaN NaN NaN\n", + "2 a2 b2 c2 NaN NaN NaN\n", + "3 a3 b3 c3 NaN NaN NaN\n", + "4 a4 b4 c4 NaN NaN NaN\n", + "5 a5 b5 c5 NaN NaN NaN\n", + "6 NaN NaN NaN d0 e0 f0\n", + "7 NaN NaN NaN d1 e1 f1\n", + "8 NaN NaN NaN d2 e2 f2\n", + "9 NaN NaN NaN d3 e3 f3\n", + "10 NaN NaN NaN d4 e4 f4\n", + "11 NaN NaN NaN d5 e5 f5" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here" + "# Your code here\n", + "pd.concat([df1, df2, df3, df4], sort=False, ignore_index=True)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ - "# Your comment here" + "# Your comment here\n", + "#Row indexes are now unique and consecutive" ] }, { @@ -869,12 +1034,133 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ - "# Your code here\n" + "# Your code here\n", + "df1_1 = pd.concat([df1, df2])\n", + "df2_2 = pd.concat([df3, df4])" ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABCDEF
0a0b0c0d0e0f0
1a1b1c1d1e1f1
2a2b2c2d2e2f2
3a3b3c3d3e3f3
4a4b4c4d4e4f4
5a5b5c5d5e5f5
\n", + "
" + ], + "text/plain": [ + " A B C D E F\n", + "0 a0 b0 c0 d0 e0 f0\n", + "1 a1 b1 c1 d1 e1 f1\n", + "2 a2 b2 c2 d2 e2 f2\n", + "3 a3 b3 c3 d3 e3 f3\n", + "4 a4 b4 c4 d4 e4 f4\n", + "5 a5 b5 c5 d5 e5 f5" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.concat([df1_1, df2_2], axis=1)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -893,9 +1179,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.1" + "version": "3.7.5" } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } From f73cfd6bc2bedaa2c71a0acddd244575a61c8ae3 Mon Sep 17 00:00:00 2001 From: Alberto Rodriguez Date: Sat, 21 Mar 2020 13:05:33 +0100 Subject: [PATCH 3/5] npi --- .../your-code/main.ipynb | 208 ++++++++++++++++-- 1 file changed, 186 insertions(+), 22 deletions(-) diff --git a/module-3/lab-supervised-learning-feature-extraction/your-code/main.ipynb b/module-3/lab-supervised-learning-feature-extraction/your-code/main.ipynb index 01f76271..2f31c91e 100644 --- a/module-3/lab-supervised-learning-feature-extraction/your-code/main.ipynb +++ b/module-3/lab-supervised-learning-feature-extraction/your-code/main.ipynb @@ -12,7 +12,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -61,7 +61,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -79,11 +79,36 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "App object\n", + "Category object\n", + "Rating float64\n", + "Reviews object\n", + "Size object\n", + "Installs object\n", + "Type object\n", + "Price object\n", + "Content Rating object\n", + "Genres object\n", + "Last Updated object\n", + "Current Ver object\n", + "Android Ver object\n", + "dtype: object" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "google_play.dtypes" ] }, { @@ -95,11 +120,104 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AppCategoryRatingReviewsSizeInstallsTypePriceContent RatingGenresLast UpdatedCurrent VerAndroid Ver
0Photo Editor & Candy Camera & Grid & ScrapBookART_AND_DESIGN4.115919M10,000+Free0EveryoneArt & DesignJanuary 7, 20181.0.04.0.3 and up
1Coloring book moanaART_AND_DESIGN3.996714M500,000+Free0EveryoneArt & Design;Pretend PlayJanuary 15, 20182.0.04.0.3 and up
\n", + "
" + ], + "text/plain": [ + " App Category Rating \\\n", + "0 Photo Editor & Candy Camera & Grid & ScrapBook ART_AND_DESIGN 4.1 \n", + "1 Coloring book moana ART_AND_DESIGN 3.9 \n", + "\n", + " Reviews Size Installs Type Price Content Rating \\\n", + "0 159 19M 10,000+ Free 0 Everyone \n", + "1 967 14M 500,000+ Free 0 Everyone \n", + "\n", + " Genres Last Updated Current Ver Android Ver \n", + "0 Art & Design January 7, 2018 1.0.0 4.0.3 and up \n", + "1 Art & Design;Pretend Play January 15, 2018 2.0.0 4.0.3 and up " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "google_play.head(2)" ] }, { @@ -115,11 +233,34 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 NaN\n", + "1 NaN\n", + "2 NaN\n", + "3 NaN\n", + "4 NaN\n", + " ..\n", + "10836 NaN\n", + "10837 NaN\n", + "10838 NaN\n", + "10839 NaN\n", + "10840 NaN\n", + "Name: Reviews, Length: 10841, dtype: float64" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "pd.to_numeric(google_play['Reviews'],errors='coerce')" ] }, { @@ -131,11 +272,34 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 3.0M\n", + "1 3.0M\n", + "2 3.0M\n", + "3 3.0M\n", + "4 3.0M\n", + " ... \n", + "10836 3.0M\n", + "10837 3.0M\n", + "10838 3.0M\n", + "10839 3.0M\n", + "10840 3.0M\n", + "Name: Reviews, Length: 10841, dtype: object" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "google_play['Reviews']" ] }, { @@ -756,9 +920,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python [conda env:data_env]", "language": "python", - "name": "python3" + "name": "conda-env-data_env-py" }, "language_info": { "codemirror_mode": { @@ -770,9 +934,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.6" + "version": "3.7.5" } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } From ad65130264093023feeaa1e5374089962dd263d6 Mon Sep 17 00:00:00 2001 From: Alberto Rodriguez Date: Sat, 21 Mar 2020 14:51:17 +0100 Subject: [PATCH 4/5] With Challenge 2 --- .../your-code/main.ipynb | 786 +++++++++++++++--- 1 file changed, 663 insertions(+), 123 deletions(-) diff --git a/module-3/lab-supervised-learning-feature-extraction/your-code/main.ipynb b/module-3/lab-supervised-learning-feature-extraction/your-code/main.ipynb index 2f31c91e..65910523 100644 --- a/module-3/lab-supervised-learning-feature-extraction/your-code/main.ipynb +++ b/module-3/lab-supervised-learning-feature-extraction/your-code/main.ipynb @@ -12,7 +12,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 122, "metadata": {}, "outputs": [], "source": [ @@ -61,7 +61,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 123, "metadata": {}, "outputs": [], "source": [ @@ -79,7 +79,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 124, "metadata": {}, "outputs": [ { @@ -101,7 +101,7 @@ "dtype: object" ] }, - "execution_count": 5, + "execution_count": 124, "metadata": {}, "output_type": "execute_result" } @@ -120,7 +120,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 125, "metadata": {}, "outputs": [ { @@ -210,7 +210,7 @@ "1 Art & Design;Pretend Play January 15, 2018 2.0.0 4.0.3 and up " ] }, - "execution_count": 7, + "execution_count": 125, "metadata": {}, "output_type": "execute_result" } @@ -233,34 +233,12 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 126, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 NaN\n", - "1 NaN\n", - "2 NaN\n", - "3 NaN\n", - "4 NaN\n", - " ..\n", - "10836 NaN\n", - "10837 NaN\n", - "10838 NaN\n", - "10839 NaN\n", - "10840 NaN\n", - "Name: Reviews, Length: 10841, dtype: float64" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# Your code here:\n", - "pd.to_numeric(google_play['Reviews'],errors='coerce')" + "google_play['Reviews_numeric'] = pd.to_numeric(google_play['Reviews'],errors='coerce')" ] }, { @@ -272,34 +250,12 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 127, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 3.0M\n", - "1 3.0M\n", - "2 3.0M\n", - "3 3.0M\n", - "4 3.0M\n", - " ... \n", - "10836 3.0M\n", - "10837 3.0M\n", - "10838 3.0M\n", - "10839 3.0M\n", - "10840 3.0M\n", - "Name: Reviews, Length: 10841, dtype: object" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# Your code here:\n", - "google_play['Reviews']" + "google_play['Reviews_isnull'] = google_play['Reviews_numeric'].isnull()" ] }, { @@ -315,11 +271,89 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 128, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AppCategoryRatingReviewsSizeInstallsTypePriceContent RatingGenresLast UpdatedCurrent VerAndroid VerReviews_numericReviews_isnull
10472Life Made WI-Fi Touchscreen Photo Frame1.919.03.0M1,000+Free0EveryoneNaNFebruary 11, 20181.0.194.0 and upNaNNaNTrue
\n", + "
" + ], + "text/plain": [ + " App Category Rating Reviews \\\n", + "10472 Life Made WI-Fi Touchscreen Photo Frame 1.9 19.0 3.0M \n", + "\n", + " Size Installs Type Price Content Rating Genres \\\n", + "10472 1,000+ Free 0 Everyone NaN February 11, 2018 \n", + "\n", + " Last Updated Current Ver Android Ver Reviews_numeric Reviews_isnull \n", + "10472 1.0.19 4.0 and up NaN NaN True " + ] + }, + "execution_count": 128, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "google_play[google_play['Reviews_isnull'] == True]" ] }, { @@ -339,28 +373,34 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 129, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 129, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here\n", "\n", "def convert_string_to_numeric(s):\n", - " \"\"\"\n", - " Convert a string value to numeric. If the last character of the string is `M`, obtain the \n", - " numeric part of the string, multiply it with 1,000,000, then return the result. Otherwise, \n", - " convert the string to numeric value and return the result.\n", - " \n", - " Args:\n", - " s: The Reviews score in string format.\n", + " if 'M' in s:\n", + " s = s.replace ('M', '00000')\n", + " s = s.replace('.','')\n", + " else:\n", + " pass\n", + " return float(s)\n", "\n", - " Returns:\n", - " The correct numeric value of the Reviews score.\n", - " \"\"\"\n", - " return np.NaN\n", "\n", - "test_string = '4.0M'\n", "\n", + "test_string = '4.0M'\n", "convert_string_to_numeric(test_string) == 4000000" ] }, @@ -373,11 +413,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 130, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "google_play['Reviews'] = google_play['Reviews'].apply(convert_string_to_numeric)\n" ] }, { @@ -391,11 +432,89 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 131, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AppCategoryRatingReviewsSizeInstallsTypePriceContent RatingGenresLast UpdatedCurrent VerAndroid VerReviews_numericReviews_isnull
10472Life Made WI-Fi Touchscreen Photo Frame1.919.03000000.01,000+Free0EveryoneNaNFebruary 11, 20181.0.194.0 and upNaNNaNTrue
\n", + "
" + ], + "text/plain": [ + " App Category Rating Reviews \\\n", + "10472 Life Made WI-Fi Touchscreen Photo Frame 1.9 19.0 3000000.0 \n", + "\n", + " Size Installs Type Price Content Rating Genres \\\n", + "10472 1,000+ Free 0 Everyone NaN February 11, 2018 \n", + "\n", + " Last Updated Current Ver Android Ver Reviews_numeric Reviews_isnull \n", + "10472 1.0.19 4.0 and up NaN NaN True " + ] + }, + "execution_count": 131, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here\n" + "# Your code here\n", + "google_play.loc[[10472]]" ] }, { @@ -407,11 +526,39 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 132, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "App object\n", + "Category object\n", + "Rating float64\n", + "Reviews float64\n", + "Size object\n", + "Installs object\n", + "Type object\n", + "Price object\n", + "Content Rating object\n", + "Genres object\n", + "Last Updated object\n", + "Current Ver object\n", + "Android Ver object\n", + "Reviews_numeric float64\n", + "Reviews_isnull bool\n", + "dtype: object" + ] + }, + "execution_count": 132, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "google_play['Reviews'] = google_play['Reviews'].astype(float)\n", + "google_play.dtypes" ] }, { @@ -425,11 +572,79 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 133, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "array(['19M', '14M', '8.7M', '25M', '2.8M', '5.6M', '29M', '33M', '3.1M',\n", + " '28M', '12M', '20M', '21M', '37M', '2.7M', '5.5M', '17M', '39M',\n", + " '31M', '4.2M', '7.0M', '23M', '6.0M', '6.1M', '4.6M', '9.2M',\n", + " '5.2M', '11M', '24M', 'Varies with device', '9.4M', '15M', '10M',\n", + " '1.2M', '26M', '8.0M', '7.9M', '56M', '57M', '35M', '54M', '201k',\n", + " '3.6M', '5.7M', '8.6M', '2.4M', '27M', '2.5M', '16M', '3.4M',\n", + " '8.9M', '3.9M', '2.9M', '38M', '32M', '5.4M', '18M', '1.1M',\n", + " '2.2M', '4.5M', '9.8M', '52M', '9.0M', '6.7M', '30M', '2.6M',\n", + " '7.1M', '3.7M', '22M', '7.4M', '6.4M', '3.2M', '8.2M', '9.9M',\n", + " '4.9M', '9.5M', '5.0M', '5.9M', '13M', '73M', '6.8M', '3.5M',\n", + " '4.0M', '2.3M', '7.2M', '2.1M', '42M', '7.3M', '9.1M', '55M',\n", + " '23k', '6.5M', '1.5M', '7.5M', '51M', '41M', '48M', '8.5M', '46M',\n", + " '8.3M', '4.3M', '4.7M', '3.3M', '40M', '7.8M', '8.8M', '6.6M',\n", + " '5.1M', '61M', '66M', '79k', '8.4M', '118k', '44M', '695k', '1.6M',\n", + " '6.2M', '18k', '53M', '1.4M', '3.0M', '5.8M', '3.8M', '9.6M',\n", + " '45M', '63M', '49M', '77M', '4.4M', '4.8M', '70M', '6.9M', '9.3M',\n", + " '10.0M', '8.1M', '36M', '84M', '97M', '2.0M', '1.9M', '1.8M',\n", + " '5.3M', '47M', '556k', '526k', '76M', '7.6M', '59M', '9.7M', '78M',\n", + " '72M', '43M', '7.7M', '6.3M', '334k', '34M', '93M', '65M', '79M',\n", + " '100M', '58M', '50M', '68M', '64M', '67M', '60M', '94M', '232k',\n", + " '99M', '624k', '95M', '8.5k', '41k', '292k', '11k', '80M', '1.7M',\n", + " '74M', '62M', '69M', '75M', '98M', '85M', '82M', '96M', '87M',\n", + " '71M', '86M', '91M', '81M', '92M', '83M', '88M', '704k', '862k',\n", + " '899k', '378k', '266k', '375k', '1.3M', '975k', '980k', '4.1M',\n", + " '89M', '696k', '544k', '525k', '920k', '779k', '853k', '720k',\n", + " '713k', '772k', '318k', '58k', '241k', '196k', '857k', '51k',\n", + " '953k', '865k', '251k', '930k', '540k', '313k', '746k', '203k',\n", + " '26k', '314k', '239k', '371k', '220k', '730k', '756k', '91k',\n", + " '293k', '17k', '74k', '14k', '317k', '78k', '924k', '902k', '818k',\n", + " '81k', '939k', '169k', '45k', '475k', '965k', '90M', '545k', '61k',\n", + " '283k', '655k', '714k', '93k', '872k', '121k', '322k', '1.0M',\n", + " '976k', '172k', '238k', '549k', '206k', '954k', '444k', '717k',\n", + " '210k', '609k', '308k', '705k', '306k', '904k', '473k', '175k',\n", + " '350k', '383k', '454k', '421k', '70k', '812k', '442k', '842k',\n", + " '417k', '412k', '459k', '478k', '335k', '782k', '721k', '430k',\n", + " '429k', '192k', '200k', '460k', '728k', '496k', '816k', '414k',\n", + " '506k', '887k', '613k', '243k', '569k', '778k', '683k', '592k',\n", + " '319k', '186k', '840k', '647k', '191k', '373k', '437k', '598k',\n", + " '716k', '585k', '982k', '222k', '219k', '55k', '948k', '323k',\n", + " '691k', '511k', '951k', '963k', '25k', '554k', '351k', '27k',\n", + " '82k', '208k', '913k', '514k', '551k', '29k', '103k', '898k',\n", + " '743k', '116k', '153k', '209k', '353k', '499k', '173k', '597k',\n", + " '809k', '122k', '411k', '400k', '801k', '787k', '237k', '50k',\n", + " '643k', '986k', '97k', '516k', '837k', '780k', '961k', '269k',\n", + " '20k', '498k', '600k', '749k', '642k', '881k', '72k', '656k',\n", + " '601k', '221k', '228k', '108k', '940k', '176k', '33k', '663k',\n", + " '34k', '942k', '259k', '164k', '458k', '245k', '629k', '28k',\n", + " '288k', '775k', '785k', '636k', '916k', '994k', '309k', '485k',\n", + " '914k', '903k', '608k', '500k', '54k', '562k', '847k', '957k',\n", + " '688k', '811k', '270k', '48k', '329k', '523k', '921k', '874k',\n", + " '981k', '784k', '280k', '24k', '518k', '754k', '892k', '154k',\n", + " '860k', '364k', '387k', '626k', '161k', '879k', '39k', '970k',\n", + " '170k', '141k', '160k', '144k', '143k', '190k', '376k', '193k',\n", + " '246k', '73k', '658k', '992k', '253k', '420k', '404k', '1,000+',\n", + " '470k', '226k', '240k', '89k', '234k', '257k', '861k', '467k',\n", + " '157k', '44k', '676k', '67k', '552k', '885k', '1020k', '582k',\n", + " '619k'], dtype=object)" + ] + }, + "execution_count": 133, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "google_play['Size'].unique()" ] }, { @@ -445,11 +660,34 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 134, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "Varies with device 15.635089\n", + "11M 1.826400\n", + "12M 1.807951\n", + "14M 1.789503\n", + "13M 1.761830\n", + " ... \n", + "186k 0.009224\n", + "91k 0.009224\n", + "554k 0.009224\n", + "549k 0.009224\n", + "154k 0.009224\n", + "Name: Size, Length: 462, dtype: float64" + ] + }, + "execution_count": 134, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "google_play['Size'].value_counts()/len(google_play) *100\n" ] }, { @@ -465,11 +703,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 135, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "google_play.drop(columns = ['Size'], inplace = True)" ] }, { @@ -485,11 +724,37 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 136, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "App 0\n", + "Category 0\n", + "Rating 1474\n", + "Reviews 0\n", + "Installs 0\n", + "Type 1\n", + "Price 0\n", + "Content Rating 1\n", + "Genres 0\n", + "Last Updated 0\n", + "Current Ver 8\n", + "Android Ver 3\n", + "Reviews_numeric 1\n", + "Reviews_isnull 0\n", + "dtype: int64" + ] + }, + "execution_count": 136, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "google_play.isna().sum()" ] }, { @@ -505,11 +770,37 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 138, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "App 0.000000\n", + "Category 0.000000\n", + "Rating 13.596532\n", + "Reviews 0.000000\n", + "Installs 0.000000\n", + "Type 0.009224\n", + "Price 0.000000\n", + "Content Rating 0.009224\n", + "Genres 0.000000\n", + "Last Updated 0.000000\n", + "Current Ver 0.073794\n", + "Android Ver 0.027673\n", + "Reviews_numeric 0.009224\n", + "Reviews_isnull 0.000000\n", + "dtype: float64" + ] + }, + "execution_count": 138, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "google_play.isna().sum()/len(google_play)*100" ] }, { @@ -531,11 +822,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 139, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "google_missing_removed = google_play.dropna()" ] }, { @@ -551,11 +843,34 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 140, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0 2018-01-07\n", + "1 2018-01-15\n", + "2 2018-08-01\n", + "3 2018-06-08\n", + "4 2018-06-20\n", + " ... \n", + "10834 2017-06-18\n", + "10836 2017-07-25\n", + "10837 2018-07-06\n", + "10839 2015-01-19\n", + "10840 2018-07-25\n", + "Name: Last Updated, Length: 9360, dtype: datetime64[ns]" + ] + }, + "execution_count": 140, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n" + "# Your code here\n", + "pd.to_datetime(google_missing_removed['Last Updated'])" ] }, { @@ -569,11 +884,33 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 141, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "array(['0', '$4.99', '$3.99', '$6.99', '$7.99', '$5.99', '$2.99', '$3.49',\n", + " '$1.99', '$9.99', '$7.49', '$0.99', '$9.00', '$5.49', '$10.00',\n", + " '$24.99', '$11.99', '$79.99', '$16.99', '$14.99', '$29.99',\n", + " '$12.99', '$2.49', '$10.99', '$1.50', '$19.99', '$15.99', '$33.99',\n", + " '$39.99', '$3.95', '$4.49', '$1.70', '$8.99', '$1.49', '$3.88',\n", + " '$399.99', '$17.99', '$400.00', '$3.02', '$1.76', '$4.84', '$4.77',\n", + " '$1.61', '$2.50', '$1.59', '$6.49', '$1.29', '$299.99', '$379.99',\n", + " '$37.99', '$18.99', '$389.99', '$8.49', '$1.75', '$14.00', '$2.00',\n", + " '$3.08', '$2.59', '$19.40', '$3.90', '$4.59', '$15.46', '$3.04',\n", + " '$13.99', '$4.29', '$3.28', '$4.60', '$1.00', '$2.95', '$2.90',\n", + " '$1.97', '$2.56', '$1.20'], dtype=object)" + ] + }, + "execution_count": 141, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "google_missing_removed['Price'].unique()" ] }, { @@ -589,11 +926,54 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 144, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/alberto/miniconda3/envs/data_env/lib/python3.7/site-packages/ipykernel_launcher.py:2: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " \n" + ] + } + ], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "google_missing_removed['Price'] = google_missing_removed.Price.apply(lambda x : x.replace('$',''))" + ] + }, + { + "cell_type": "code", + "execution_count": 145, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['0', '4.99', '3.99', '6.99', '7.99', '5.99', '2.99', '3.49',\n", + " '1.99', '9.99', '7.49', '0.99', '9.00', '5.49', '10.00', '24.99',\n", + " '11.99', '79.99', '16.99', '14.99', '29.99', '12.99', '2.49',\n", + " '10.99', '1.50', '19.99', '15.99', '33.99', '39.99', '3.95',\n", + " '4.49', '1.70', '8.99', '1.49', '3.88', '399.99', '17.99',\n", + " '400.00', '3.02', '1.76', '4.84', '4.77', '1.61', '2.50', '1.59',\n", + " '6.49', '1.29', '299.99', '379.99', '37.99', '18.99', '389.99',\n", + " '8.49', '1.75', '14.00', '2.00', '3.08', '2.59', '19.40', '3.90',\n", + " '4.59', '15.46', '3.04', '13.99', '4.29', '3.28', '4.60', '1.00',\n", + " '2.95', '2.90', '1.97', '2.56', '1.20'], dtype=object)" + ] + }, + "execution_count": 145, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "google_missing_removed['Price'].unique()" ] }, { @@ -605,11 +985,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 146, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/alberto/miniconda3/envs/data_env/lib/python3.7/site-packages/ipykernel_launcher.py:2: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " \n" + ] + } + ], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "google_missing_removed['Price Numerical'] = pd.to_numeric(google_missing_removed['Price'],errors='coerce')" ] }, { @@ -621,11 +1015,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 147, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "google_missing_removed.drop(columns = ['Price'], inplace = True)" ] }, { @@ -641,11 +1036,37 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 148, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "App object\n", + "Category object\n", + "Rating float64\n", + "Reviews float64\n", + "Installs object\n", + "Type object\n", + "Content Rating object\n", + "Genres object\n", + "Last Updated object\n", + "Current Ver object\n", + "Android Ver object\n", + "Reviews_numeric float64\n", + "Reviews_isnull bool\n", + "Price Numerical float64\n", + "dtype: object" + ] + }, + "execution_count": 148, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here" + "# Your code here\n", + "google_missing_removed.dtypes" ] }, { @@ -664,7 +1085,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 149, "metadata": {}, "outputs": [], "source": [ @@ -684,11 +1105,106 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 150, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AppTranslated_ReviewSentimentSentiment_PolaritySentiment_Subjectivity
010 Best Foods for YouI like eat delicious food. That's I'm cooking ...Positive1.000.533333
110 Best Foods for YouThis help eating healthy exercise regular basisPositive0.250.288462
210 Best Foods for YouNaNNaNNaNNaN
310 Best Foods for YouWorks great especially going grocery storePositive0.400.875000
410 Best Foods for YouBest idea usPositive1.000.300000
\n", + "
" + ], + "text/plain": [ + " App Translated_Review \\\n", + "0 10 Best Foods for You I like eat delicious food. That's I'm cooking ... \n", + "1 10 Best Foods for You This help eating healthy exercise regular basis \n", + "2 10 Best Foods for You NaN \n", + "3 10 Best Foods for You Works great especially going grocery store \n", + "4 10 Best Foods for You Best idea us \n", + "\n", + " Sentiment Sentiment_Polarity Sentiment_Subjectivity \n", + "0 Positive 1.00 0.533333 \n", + "1 Positive 0.25 0.288462 \n", + "2 NaN NaN NaN \n", + "3 Positive 0.40 0.875000 \n", + "4 Positive 1.00 0.300000 " + ] + }, + "execution_count": 150, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here\n" + "# Your code here\n", + "google_review.head()" ] }, { @@ -715,11 +1231,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 151, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "review_missing_removed = google_review.dropna()" ] }, { @@ -731,11 +1248,34 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 154, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "Bowmasters 312\n", + "Angry Birds Classic 273\n", + "Helix Jump 273\n", + "Calorie Counter - MyFitnessPal 254\n", + "Candy Crush Saga 240\n", + " ... \n", + "Draw In 1\n", + "Calculator - unit converter 1\n", + "All-in-One Mahjong 3 FREE 1\n", + "Bed Time Fan - White Noise Sleep Sounds 1\n", + "HomeWork 1\n", + "Name: App, Length: 865, dtype: int64" + ] + }, + "execution_count": 154, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "review_missing_removed['App'].value_counts()" ] }, { From e81a90da70aa31b5ae985b1f1e1d375c49a5a80f Mon Sep 17 00:00:00 2001 From: Alberto Rodriguez Date: Sat, 21 Mar 2020 20:03:55 +0100 Subject: [PATCH 5/5] Lab Finished --- .../your-code/main.ipynb | 478 +++++++++++++----- 1 file changed, 356 insertions(+), 122 deletions(-) diff --git a/module-3/lab-supervised-learning-feature-extraction/your-code/main.ipynb b/module-3/lab-supervised-learning-feature-extraction/your-code/main.ipynb index 65910523..028d8a65 100644 --- a/module-3/lab-supervised-learning-feature-extraction/your-code/main.ipynb +++ b/module-3/lab-supervised-learning-feature-extraction/your-code/main.ipynb @@ -12,12 +12,11 @@ }, { "cell_type": "code", - "execution_count": 122, + "execution_count": 184, "metadata": {}, "outputs": [], "source": [ "#Import your libraries\n", - "\n", "import numpy as np\n", "import pandas as pd" ] @@ -61,7 +60,7 @@ }, { "cell_type": "code", - "execution_count": 123, + "execution_count": 185, "metadata": {}, "outputs": [], "source": [ @@ -79,7 +78,7 @@ }, { "cell_type": "code", - "execution_count": 124, + "execution_count": 186, "metadata": {}, "outputs": [ { @@ -101,7 +100,7 @@ "dtype: object" ] }, - "execution_count": 124, + "execution_count": 186, "metadata": {}, "output_type": "execute_result" } @@ -120,7 +119,7 @@ }, { "cell_type": "code", - "execution_count": 125, + "execution_count": 187, "metadata": {}, "outputs": [ { @@ -210,7 +209,7 @@ "1 Art & Design;Pretend Play January 15, 2018 2.0.0 4.0.3 and up " ] }, - "execution_count": 125, + "execution_count": 187, "metadata": {}, "output_type": "execute_result" } @@ -233,7 +232,7 @@ }, { "cell_type": "code", - "execution_count": 126, + "execution_count": 188, "metadata": {}, "outputs": [], "source": [ @@ -250,7 +249,7 @@ }, { "cell_type": "code", - "execution_count": 127, + "execution_count": 189, "metadata": {}, "outputs": [], "source": [ @@ -271,7 +270,7 @@ }, { "cell_type": "code", - "execution_count": 128, + "execution_count": 190, "metadata": {}, "outputs": [ { @@ -346,7 +345,7 @@ "10472 1.0.19 4.0 and up NaN NaN True " ] }, - "execution_count": 128, + "execution_count": 190, "metadata": {}, "output_type": "execute_result" } @@ -373,7 +372,7 @@ }, { "cell_type": "code", - "execution_count": 129, + "execution_count": 191, "metadata": {}, "outputs": [ { @@ -382,7 +381,7 @@ "True" ] }, - "execution_count": 129, + "execution_count": 191, "metadata": {}, "output_type": "execute_result" } @@ -413,7 +412,7 @@ }, { "cell_type": "code", - "execution_count": 130, + "execution_count": 192, "metadata": {}, "outputs": [], "source": [ @@ -432,7 +431,7 @@ }, { "cell_type": "code", - "execution_count": 131, + "execution_count": 193, "metadata": {}, "outputs": [ { @@ -507,7 +506,7 @@ "10472 1.0.19 4.0 and up NaN NaN True " ] }, - "execution_count": 131, + "execution_count": 193, "metadata": {}, "output_type": "execute_result" } @@ -526,7 +525,7 @@ }, { "cell_type": "code", - "execution_count": 132, + "execution_count": 194, "metadata": {}, "outputs": [ { @@ -550,7 +549,7 @@ "dtype: object" ] }, - "execution_count": 132, + "execution_count": 194, "metadata": {}, "output_type": "execute_result" } @@ -572,7 +571,7 @@ }, { "cell_type": "code", - "execution_count": 133, + "execution_count": 195, "metadata": {}, "outputs": [ { @@ -637,7 +636,7 @@ " '619k'], dtype=object)" ] }, - "execution_count": 133, + "execution_count": 195, "metadata": {}, "output_type": "execute_result" } @@ -660,7 +659,7 @@ }, { "cell_type": "code", - "execution_count": 134, + "execution_count": 196, "metadata": {}, "outputs": [ { @@ -672,15 +671,15 @@ "14M 1.789503\n", "13M 1.761830\n", " ... \n", - "186k 0.009224\n", - "91k 0.009224\n", - "554k 0.009224\n", - "549k 0.009224\n", - "154k 0.009224\n", + "412k 0.009224\n", + "421k 0.009224\n", + "720k 0.009224\n", + "314k 0.009224\n", + "97k 0.009224\n", "Name: Size, Length: 462, dtype: float64" ] }, - "execution_count": 134, + "execution_count": 196, "metadata": {}, "output_type": "execute_result" } @@ -703,7 +702,7 @@ }, { "cell_type": "code", - "execution_count": 135, + "execution_count": 197, "metadata": {}, "outputs": [], "source": [ @@ -724,7 +723,7 @@ }, { "cell_type": "code", - "execution_count": 136, + "execution_count": 198, "metadata": {}, "outputs": [ { @@ -747,7 +746,7 @@ "dtype: int64" ] }, - "execution_count": 136, + "execution_count": 198, "metadata": {}, "output_type": "execute_result" } @@ -770,7 +769,7 @@ }, { "cell_type": "code", - "execution_count": 138, + "execution_count": 199, "metadata": {}, "outputs": [ { @@ -793,7 +792,7 @@ "dtype: float64" ] }, - "execution_count": 138, + "execution_count": 199, "metadata": {}, "output_type": "execute_result" } @@ -822,7 +821,7 @@ }, { "cell_type": "code", - "execution_count": 139, + "execution_count": 200, "metadata": {}, "outputs": [], "source": [ @@ -843,34 +842,25 @@ }, { "cell_type": "code", - "execution_count": 140, + "execution_count": 207, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "0 2018-01-07\n", - "1 2018-01-15\n", - "2 2018-08-01\n", - "3 2018-06-08\n", - "4 2018-06-20\n", - " ... \n", - "10834 2017-06-18\n", - "10836 2017-07-25\n", - "10837 2018-07-06\n", - "10839 2015-01-19\n", - "10840 2018-07-25\n", - "Name: Last Updated, Length: 9360, dtype: datetime64[ns]" - ] - }, - "execution_count": 140, - "metadata": {}, - "output_type": "execute_result" + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/alberto/miniconda3/envs/data_env/lib/python3.7/site-packages/ipykernel_launcher.py:2: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " \n" + ] } ], "source": [ "# Your code here\n", - "pd.to_datetime(google_missing_removed['Last Updated'])" + "google_missing_removed['Last Updated'] = pd.to_datetime(google_missing_removed['Last Updated'])" ] }, { @@ -884,7 +874,7 @@ }, { "cell_type": "code", - "execution_count": 141, + "execution_count": 209, "metadata": {}, "outputs": [ { @@ -903,7 +893,7 @@ " '$1.97', '$2.56', '$1.20'], dtype=object)" ] }, - "execution_count": 141, + "execution_count": 209, "metadata": {}, "output_type": "execute_result" } @@ -926,7 +916,7 @@ }, { "cell_type": "code", - "execution_count": 144, + "execution_count": 210, "metadata": {}, "outputs": [ { @@ -949,7 +939,7 @@ }, { "cell_type": "code", - "execution_count": 145, + "execution_count": 211, "metadata": {}, "outputs": [ { @@ -967,7 +957,7 @@ " '2.95', '2.90', '1.97', '2.56', '1.20'], dtype=object)" ] }, - "execution_count": 145, + "execution_count": 211, "metadata": {}, "output_type": "execute_result" } @@ -985,7 +975,7 @@ }, { "cell_type": "code", - "execution_count": 146, + "execution_count": 212, "metadata": {}, "outputs": [ { @@ -1015,7 +1005,7 @@ }, { "cell_type": "code", - "execution_count": 147, + "execution_count": 213, "metadata": {}, "outputs": [], "source": [ @@ -1036,30 +1026,30 @@ }, { "cell_type": "code", - "execution_count": 148, + "execution_count": 214, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "App object\n", - "Category object\n", - "Rating float64\n", - "Reviews float64\n", - "Installs object\n", - "Type object\n", - "Content Rating object\n", - "Genres object\n", - "Last Updated object\n", - "Current Ver object\n", - "Android Ver object\n", - "Reviews_numeric float64\n", - "Reviews_isnull bool\n", - "Price Numerical float64\n", + "App object\n", + "Category object\n", + "Rating float64\n", + "Reviews float64\n", + "Installs object\n", + "Type object\n", + "Content Rating object\n", + "Genres object\n", + "Last Updated datetime64[ns]\n", + "Current Ver object\n", + "Android Ver object\n", + "Reviews_numeric float64\n", + "Reviews_isnull bool\n", + "Price Numerical float64\n", "dtype: object" ] }, - "execution_count": 148, + "execution_count": 214, "metadata": {}, "output_type": "execute_result" } @@ -1085,7 +1075,7 @@ }, { "cell_type": "code", - "execution_count": 149, + "execution_count": 215, "metadata": {}, "outputs": [], "source": [ @@ -1105,7 +1095,7 @@ }, { "cell_type": "code", - "execution_count": 150, + "execution_count": 216, "metadata": {}, "outputs": [ { @@ -1197,7 +1187,7 @@ "4 Positive 1.00 0.300000 " ] }, - "execution_count": 150, + "execution_count": 216, "metadata": {}, "output_type": "execute_result" } @@ -1231,7 +1221,7 @@ }, { "cell_type": "code", - "execution_count": 151, + "execution_count": 217, "metadata": {}, "outputs": [], "source": [ @@ -1248,27 +1238,27 @@ }, { "cell_type": "code", - "execution_count": 154, + "execution_count": 218, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "Bowmasters 312\n", - "Angry Birds Classic 273\n", - "Helix Jump 273\n", - "Calorie Counter - MyFitnessPal 254\n", - "Candy Crush Saga 240\n", - " ... \n", - "Draw In 1\n", - "Calculator - unit converter 1\n", - "All-in-One Mahjong 3 FREE 1\n", - "Bed Time Fan - White Noise Sleep Sounds 1\n", - "HomeWork 1\n", + "Bowmasters 312\n", + "Angry Birds Classic 273\n", + "Helix Jump 273\n", + "Calorie Counter - MyFitnessPal 254\n", + "Duolingo: Learn Languages Free 240\n", + " ... \n", + "CallApp: Caller ID, Blocker & Phone Call Recorder 1\n", + "Apartment Decorating Ideas 1\n", + "Draw A Stickman 1\n", + "Daily Workouts - Exercise Fitness Routine Trainer 1\n", + "CBS News 1\n", "Name: App, Length: 865, dtype: int64" ] }, - "execution_count": 154, + "execution_count": 218, "metadata": {}, "output_type": "execute_result" } @@ -1304,23 +1294,34 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 219, "metadata": {}, "outputs": [], "source": [ "# Your code below\n", "\n", "def positive_function(x):\n", - " \"\"\"\n", - " Count how many times the string `Positive` appears in a column (exact string match).\n", - " \n", - " Args:\n", - " x: data column\n", - " \n", - " Returns:\n", - " The number of occurrences of `Positive` in the column data.\n", - " \"\"\"\n", - " return 0" + " return len(x[(np.where(x == 'Positive', 1,0) == 1)])" + ] + }, + { + "cell_type": "code", + "execution_count": 220, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "23998" + ] + }, + "execution_count": 220, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "positive_function(review_missing_removed['Sentiment'])" ] }, { @@ -1344,11 +1345,14 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 221, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "google_agg = review_missing_removed.groupby('App').agg({'Sentiment':[positive_function],'App':'count'})\n", + "google_agg.columns = google_agg.columns.droplevel()\n", + "google_agg = google_agg.rename(columns={'positive_function':'Positive','count':'Total'})" ] }, { @@ -1360,11 +1364,87 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 222, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PositiveTotal
App
10 Best Foods for You162194
104 找工作 - 找工作 找打工 找兼職 履歷健檢 履歷診療室3140
11st2339
1800 Contacts - Lens Store6480
1LINE – One Line with One Touch2738
\n", + "
" + ], + "text/plain": [ + " Positive Total\n", + "App \n", + "10 Best Foods for You 162 194\n", + "104 找工作 - 找工作 找打工 找兼職 履歷健檢 履歷診療室 31 40\n", + "11st 23 39\n", + "1800 Contacts - Lens Store 64 80\n", + "1LINE – One Line with One Touch 27 38" + ] + }, + "execution_count": 222, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here\n" + "# Your code here\n", + "google_agg.head()" ] }, { @@ -1378,11 +1458,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 223, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "google_agg['Positive Ratio'] = (google_agg['Positive']/google_agg['Total'])" ] }, { @@ -1394,11 +1475,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 224, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "google_agg.drop(['Positive','Total'], axis = 1, inplace=True)" ] }, { @@ -1412,11 +1494,80 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 225, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Positive Ratio
App
10 Best Foods for You0.835052
104 找工作 - 找工作 找打工 找兼職 履歷健檢 履歷診療室0.775000
11st0.589744
1800 Contacts - Lens Store0.800000
1LINE – One Line with One Touch0.710526
\n", + "
" + ], + "text/plain": [ + " Positive Ratio\n", + "App \n", + "10 Best Foods for You 0.835052\n", + "104 找工作 - 找工作 找打工 找兼職 履歷健檢 履歷診療室 0.775000\n", + "11st 0.589744\n", + "1800 Contacts - Lens Store 0.800000\n", + "1LINE – One Line with One Touch 0.710526" + ] + }, + "execution_count": 225, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "google_agg.head(5)" ] }, { @@ -1432,11 +1583,13 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 234, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "final = google_missing_removed.join(google_agg, on='App').dropna()\n", + "final = final.drop(columns =['Reviews_numeric','Reviews_isnull'])" ] }, { @@ -1450,12 +1603,93 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 236, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AppCategoryRatingReviewsInstallsTypeContent RatingGenresLast UpdatedCurrent VerAndroid VerPrice NumericalPositive Ratio
1Coloring book moanaART_AND_DESIGN3.9967.0500,000+FreeEveryoneArt & Design;Pretend Play2018-01-152.0.04.0.3 and up0.00.590909
\n", + "
" + ], + "text/plain": [ + " App Category Rating Reviews Installs Type \\\n", + "1 Coloring book moana ART_AND_DESIGN 3.9 967.0 500,000+ Free \n", + "\n", + " Content Rating Genres Last Updated Current Ver \\\n", + "1 Everyone Art & Design;Pretend Play 2018-01-15 2.0.0 \n", + "\n", + " Android Ver Price Numerical Positive Ratio \n", + "1 4.0.3 and up 0.0 0.590909 " + ] + }, + "execution_count": 236, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "final.head(1)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": {