diff --git a/module-3/lab-supervised-learning-feature-extraction/your-code/Pandas-concat-merge-join.ipynb b/module-3/lab-supervised-learning-feature-extraction/your-code/Pandas-concat-merge-join.ipynb
index 61cf9b9f..73eb6eda 100644
--- a/module-3/lab-supervised-learning-feature-extraction/your-code/Pandas-concat-merge-join.ipynb
+++ b/module-3/lab-supervised-learning-feature-extraction/your-code/Pandas-concat-merge-join.ipynb
@@ -48,7 +48,7 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 3,
"metadata": {
"scrolled": true
},
@@ -112,7 +112,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 4,
"metadata": {},
"outputs": [
{
@@ -192,7 +192,7 @@
"5 a5 b5 c5"
]
},
- "execution_count": 3,
+ "execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
@@ -203,7 +203,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 5,
"metadata": {},
"outputs": [
{
@@ -283,7 +283,7 @@
"5 d5 e5 f5"
]
},
- "execution_count": 4,
+ "execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
@@ -305,7 +305,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 6,
"metadata": {},
"outputs": [
{
@@ -466,7 +466,7 @@
"5 NaN NaN NaN d5 e5 f5"
]
},
- "execution_count": 5,
+ "execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
@@ -491,20 +491,185 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 7,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " A | \n",
+ " B | \n",
+ " C | \n",
+ " D | \n",
+ " E | \n",
+ " F | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " a0 | \n",
+ " b0 | \n",
+ " c0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " a1 | \n",
+ " b1 | \n",
+ " c1 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " a2 | \n",
+ " b2 | \n",
+ " c2 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " a3 | \n",
+ " b3 | \n",
+ " c3 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " a4 | \n",
+ " b4 | \n",
+ " c4 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " a5 | \n",
+ " b5 | \n",
+ " c5 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " d0 | \n",
+ " e0 | \n",
+ " f0 | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " d1 | \n",
+ " e1 | \n",
+ " f1 | \n",
+ "
\n",
+ " \n",
+ " | 8 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " d2 | \n",
+ " e2 | \n",
+ " f2 | \n",
+ "
\n",
+ " \n",
+ " | 9 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " d3 | \n",
+ " e3 | \n",
+ " f3 | \n",
+ "
\n",
+ " \n",
+ " | 10 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " d4 | \n",
+ " e4 | \n",
+ " f4 | \n",
+ "
\n",
+ " \n",
+ " | 11 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " d5 | \n",
+ " e5 | \n",
+ " f5 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " A B C D E F\n",
+ "0 a0 b0 c0 NaN NaN NaN\n",
+ "1 a1 b1 c1 NaN NaN NaN\n",
+ "2 a2 b2 c2 NaN NaN NaN\n",
+ "3 a3 b3 c3 NaN NaN NaN\n",
+ "4 a4 b4 c4 NaN NaN NaN\n",
+ "5 a5 b5 c5 NaN NaN NaN\n",
+ "6 NaN NaN NaN d0 e0 f0\n",
+ "7 NaN NaN NaN d1 e1 f1\n",
+ "8 NaN NaN NaN d2 e2 f2\n",
+ "9 NaN NaN NaN d3 e3 f3\n",
+ "10 NaN NaN NaN d4 e4 f4\n",
+ "11 NaN NaN NaN d5 e5 f5"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here"
+ "# Your code here\n",
+ "pd.concat([df1, df2, df3, df4], sort=False, ignore_index=True)"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
- "# Your comment here"
+ "# Your comment here\n",
+ "# Row indexes are now unique and consecutive (0-11) because ignore_index=True rebuilds the index"
]
},
{
@@ -869,12 +1034,133 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here\n"
+ "# Your code here\n",
+ "df1_1 = pd.concat([df1, df2])\n",
+ "df2_2 = pd.concat([df3, df4])"
]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " A | \n",
+ " B | \n",
+ " C | \n",
+ " D | \n",
+ " E | \n",
+ " F | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " a0 | \n",
+ " b0 | \n",
+ " c0 | \n",
+ " d0 | \n",
+ " e0 | \n",
+ " f0 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " a1 | \n",
+ " b1 | \n",
+ " c1 | \n",
+ " d1 | \n",
+ " e1 | \n",
+ " f1 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " a2 | \n",
+ " b2 | \n",
+ " c2 | \n",
+ " d2 | \n",
+ " e2 | \n",
+ " f2 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " a3 | \n",
+ " b3 | \n",
+ " c3 | \n",
+ " d3 | \n",
+ " e3 | \n",
+ " f3 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " a4 | \n",
+ " b4 | \n",
+ " c4 | \n",
+ " d4 | \n",
+ " e4 | \n",
+ " f4 | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " a5 | \n",
+ " b5 | \n",
+ " c5 | \n",
+ " d5 | \n",
+ " e5 | \n",
+ " f5 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " A B C D E F\n",
+ "0 a0 b0 c0 d0 e0 f0\n",
+ "1 a1 b1 c1 d1 e1 f1\n",
+ "2 a2 b2 c2 d2 e2 f2\n",
+ "3 a3 b3 c3 d3 e3 f3\n",
+ "4 a4 b4 c4 d4 e4 f4\n",
+ "5 a5 b5 c5 d5 e5 f5"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pd.concat([df1_1, df2_2], axis=1)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
}
],
"metadata": {
@@ -893,9 +1179,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.7.1"
+ "version": "3.7.5"
}
},
"nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 4
}
diff --git a/module-3/lab-supervised-learning-feature-extraction/your-code/main.ipynb b/module-3/lab-supervised-learning-feature-extraction/your-code/main.ipynb
index 01f76271..028d8a65 100644
--- a/module-3/lab-supervised-learning-feature-extraction/your-code/main.ipynb
+++ b/module-3/lab-supervised-learning-feature-extraction/your-code/main.ipynb
@@ -12,12 +12,11 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 184,
"metadata": {},
"outputs": [],
"source": [
"#Import your libraries\n",
- "\n",
"import numpy as np\n",
"import pandas as pd"
]
@@ -61,7 +60,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 185,
"metadata": {},
"outputs": [],
"source": [
@@ -79,11 +78,36 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 186,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "App object\n",
+ "Category object\n",
+ "Rating float64\n",
+ "Reviews object\n",
+ "Size object\n",
+ "Installs object\n",
+ "Type object\n",
+ "Price object\n",
+ "Content Rating object\n",
+ "Genres object\n",
+ "Last Updated object\n",
+ "Current Ver object\n",
+ "Android Ver object\n",
+ "dtype: object"
+ ]
+ },
+ "execution_count": 186,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "google_play.dtypes"
]
},
{
@@ -95,11 +119,104 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 187,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " App | \n",
+ " Category | \n",
+ " Rating | \n",
+ " Reviews | \n",
+ " Size | \n",
+ " Installs | \n",
+ " Type | \n",
+ " Price | \n",
+ " Content Rating | \n",
+ " Genres | \n",
+ " Last Updated | \n",
+ " Current Ver | \n",
+ " Android Ver | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " Photo Editor & Candy Camera & Grid & ScrapBook | \n",
+ " ART_AND_DESIGN | \n",
+ " 4.1 | \n",
+ " 159 | \n",
+ " 19M | \n",
+ " 10,000+ | \n",
+ " Free | \n",
+ " 0 | \n",
+ " Everyone | \n",
+ " Art & Design | \n",
+ " January 7, 2018 | \n",
+ " 1.0.0 | \n",
+ " 4.0.3 and up | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " Coloring book moana | \n",
+ " ART_AND_DESIGN | \n",
+ " 3.9 | \n",
+ " 967 | \n",
+ " 14M | \n",
+ " 500,000+ | \n",
+ " Free | \n",
+ " 0 | \n",
+ " Everyone | \n",
+ " Art & Design;Pretend Play | \n",
+ " January 15, 2018 | \n",
+ " 2.0.0 | \n",
+ " 4.0.3 and up | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " App Category Rating \\\n",
+ "0 Photo Editor & Candy Camera & Grid & ScrapBook ART_AND_DESIGN 4.1 \n",
+ "1 Coloring book moana ART_AND_DESIGN 3.9 \n",
+ "\n",
+ " Reviews Size Installs Type Price Content Rating \\\n",
+ "0 159 19M 10,000+ Free 0 Everyone \n",
+ "1 967 14M 500,000+ Free 0 Everyone \n",
+ "\n",
+ " Genres Last Updated Current Ver Android Ver \n",
+ "0 Art & Design January 7, 2018 1.0.0 4.0.3 and up \n",
+ "1 Art & Design;Pretend Play January 15, 2018 2.0.0 4.0.3 and up "
+ ]
+ },
+ "execution_count": 187,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "google_play.head(2)"
]
},
{
@@ -115,11 +232,12 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 188,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "google_play['Reviews_numeric'] = pd.to_numeric(google_play['Reviews'],errors='coerce')"
]
},
{
@@ -131,11 +249,12 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 189,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "google_play['Reviews_isnull'] = google_play['Reviews_numeric'].isnull()"
]
},
{
@@ -151,11 +270,89 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 190,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " App | \n",
+ " Category | \n",
+ " Rating | \n",
+ " Reviews | \n",
+ " Size | \n",
+ " Installs | \n",
+ " Type | \n",
+ " Price | \n",
+ " Content Rating | \n",
+ " Genres | \n",
+ " Last Updated | \n",
+ " Current Ver | \n",
+ " Android Ver | \n",
+ " Reviews_numeric | \n",
+ " Reviews_isnull | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 10472 | \n",
+ " Life Made WI-Fi Touchscreen Photo Frame | \n",
+ " 1.9 | \n",
+ " 19.0 | \n",
+ " 3.0M | \n",
+ " 1,000+ | \n",
+ " Free | \n",
+ " 0 | \n",
+ " Everyone | \n",
+ " NaN | \n",
+ " February 11, 2018 | \n",
+ " 1.0.19 | \n",
+ " 4.0 and up | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " App Category Rating Reviews \\\n",
+ "10472 Life Made WI-Fi Touchscreen Photo Frame 1.9 19.0 3.0M \n",
+ "\n",
+ " Size Installs Type Price Content Rating Genres \\\n",
+ "10472 1,000+ Free 0 Everyone NaN February 11, 2018 \n",
+ "\n",
+ " Last Updated Current Ver Android Ver Reviews_numeric Reviews_isnull \n",
+ "10472 1.0.19 4.0 and up NaN NaN True "
+ ]
+ },
+ "execution_count": 190,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "google_play[google_play['Reviews_isnull']]"
]
},
{
@@ -175,28 +372,34 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 191,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "True"
+ ]
+ },
+ "execution_count": 191,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Your code here\n",
"\n",
"def convert_string_to_numeric(s):\n",
- " \"\"\"\n",
- " Convert a string value to numeric. If the last character of the string is `M`, obtain the \n",
- " numeric part of the string, multiply it with 1,000,000, then return the result. Otherwise, \n",
- " convert the string to numeric value and return the result.\n",
- " \n",
- " Args:\n",
- " s: The Reviews score in string format.\n",
- "\n",
- " Returns:\n",
- " The correct numeric value of the Reviews score.\n",
- " \"\"\"\n",
- " return np.NaN\n",
+ "    # Scale the numeric part rather than splicing zeros into the text, so '19M' and '1.25M' convert correctly too.\n",
+ "    s = str(s).strip()\n",
+ "    if s.endswith('M'):\n",
+ "        return float(s[:-1]) * 1000000\n",
+ "    else:\n",
+ "        return float(s)\n",
+ "\n",
"\n",
- "test_string = '4.0M'\n",
"\n",
+ "test_string = '4.0M'\n",
"convert_string_to_numeric(test_string) == 4000000"
]
},
@@ -209,11 +412,12 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 192,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "google_play['Reviews'] = google_play['Reviews'].apply(convert_string_to_numeric)\n"
]
},
{
@@ -227,11 +431,89 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 193,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " App | \n",
+ " Category | \n",
+ " Rating | \n",
+ " Reviews | \n",
+ " Size | \n",
+ " Installs | \n",
+ " Type | \n",
+ " Price | \n",
+ " Content Rating | \n",
+ " Genres | \n",
+ " Last Updated | \n",
+ " Current Ver | \n",
+ " Android Ver | \n",
+ " Reviews_numeric | \n",
+ " Reviews_isnull | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 10472 | \n",
+ " Life Made WI-Fi Touchscreen Photo Frame | \n",
+ " 1.9 | \n",
+ " 19.0 | \n",
+ " 3000000.0 | \n",
+ " 1,000+ | \n",
+ " Free | \n",
+ " 0 | \n",
+ " Everyone | \n",
+ " NaN | \n",
+ " February 11, 2018 | \n",
+ " 1.0.19 | \n",
+ " 4.0 and up | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " App Category Rating Reviews \\\n",
+ "10472 Life Made WI-Fi Touchscreen Photo Frame 1.9 19.0 3000000.0 \n",
+ "\n",
+ " Size Installs Type Price Content Rating Genres \\\n",
+ "10472 1,000+ Free 0 Everyone NaN February 11, 2018 \n",
+ "\n",
+ " Last Updated Current Ver Android Ver Reviews_numeric Reviews_isnull \n",
+ "10472 1.0.19 4.0 and up NaN NaN True "
+ ]
+ },
+ "execution_count": 193,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here\n"
+ "# Your code here\n",
+ "google_play.loc[[10472]]"
]
},
{
@@ -243,11 +525,39 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 194,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "App object\n",
+ "Category object\n",
+ "Rating float64\n",
+ "Reviews float64\n",
+ "Size object\n",
+ "Installs object\n",
+ "Type object\n",
+ "Price object\n",
+ "Content Rating object\n",
+ "Genres object\n",
+ "Last Updated object\n",
+ "Current Ver object\n",
+ "Android Ver object\n",
+ "Reviews_numeric float64\n",
+ "Reviews_isnull bool\n",
+ "dtype: object"
+ ]
+ },
+ "execution_count": 194,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "google_play['Reviews'] = google_play['Reviews'].astype(float)\n",
+ "google_play.dtypes"
]
},
{
@@ -261,11 +571,79 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 195,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array(['19M', '14M', '8.7M', '25M', '2.8M', '5.6M', '29M', '33M', '3.1M',\n",
+ " '28M', '12M', '20M', '21M', '37M', '2.7M', '5.5M', '17M', '39M',\n",
+ " '31M', '4.2M', '7.0M', '23M', '6.0M', '6.1M', '4.6M', '9.2M',\n",
+ " '5.2M', '11M', '24M', 'Varies with device', '9.4M', '15M', '10M',\n",
+ " '1.2M', '26M', '8.0M', '7.9M', '56M', '57M', '35M', '54M', '201k',\n",
+ " '3.6M', '5.7M', '8.6M', '2.4M', '27M', '2.5M', '16M', '3.4M',\n",
+ " '8.9M', '3.9M', '2.9M', '38M', '32M', '5.4M', '18M', '1.1M',\n",
+ " '2.2M', '4.5M', '9.8M', '52M', '9.0M', '6.7M', '30M', '2.6M',\n",
+ " '7.1M', '3.7M', '22M', '7.4M', '6.4M', '3.2M', '8.2M', '9.9M',\n",
+ " '4.9M', '9.5M', '5.0M', '5.9M', '13M', '73M', '6.8M', '3.5M',\n",
+ " '4.0M', '2.3M', '7.2M', '2.1M', '42M', '7.3M', '9.1M', '55M',\n",
+ " '23k', '6.5M', '1.5M', '7.5M', '51M', '41M', '48M', '8.5M', '46M',\n",
+ " '8.3M', '4.3M', '4.7M', '3.3M', '40M', '7.8M', '8.8M', '6.6M',\n",
+ " '5.1M', '61M', '66M', '79k', '8.4M', '118k', '44M', '695k', '1.6M',\n",
+ " '6.2M', '18k', '53M', '1.4M', '3.0M', '5.8M', '3.8M', '9.6M',\n",
+ " '45M', '63M', '49M', '77M', '4.4M', '4.8M', '70M', '6.9M', '9.3M',\n",
+ " '10.0M', '8.1M', '36M', '84M', '97M', '2.0M', '1.9M', '1.8M',\n",
+ " '5.3M', '47M', '556k', '526k', '76M', '7.6M', '59M', '9.7M', '78M',\n",
+ " '72M', '43M', '7.7M', '6.3M', '334k', '34M', '93M', '65M', '79M',\n",
+ " '100M', '58M', '50M', '68M', '64M', '67M', '60M', '94M', '232k',\n",
+ " '99M', '624k', '95M', '8.5k', '41k', '292k', '11k', '80M', '1.7M',\n",
+ " '74M', '62M', '69M', '75M', '98M', '85M', '82M', '96M', '87M',\n",
+ " '71M', '86M', '91M', '81M', '92M', '83M', '88M', '704k', '862k',\n",
+ " '899k', '378k', '266k', '375k', '1.3M', '975k', '980k', '4.1M',\n",
+ " '89M', '696k', '544k', '525k', '920k', '779k', '853k', '720k',\n",
+ " '713k', '772k', '318k', '58k', '241k', '196k', '857k', '51k',\n",
+ " '953k', '865k', '251k', '930k', '540k', '313k', '746k', '203k',\n",
+ " '26k', '314k', '239k', '371k', '220k', '730k', '756k', '91k',\n",
+ " '293k', '17k', '74k', '14k', '317k', '78k', '924k', '902k', '818k',\n",
+ " '81k', '939k', '169k', '45k', '475k', '965k', '90M', '545k', '61k',\n",
+ " '283k', '655k', '714k', '93k', '872k', '121k', '322k', '1.0M',\n",
+ " '976k', '172k', '238k', '549k', '206k', '954k', '444k', '717k',\n",
+ " '210k', '609k', '308k', '705k', '306k', '904k', '473k', '175k',\n",
+ " '350k', '383k', '454k', '421k', '70k', '812k', '442k', '842k',\n",
+ " '417k', '412k', '459k', '478k', '335k', '782k', '721k', '430k',\n",
+ " '429k', '192k', '200k', '460k', '728k', '496k', '816k', '414k',\n",
+ " '506k', '887k', '613k', '243k', '569k', '778k', '683k', '592k',\n",
+ " '319k', '186k', '840k', '647k', '191k', '373k', '437k', '598k',\n",
+ " '716k', '585k', '982k', '222k', '219k', '55k', '948k', '323k',\n",
+ " '691k', '511k', '951k', '963k', '25k', '554k', '351k', '27k',\n",
+ " '82k', '208k', '913k', '514k', '551k', '29k', '103k', '898k',\n",
+ " '743k', '116k', '153k', '209k', '353k', '499k', '173k', '597k',\n",
+ " '809k', '122k', '411k', '400k', '801k', '787k', '237k', '50k',\n",
+ " '643k', '986k', '97k', '516k', '837k', '780k', '961k', '269k',\n",
+ " '20k', '498k', '600k', '749k', '642k', '881k', '72k', '656k',\n",
+ " '601k', '221k', '228k', '108k', '940k', '176k', '33k', '663k',\n",
+ " '34k', '942k', '259k', '164k', '458k', '245k', '629k', '28k',\n",
+ " '288k', '775k', '785k', '636k', '916k', '994k', '309k', '485k',\n",
+ " '914k', '903k', '608k', '500k', '54k', '562k', '847k', '957k',\n",
+ " '688k', '811k', '270k', '48k', '329k', '523k', '921k', '874k',\n",
+ " '981k', '784k', '280k', '24k', '518k', '754k', '892k', '154k',\n",
+ " '860k', '364k', '387k', '626k', '161k', '879k', '39k', '970k',\n",
+ " '170k', '141k', '160k', '144k', '143k', '190k', '376k', '193k',\n",
+ " '246k', '73k', '658k', '992k', '253k', '420k', '404k', '1,000+',\n",
+ " '470k', '226k', '240k', '89k', '234k', '257k', '861k', '467k',\n",
+ " '157k', '44k', '676k', '67k', '552k', '885k', '1020k', '582k',\n",
+ " '619k'], dtype=object)"
+ ]
+ },
+ "execution_count": 195,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "google_play['Size'].unique()"
]
},
{
@@ -281,11 +659,34 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 196,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Varies with device 15.635089\n",
+ "11M 1.826400\n",
+ "12M 1.807951\n",
+ "14M 1.789503\n",
+ "13M 1.761830\n",
+ " ... \n",
+ "412k 0.009224\n",
+ "421k 0.009224\n",
+ "720k 0.009224\n",
+ "314k 0.009224\n",
+ "97k 0.009224\n",
+ "Name: Size, Length: 462, dtype: float64"
+ ]
+ },
+ "execution_count": 196,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "google_play['Size'].value_counts()/len(google_play) *100\n"
]
},
{
@@ -301,11 +702,12 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 197,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "google_play.drop(columns = ['Size'], inplace = True)"
]
},
{
@@ -321,11 +723,37 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 198,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "App 0\n",
+ "Category 0\n",
+ "Rating 1474\n",
+ "Reviews 0\n",
+ "Installs 0\n",
+ "Type 1\n",
+ "Price 0\n",
+ "Content Rating 1\n",
+ "Genres 0\n",
+ "Last Updated 0\n",
+ "Current Ver 8\n",
+ "Android Ver 3\n",
+ "Reviews_numeric 1\n",
+ "Reviews_isnull 0\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 198,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "google_play.isna().sum()"
]
},
{
@@ -341,11 +769,37 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 199,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "App 0.000000\n",
+ "Category 0.000000\n",
+ "Rating 13.596532\n",
+ "Reviews 0.000000\n",
+ "Installs 0.000000\n",
+ "Type 0.009224\n",
+ "Price 0.000000\n",
+ "Content Rating 0.009224\n",
+ "Genres 0.000000\n",
+ "Last Updated 0.000000\n",
+ "Current Ver 0.073794\n",
+ "Android Ver 0.027673\n",
+ "Reviews_numeric 0.009224\n",
+ "Reviews_isnull 0.000000\n",
+ "dtype: float64"
+ ]
+ },
+ "execution_count": 199,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "google_play.isna().sum()/len(google_play)*100"
]
},
{
@@ -367,11 +821,12 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 200,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "google_missing_removed = google_play.dropna().copy()  # explicit copy: later column assignments must not hit SettingWithCopyWarning"
]
},
{
@@ -387,11 +842,25 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 207,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/home/alberto/miniconda3/envs/data_env/lib/python3.7/site-packages/ipykernel_launcher.py:2: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+ "Try using .loc[row_indexer,col_indexer] = value instead\n",
+ "\n",
+ "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " \n"
+ ]
+ }
+ ],
"source": [
- "# Your code here:\n"
+ "# Your code here\n",
+ "google_missing_removed['Last Updated'] = pd.to_datetime(google_missing_removed['Last Updated'])"
]
},
{
@@ -405,11 +874,33 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 209,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array(['0', '$4.99', '$3.99', '$6.99', '$7.99', '$5.99', '$2.99', '$3.49',\n",
+ " '$1.99', '$9.99', '$7.49', '$0.99', '$9.00', '$5.49', '$10.00',\n",
+ " '$24.99', '$11.99', '$79.99', '$16.99', '$14.99', '$29.99',\n",
+ " '$12.99', '$2.49', '$10.99', '$1.50', '$19.99', '$15.99', '$33.99',\n",
+ " '$39.99', '$3.95', '$4.49', '$1.70', '$8.99', '$1.49', '$3.88',\n",
+ " '$399.99', '$17.99', '$400.00', '$3.02', '$1.76', '$4.84', '$4.77',\n",
+ " '$1.61', '$2.50', '$1.59', '$6.49', '$1.29', '$299.99', '$379.99',\n",
+ " '$37.99', '$18.99', '$389.99', '$8.49', '$1.75', '$14.00', '$2.00',\n",
+ " '$3.08', '$2.59', '$19.40', '$3.90', '$4.59', '$15.46', '$3.04',\n",
+ " '$13.99', '$4.29', '$3.28', '$4.60', '$1.00', '$2.95', '$2.90',\n",
+ " '$1.97', '$2.56', '$1.20'], dtype=object)"
+ ]
+ },
+ "execution_count": 209,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "google_missing_removed['Price'].unique()"
]
},
{
@@ -425,11 +916,54 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 210,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/home/alberto/miniconda3/envs/data_env/lib/python3.7/site-packages/ipykernel_launcher.py:2: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+ "Try using .loc[row_indexer,col_indexer] = value instead\n",
+ "\n",
+ "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " \n"
+ ]
+ }
+ ],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "google_missing_removed['Price'] = google_missing_removed.Price.apply(lambda x : x.replace('$',''))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 211,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array(['0', '4.99', '3.99', '6.99', '7.99', '5.99', '2.99', '3.49',\n",
+ " '1.99', '9.99', '7.49', '0.99', '9.00', '5.49', '10.00', '24.99',\n",
+ " '11.99', '79.99', '16.99', '14.99', '29.99', '12.99', '2.49',\n",
+ " '10.99', '1.50', '19.99', '15.99', '33.99', '39.99', '3.95',\n",
+ " '4.49', '1.70', '8.99', '1.49', '3.88', '399.99', '17.99',\n",
+ " '400.00', '3.02', '1.76', '4.84', '4.77', '1.61', '2.50', '1.59',\n",
+ " '6.49', '1.29', '299.99', '379.99', '37.99', '18.99', '389.99',\n",
+ " '8.49', '1.75', '14.00', '2.00', '3.08', '2.59', '19.40', '3.90',\n",
+ " '4.59', '15.46', '3.04', '13.99', '4.29', '3.28', '4.60', '1.00',\n",
+ " '2.95', '2.90', '1.97', '2.56', '1.20'], dtype=object)"
+ ]
+ },
+ "execution_count": 211,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "google_missing_removed['Price'].unique()"
]
},
{
@@ -441,11 +975,25 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 212,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/home/alberto/miniconda3/envs/data_env/lib/python3.7/site-packages/ipykernel_launcher.py:2: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+ "Try using .loc[row_indexer,col_indexer] = value instead\n",
+ "\n",
+ "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " \n"
+ ]
+ }
+ ],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "google_missing_removed['Price Numerical'] = pd.to_numeric(google_missing_removed['Price'],errors='coerce')"
]
},
{
@@ -457,11 +1005,12 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 213,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "google_missing_removed.drop(columns = ['Price'], inplace = True)"
]
},
{
@@ -477,11 +1026,37 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 214,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "App object\n",
+ "Category object\n",
+ "Rating float64\n",
+ "Reviews float64\n",
+ "Installs object\n",
+ "Type object\n",
+ "Content Rating object\n",
+ "Genres object\n",
+ "Last Updated datetime64[ns]\n",
+ "Current Ver object\n",
+ "Android Ver object\n",
+ "Reviews_numeric float64\n",
+ "Reviews_isnull bool\n",
+ "Price Numerical float64\n",
+ "dtype: object"
+ ]
+ },
+ "execution_count": 214,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here"
+ "# Your code here\n",
+ "google_missing_removed.dtypes"
]
},
{
@@ -500,7 +1075,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 215,
"metadata": {},
"outputs": [],
"source": [
@@ -520,11 +1095,106 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 216,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " App | \n",
+ " Translated_Review | \n",
+ " Sentiment | \n",
+ " Sentiment_Polarity | \n",
+ " Sentiment_Subjectivity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 10 Best Foods for You | \n",
+ " I like eat delicious food. That's I'm cooking ... | \n",
+ " Positive | \n",
+ " 1.00 | \n",
+ " 0.533333 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 10 Best Foods for You | \n",
+ " This help eating healthy exercise regular basis | \n",
+ " Positive | \n",
+ " 0.25 | \n",
+ " 0.288462 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 10 Best Foods for You | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 10 Best Foods for You | \n",
+ " Works great especially going grocery store | \n",
+ " Positive | \n",
+ " 0.40 | \n",
+ " 0.875000 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 10 Best Foods for You | \n",
+ " Best idea us | \n",
+ " Positive | \n",
+ " 1.00 | \n",
+ " 0.300000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " App Translated_Review \\\n",
+ "0 10 Best Foods for You I like eat delicious food. That's I'm cooking ... \n",
+ "1 10 Best Foods for You This help eating healthy exercise regular basis \n",
+ "2 10 Best Foods for You NaN \n",
+ "3 10 Best Foods for You Works great especially going grocery store \n",
+ "4 10 Best Foods for You Best idea us \n",
+ "\n",
+ " Sentiment Sentiment_Polarity Sentiment_Subjectivity \n",
+ "0 Positive 1.00 0.533333 \n",
+ "1 Positive 0.25 0.288462 \n",
+ "2 NaN NaN NaN \n",
+ "3 Positive 0.40 0.875000 \n",
+ "4 Positive 1.00 0.300000 "
+ ]
+ },
+ "execution_count": 216,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here\n"
+ "# Your code here\n",
+ "google_review.head()"
]
},
{
@@ -551,11 +1221,12 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 217,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "review_missing_removed = google_review.dropna()"
]
},
{
@@ -567,11 +1238,34 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 218,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Bowmasters 312\n",
+ "Angry Birds Classic 273\n",
+ "Helix Jump 273\n",
+ "Calorie Counter - MyFitnessPal 254\n",
+ "Duolingo: Learn Languages Free 240\n",
+ " ... \n",
+ "CallApp: Caller ID, Blocker & Phone Call Recorder 1\n",
+ "Apartment Decorating Ideas 1\n",
+ "Draw A Stickman 1\n",
+ "Daily Workouts - Exercise Fitness Routine Trainer 1\n",
+ "CBS News 1\n",
+ "Name: App, Length: 865, dtype: int64"
+ ]
+ },
+ "execution_count": 218,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "review_missing_removed['App'].value_counts()"
]
},
{
@@ -600,23 +1294,34 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 219,
"metadata": {},
"outputs": [],
"source": [
"# Your code below\n",
"\n",
"def positive_function(x):\n",
- " \"\"\"\n",
- " Count how many times the string `Positive` appears in a column (exact string match).\n",
- " \n",
- " Args:\n",
- " x: data column\n",
- " \n",
- " Returns:\n",
- " The number of occurrences of `Positive` in the column data.\n",
- " \"\"\"\n",
- " return 0"
+ "    return int((x == 'Positive').sum())  # number of exact 'Positive' matches in x"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 220,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "23998"
+ ]
+ },
+ "execution_count": 220,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "positive_function(review_missing_removed['Sentiment'])"
]
},
{
@@ -640,11 +1345,14 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 221,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "google_agg = review_missing_removed.groupby('App')['Sentiment'].agg([positive_function, 'count'])\n",
+ "# 'count' tallies the non-null Sentiment rows per app (all rows here, since NaNs were dropped above)\n",
+ "google_agg = google_agg.rename(columns={'positive_function':'Positive','count':'Total'})"
]
},
{
@@ -656,11 +1364,87 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 222,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Positive | \n",
+ " Total | \n",
+ "
\n",
+ " \n",
+ " | App | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 10 Best Foods for You | \n",
+ " 162 | \n",
+ " 194 | \n",
+ "
\n",
+ " \n",
+ " | 104 找工作 - 找工作 找打工 找兼職 履歷健檢 履歷診療室 | \n",
+ " 31 | \n",
+ " 40 | \n",
+ "
\n",
+ " \n",
+ " | 11st | \n",
+ " 23 | \n",
+ " 39 | \n",
+ "
\n",
+ " \n",
+ " | 1800 Contacts - Lens Store | \n",
+ " 64 | \n",
+ " 80 | \n",
+ "
\n",
+ " \n",
+ " | 1LINE – One Line with One Touch | \n",
+ " 27 | \n",
+ " 38 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Positive Total\n",
+ "App \n",
+ "10 Best Foods for You 162 194\n",
+ "104 找工作 - 找工作 找打工 找兼職 履歷健檢 履歷診療室 31 40\n",
+ "11st 23 39\n",
+ "1800 Contacts - Lens Store 64 80\n",
+ "1LINE – One Line with One Touch 27 38"
+ ]
+ },
+ "execution_count": 222,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here\n"
+ "# Your code here\n",
+ "google_agg.head()"
]
},
{
@@ -674,11 +1458,12 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 223,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "google_agg['Positive Ratio'] = (google_agg['Positive']/google_agg['Total'])"
]
},
{
@@ -690,11 +1475,12 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 224,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "google_agg.drop(['Positive','Total'], axis = 1, inplace=True)"
]
},
{
@@ -708,11 +1494,80 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 225,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Positive Ratio | \n",
+ "
\n",
+ " \n",
+ " | App | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 10 Best Foods for You | \n",
+ " 0.835052 | \n",
+ "
\n",
+ " \n",
+ " | 104 找工作 - 找工作 找打工 找兼職 履歷健檢 履歷診療室 | \n",
+ " 0.775000 | \n",
+ "
\n",
+ " \n",
+ " | 11st | \n",
+ " 0.589744 | \n",
+ "
\n",
+ " \n",
+ " | 1800 Contacts - Lens Store | \n",
+ " 0.800000 | \n",
+ "
\n",
+ " \n",
+ " | 1LINE – One Line with One Touch | \n",
+ " 0.710526 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Positive Ratio\n",
+ "App \n",
+ "10 Best Foods for You 0.835052\n",
+ "104 找工作 - 找工作 找打工 找兼職 履歷健檢 履歷診療室 0.775000\n",
+ "11st 0.589744\n",
+ "1800 Contacts - Lens Store 0.800000\n",
+ "1LINE – One Line with One Touch 0.710526"
+ ]
+ },
+ "execution_count": 225,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "google_agg.head(5)"
]
},
{
@@ -728,11 +1583,13 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 234,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "final = google_missing_removed.join(google_agg, on='App').dropna()\n",
+ "final = final.drop(columns =['Reviews_numeric','Reviews_isnull'])"
]
},
{
@@ -744,21 +1601,102 @@
""
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 236,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " App | \n",
+ " Category | \n",
+ " Rating | \n",
+ " Reviews | \n",
+ " Installs | \n",
+ " Type | \n",
+ " Content Rating | \n",
+ " Genres | \n",
+ " Last Updated | \n",
+ " Current Ver | \n",
+ " Android Ver | \n",
+ " Price Numerical | \n",
+ " Positive Ratio | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 1 | \n",
+ " Coloring book moana | \n",
+ " ART_AND_DESIGN | \n",
+ " 3.9 | \n",
+ " 967.0 | \n",
+ " 500,000+ | \n",
+ " Free | \n",
+ " Everyone | \n",
+ " Art & Design;Pretend Play | \n",
+ " 2018-01-15 | \n",
+ " 2.0.0 | \n",
+ " 4.0.3 and up | \n",
+ " 0.0 | \n",
+ " 0.590909 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " App Category Rating Reviews Installs Type \\\n",
+ "1 Coloring book moana ART_AND_DESIGN 3.9 967.0 500,000+ Free \n",
+ "\n",
+ " Content Rating Genres Last Updated Current Ver \\\n",
+ "1 Everyone Art & Design;Pretend Play 2018-01-15 2.0.0 \n",
+ "\n",
+ " Android Ver Price Numerical Positive Ratio \n",
+ "1 4.0.3 and up 0.0 0.590909 "
+ ]
+ },
+ "execution_count": 236,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Your code here:\n",
+ "final.head(1)"
+ ]
+ },
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
- "source": [
- "# Your code here:\n"
- ]
+ "source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
@@ -770,9 +1708,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.6.6"
+ "version": "3.7.5"
}
},
"nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 4
}