From acc3507a80f72f5aa787afefea60aecae957769b Mon Sep 17 00:00:00 2001 From: Andrew Bavuels Date: Sat, 28 Oct 2023 14:23:09 +0200 Subject: [PATCH 1/4] Workshop in process --- pandas_apply_lambda.ipynb | 803 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 803 insertions(+) create mode 100644 pandas_apply_lambda.ipynb diff --git a/pandas_apply_lambda.ipynb b/pandas_apply_lambda.ipynb new file mode 100644 index 0000000..c6f1f9e --- /dev/null +++ b/pandas_apply_lambda.ipynb @@ -0,0 +1,803 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 21, + "id": "5bd1b48d", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "c31b6a09", + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv('data/input/IMDB-Movie-Data.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "793c484b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 1000 entries, 0 to 999\n", + "Data columns (total 12 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 Rank 1000 non-null int64 \n", + " 1 Title 1000 non-null object \n", + " 2 Genre 1000 non-null object \n", + " 3 Description 1000 non-null object \n", + " 4 Director 1000 non-null object \n", + " 5 Actors 1000 non-null object \n", + " 6 Year 1000 non-null int64 \n", + " 7 Runtime (Minutes) 1000 non-null int64 \n", + " 8 Rating 1000 non-null float64\n", + " 9 Votes 1000 non-null int64 \n", + " 10 Revenue (Millions) 872 non-null float64\n", + " 11 Metascore 936 non-null float64\n", + "dtypes: float64(3), int64(4), object(5)\n", + "memory usage: 93.9+ KB\n" + ] + } + ], + "source": [ + "df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "0a56b116", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
RankYearRuntime (Minutes)RatingVotesRevenue (Millions)Metascore
count1000.0000001000.0000001000.0000001000.0000001.000000e+03872.000000936.000000
mean500.5000002012.783000113.1720006.7232001.698083e+0582.95637658.985043
std288.8194363.20596218.8109080.9454291.887626e+05103.25354017.194757
min1.0000002006.00000066.0000001.9000006.100000e+010.00000011.000000
25%250.7500002010.000000100.0000006.2000003.630900e+0413.27000047.000000
50%500.5000002014.000000111.0000006.8000001.107990e+0547.98500059.500000
75%750.2500002016.000000123.0000007.4000002.399098e+05113.71500072.000000
max1000.0000002016.000000191.0000009.0000001.791916e+06936.630000100.000000
\n", + "
" + ], + "text/plain": [ + " Rank Year Runtime (Minutes) Rating Votes \\\n", + "count 1000.000000 1000.000000 1000.000000 1000.000000 1.000000e+03 \n", + "mean 500.500000 2012.783000 113.172000 6.723200 1.698083e+05 \n", + "std 288.819436 3.205962 18.810908 0.945429 1.887626e+05 \n", + "min 1.000000 2006.000000 66.000000 1.900000 6.100000e+01 \n", + "25% 250.750000 2010.000000 100.000000 6.200000 3.630900e+04 \n", + "50% 500.500000 2014.000000 111.000000 6.800000 1.107990e+05 \n", + "75% 750.250000 2016.000000 123.000000 7.400000 2.399098e+05 \n", + "max 1000.000000 2016.000000 191.000000 9.000000 1.791916e+06 \n", + "\n", + " Revenue (Millions) Metascore \n", + "count 872.000000 936.000000 \n", + "mean 82.956376 58.985043 \n", + "std 103.253540 17.194757 \n", + "min 0.000000 11.000000 \n", + "25% 13.270000 47.000000 \n", + "50% 47.985000 59.500000 \n", + "75% 113.715000 72.000000 \n", + "max 936.630000 100.000000 " + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "548491fc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
RankTitleGenreDescriptionDirectorActorsYearRuntime (Minutes)RatingVotesRevenue (Millions)Metascore
01Guardians of the GalaxyAction,Adventure,Sci-FiA group of intergalactic criminals are forced ...James GunnChris Pratt, Vin Diesel, Bradley Cooper, Zoe S...20141218.1757074333.1376.0
12PrometheusAdventure,Mystery,Sci-FiFollowing clues to the origin of mankind, a te...Ridley ScottNoomi Rapace, Logan Marshall-Green, Michael Fa...20121247.0485820126.4665.0
23SplitHorror,ThrillerThree girls are kidnapped by a man with a diag...M. Night ShyamalanJames McAvoy, Anya Taylor-Joy, Haley Lu Richar...20161177.3157606138.1262.0
34SingAnimation,Comedy,FamilyIn a city of humanoid animals, a hustling thea...Christophe LourdeletMatthew McConaughey,Reese Witherspoon, Seth Ma...20161087.260545270.3259.0
45Suicide SquadAction,Adventure,FantasyA secret government agency recruits some of th...David AyerWill Smith, Jared Leto, Margot Robbie, Viola D...20161236.2393727325.0240.0
\n", + "
" + ], + "text/plain": [ + " Rank Title Genre \\\n", + "0 1 Guardians of the Galaxy Action,Adventure,Sci-Fi \n", + "1 2 Prometheus Adventure,Mystery,Sci-Fi \n", + "2 3 Split Horror,Thriller \n", + "3 4 Sing Animation,Comedy,Family \n", + "4 5 Suicide Squad Action,Adventure,Fantasy \n", + "\n", + " Description Director \\\n", + "0 A group of intergalactic criminals are forced ... James Gunn \n", + "1 Following clues to the origin of mankind, a te... Ridley Scott \n", + "2 Three girls are kidnapped by a man with a diag... M. Night Shyamalan \n", + "3 In a city of humanoid animals, a hustling thea... Christophe Lourdelet \n", + "4 A secret government agency recruits some of th... David Ayer \n", + "\n", + " Actors Year Runtime (Minutes) \\\n", + "0 Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S... 2014 121 \n", + "1 Noomi Rapace, Logan Marshall-Green, Michael Fa... 2012 124 \n", + "2 James McAvoy, Anya Taylor-Joy, Haley Lu Richar... 2016 117 \n", + "3 Matthew McConaughey,Reese Witherspoon, Seth Ma... 2016 108 \n", + "4 Will Smith, Jared Leto, Margot Robbie, Viola D... 2016 123 \n", + "\n", + " Rating Votes Revenue (Millions) Metascore \n", + "0 8.1 757074 333.13 76.0 \n", + "1 7.0 485820 126.46 65.0 \n", + "2 7.3 157606 138.12 62.0 \n", + "3 7.2 60545 270.32 59.0 \n", + "4 6.2 393727 325.02 40.0 " + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "markdown", + "id": "e7414ce7", + "metadata": {}, + "source": [ + "### Challenge 1. Using a single argument\n", + "We want to create bins of movies according to the number of votes they've received. 
For that matter, we will create a new column named 'bin' which will tag every movie as follow:\n", + "\n", + "- From 0 to 999 ==> 'cat_1'\n", + "- From 1000 to 9999 ==> 'cat_2'\n", + "- From 10000 to 99999 ==> 'cat_3'\n", + "- From 100000 to 999999 ==> 'cat_4'\n", + "- More than 1000000 ==> 'cat_5'" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "d6a3a52f", + "metadata": {}, + "outputs": [], + "source": [ + "def warm_up(x):\n", + " if x >= 0 and x <= 999:\n", + " return \"cat_1\"\n", + " elif x >= 1000 and x <= 9999:\n", + " return \"cat_2\"\n", + " elif x >= 10000 and x <= 99999:\n", + " return \"cat_3\"\n", + " elif x >= 100000 and x <= 999999:\n", + " return \"cat_4\"\n", + " elif x >= 1000000:\n", + " return \"cat_5\"" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "64f89a69", + "metadata": {}, + "outputs": [], + "source": [ + "df['bin'] = df.apply(lambda x: warm_up(x['Votes']), axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "c1b5d114", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
RankTitleGenreDescriptionDirectorActorsYearRuntime (Minutes)RatingVotesRevenue (Millions)Metascorebin
01Guardians of the GalaxyAction,Adventure,Sci-FiA group of intergalactic criminals are forced ...James GunnChris Pratt, Vin Diesel, Bradley Cooper, Zoe S...20141218.1757074333.1376.0cat_4
12PrometheusAdventure,Mystery,Sci-FiFollowing clues to the origin of mankind, a te...Ridley ScottNoomi Rapace, Logan Marshall-Green, Michael Fa...20121247.0485820126.4665.0cat_4
23SplitHorror,ThrillerThree girls are kidnapped by a man with a diag...M. Night ShyamalanJames McAvoy, Anya Taylor-Joy, Haley Lu Richar...20161177.3157606138.1262.0cat_4
34SingAnimation,Comedy,FamilyIn a city of humanoid animals, a hustling thea...Christophe LourdeletMatthew McConaughey,Reese Witherspoon, Seth Ma...20161087.260545270.3259.0cat_3
45Suicide SquadAction,Adventure,FantasyA secret government agency recruits some of th...David AyerWill Smith, Jared Leto, Margot Robbie, Viola D...20161236.2393727325.0240.0cat_4
\n", + "
" + ], + "text/plain": [ + " Rank Title Genre \\\n", + "0 1 Guardians of the Galaxy Action,Adventure,Sci-Fi \n", + "1 2 Prometheus Adventure,Mystery,Sci-Fi \n", + "2 3 Split Horror,Thriller \n", + "3 4 Sing Animation,Comedy,Family \n", + "4 5 Suicide Squad Action,Adventure,Fantasy \n", + "\n", + " Description Director \\\n", + "0 A group of intergalactic criminals are forced ... James Gunn \n", + "1 Following clues to the origin of mankind, a te... Ridley Scott \n", + "2 Three girls are kidnapped by a man with a diag... M. Night Shyamalan \n", + "3 In a city of humanoid animals, a hustling thea... Christophe Lourdelet \n", + "4 A secret government agency recruits some of th... David Ayer \n", + "\n", + " Actors Year Runtime (Minutes) \\\n", + "0 Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S... 2014 121 \n", + "1 Noomi Rapace, Logan Marshall-Green, Michael Fa... 2012 124 \n", + "2 James McAvoy, Anya Taylor-Joy, Haley Lu Richar... 2016 117 \n", + "3 Matthew McConaughey,Reese Witherspoon, Seth Ma... 2016 108 \n", + "4 Will Smith, Jared Leto, Margot Robbie, Viola D... 2016 123 \n", + "\n", + " Rating Votes Revenue (Millions) Metascore bin \n", + "0 8.1 757074 333.13 76.0 cat_4 \n", + "1 7.0 485820 126.46 65.0 cat_4 \n", + "2 7.3 157606 138.12 62.0 cat_4 \n", + "3 7.2 60545 270.32 59.0 cat_3 \n", + "4 6.2 393727 325.02 40.0 cat_4 " + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head(5)" + ] + }, + { + "cell_type": "markdown", + "id": "6356a5d9", + "metadata": {}, + "source": [ + "### Challenge 2. Using two arguments\n", + "We want to know how much is the revenue per minute for every movie." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "d47bcaad", + "metadata": {}, + "outputs": [], + "source": [ + "df['Revenue per minutes'] = df.apply(lambda row: row['Revenue (Millions)'] / row['Runtime (Minutes)'], axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "a3665a70", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
RankTitleGenreDescriptionDirectorActorsYearRuntime (Minutes)RatingVotesRevenue (Millions)MetascorebinRevenue per minutes
01Guardians of the GalaxyAction,Adventure,Sci-FiA group of intergalactic criminals are forced ...James GunnChris Pratt, Vin Diesel, Bradley Cooper, Zoe S...20141218.1757074333.1376.0cat_42.753140
12PrometheusAdventure,Mystery,Sci-FiFollowing clues to the origin of mankind, a te...Ridley ScottNoomi Rapace, Logan Marshall-Green, Michael Fa...20121247.0485820126.4665.0cat_41.019839
23SplitHorror,ThrillerThree girls are kidnapped by a man with a diag...M. Night ShyamalanJames McAvoy, Anya Taylor-Joy, Haley Lu Richar...20161177.3157606138.1262.0cat_41.180513
34SingAnimation,Comedy,FamilyIn a city of humanoid animals, a hustling thea...Christophe LourdeletMatthew McConaughey,Reese Witherspoon, Seth Ma...20161087.260545270.3259.0cat_32.502963
45Suicide SquadAction,Adventure,FantasyA secret government agency recruits some of th...David AyerWill Smith, Jared Leto, Margot Robbie, Viola D...20161236.2393727325.0240.0cat_42.642439
\n", + "
" + ], + "text/plain": [ + " Rank Title Genre \\\n", + "0 1 Guardians of the Galaxy Action,Adventure,Sci-Fi \n", + "1 2 Prometheus Adventure,Mystery,Sci-Fi \n", + "2 3 Split Horror,Thriller \n", + "3 4 Sing Animation,Comedy,Family \n", + "4 5 Suicide Squad Action,Adventure,Fantasy \n", + "\n", + " Description Director \\\n", + "0 A group of intergalactic criminals are forced ... James Gunn \n", + "1 Following clues to the origin of mankind, a te... Ridley Scott \n", + "2 Three girls are kidnapped by a man with a diag... M. Night Shyamalan \n", + "3 In a city of humanoid animals, a hustling thea... Christophe Lourdelet \n", + "4 A secret government agency recruits some of th... David Ayer \n", + "\n", + " Actors Year Runtime (Minutes) \\\n", + "0 Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S... 2014 121 \n", + "1 Noomi Rapace, Logan Marshall-Green, Michael Fa... 2012 124 \n", + "2 James McAvoy, Anya Taylor-Joy, Haley Lu Richar... 2016 117 \n", + "3 Matthew McConaughey,Reese Witherspoon, Seth Ma... 2016 108 \n", + "4 Will Smith, Jared Leto, Margot Robbie, Viola D... 
2016 123 \n", + "\n", + " Rating Votes Revenue (Millions) Metascore bin Revenue per minutes \n", + "0 8.1 757074 333.13 76.0 cat_4 2.753140 \n", + "1 7.0 485820 126.46 65.0 cat_4 1.019839 \n", + "2 7.3 157606 138.12 62.0 cat_4 1.180513 \n", + "3 7.2 60545 270.32 59.0 cat_3 2.502963 \n", + "4 6.2 393727 325.02 40.0 cat_4 2.642439 " + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "34d35e6c", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python (jjupyter_env)", + "language": "python", + "name": "jupyter_env" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From d288a1d6d76b8843bc899cf3986c664b6446a446 Mon Sep 17 00:00:00 2001 From: Andrew Bavuels Date: Sun, 29 Oct 2023 13:23:34 +0100 Subject: [PATCH 2/4] =?UTF-8?q?Challenge=204:=20Definiendo=20funci=C3=B3n?= =?UTF-8?q?=20de=20n=C3=BAmeros=20primos?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pandas_apply_lambda.ipynb | 436 ++++++++++++++++++++++++++++++++++---- 1 file changed, 396 insertions(+), 40 deletions(-) diff --git a/pandas_apply_lambda.ipynb b/pandas_apply_lambda.ipynb index c6f1f9e..8471bf0 100644 --- a/pandas_apply_lambda.ipynb +++ b/pandas_apply_lambda.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 21, + "execution_count": 47, "id": "5bd1b48d", "metadata": {}, "outputs": [], @@ -12,7 +12,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 48, "id": "c31b6a09", "metadata": {}, "outputs": [], @@ -22,7 +22,7 @@ }, { "cell_type": "code", - 
"execution_count": 23, + "execution_count": 49, "id": "793c484b", "metadata": {}, "outputs": [ @@ -58,7 +58,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 50, "id": "0a56b116", "metadata": {}, "outputs": [ @@ -199,7 +199,7 @@ "max 936.630000 100.000000 " ] }, - "execution_count": 24, + "execution_count": 50, "metadata": {}, "output_type": "execute_result" } @@ -210,7 +210,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 51, "id": "548491fc", "metadata": {}, "outputs": [ @@ -359,7 +359,7 @@ "4 6.2 393727 325.02 40.0 " ] }, - "execution_count": 25, + "execution_count": 51, "metadata": {}, "output_type": "execute_result" } @@ -385,27 +385,27 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 52, "id": "d6a3a52f", "metadata": {}, "outputs": [], "source": [ "def warm_up(x):\n", " if x >= 0 and x <= 999:\n", - " return \"cat_1\"\n", + " return \"Cat 1\"\n", " elif x >= 1000 and x <= 9999:\n", - " return \"cat_2\"\n", + " return \"Cat 2\"\n", " elif x >= 10000 and x <= 99999:\n", - " return \"cat_3\"\n", + " return \"Cat 3\"\n", " elif x >= 100000 and x <= 999999:\n", - " return \"cat_4\"\n", + " return \"Cat 4\"\n", " elif x >= 1000000:\n", - " return \"cat_5\"" + " return \"Cat 5\"" ] }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 53, "id": "64f89a69", "metadata": {}, "outputs": [], @@ -415,7 +415,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 54, "id": "c1b5d114", "metadata": {}, "outputs": [ @@ -470,7 +470,7 @@ " 757074\n", " 333.13\n", " 76.0\n", - " cat_4\n", + " Cat 4\n", " \n", " \n", " 1\n", @@ -486,7 +486,7 @@ " 485820\n", " 126.46\n", " 65.0\n", - " cat_4\n", + " Cat 4\n", " \n", " \n", " 2\n", @@ -502,7 +502,7 @@ " 157606\n", " 138.12\n", " 62.0\n", - " cat_4\n", + " Cat 4\n", " \n", " \n", " 3\n", @@ -518,7 +518,7 @@ " 60545\n", " 270.32\n", " 59.0\n", - " cat_3\n", + " Cat 3\n", " \n", " \n", " 4\n", @@ -534,7 +534,7 @@ " 
393727\n", " 325.02\n", " 40.0\n", - " cat_4\n", + " Cat 4\n", " \n", " \n", "\n", @@ -563,14 +563,14 @@ "4 Will Smith, Jared Leto, Margot Robbie, Viola D... 2016 123 \n", "\n", " Rating Votes Revenue (Millions) Metascore bin \n", - "0 8.1 757074 333.13 76.0 cat_4 \n", - "1 7.0 485820 126.46 65.0 cat_4 \n", - "2 7.3 157606 138.12 62.0 cat_4 \n", - "3 7.2 60545 270.32 59.0 cat_3 \n", - "4 6.2 393727 325.02 40.0 cat_4 " + "0 8.1 757074 333.13 76.0 Cat 4 \n", + "1 7.0 485820 126.46 65.0 Cat 4 \n", + "2 7.3 157606 138.12 62.0 Cat 4 \n", + "3 7.2 60545 270.32 59.0 Cat 3 \n", + "4 6.2 393727 325.02 40.0 Cat 4 " ] }, - "execution_count": 30, + "execution_count": 54, "metadata": {}, "output_type": "execute_result" } @@ -590,7 +590,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 55, "id": "d47bcaad", "metadata": {}, "outputs": [], @@ -600,7 +600,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 56, "id": "a3665a70", "metadata": {}, "outputs": [ @@ -656,7 +656,7 @@ " 757074\n", " 333.13\n", " 76.0\n", - " cat_4\n", + " Cat 4\n", " 2.753140\n", " \n", " \n", @@ -673,7 +673,7 @@ " 485820\n", " 126.46\n", " 65.0\n", - " cat_4\n", + " Cat 4\n", " 1.019839\n", " \n", " \n", @@ -690,7 +690,7 @@ " 157606\n", " 138.12\n", " 62.0\n", - " cat_4\n", + " Cat 4\n", " 1.180513\n", " \n", " \n", @@ -707,7 +707,7 @@ " 60545\n", " 270.32\n", " 59.0\n", - " cat_3\n", + " Cat 3\n", " 2.502963\n", " \n", " \n", @@ -724,7 +724,7 @@ " 393727\n", " 325.02\n", " 40.0\n", - " cat_4\n", + " Cat 4\n", " 2.642439\n", " \n", " \n", @@ -754,14 +754,14 @@ "4 Will Smith, Jared Leto, Margot Robbie, Viola D... 
2016 123 \n", "\n", " Rating Votes Revenue (Millions) Metascore bin Revenue per minutes \n", - "0 8.1 757074 333.13 76.0 cat_4 2.753140 \n", - "1 7.0 485820 126.46 65.0 cat_4 1.019839 \n", - "2 7.3 157606 138.12 62.0 cat_4 1.180513 \n", - "3 7.2 60545 270.32 59.0 cat_3 2.502963 \n", - "4 6.2 393727 325.02 40.0 cat_4 2.642439 " + "0 8.1 757074 333.13 76.0 Cat 4 2.753140 \n", + "1 7.0 485820 126.46 65.0 Cat 4 1.019839 \n", + "2 7.3 157606 138.12 62.0 Cat 4 1.180513 \n", + "3 7.2 60545 270.32 59.0 Cat 3 2.502963 \n", + "4 6.2 393727 325.02 40.0 Cat 4 2.642439 " ] }, - "execution_count": 33, + "execution_count": 56, "metadata": {}, "output_type": "execute_result" } @@ -770,13 +770,369 @@ "df.head(5)" ] }, + { + "cell_type": "markdown", + "id": "73da8c52", + "metadata": {}, + "source": [ + "### Challenge 3. A bit more complicated\n", + "We want to create a new rating where we add 1 point if the genre is thriller but subtract 1 point if the genre is comedy." + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "b404ff9e", + "metadata": {}, + "outputs": [], + "source": [ + "def new_rating_genre(x,y): # x para Genre, y para Rank\n", + " if 'Thriller' in x:\n", + " y += 1\n", + " elif 'Comedy' in x:\n", + " y -= 1\n", + " return y" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "id": "556f58eb", + "metadata": {}, + "outputs": [], + "source": [ + "df['New Rating'] = df.apply(lambda row: new_rating_genre(row['Genre'], row['Rank']), axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "id": "c6d960b3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
RankTitleGenreDescriptionDirectorActorsYearRuntime (Minutes)RatingVotesRevenue (Millions)MetascorebinRevenue per minutesNew Rating
01Guardians of the GalaxyAction,Adventure,Sci-FiA group of intergalactic criminals are forced ...James GunnChris Pratt, Vin Diesel, Bradley Cooper, Zoe S...20141218.1757074333.1376.0Cat 42.7531401
12PrometheusAdventure,Mystery,Sci-FiFollowing clues to the origin of mankind, a te...Ridley ScottNoomi Rapace, Logan Marshall-Green, Michael Fa...20121247.0485820126.4665.0Cat 41.0198392
23SplitHorror,ThrillerThree girls are kidnapped by a man with a diag...M. Night ShyamalanJames McAvoy, Anya Taylor-Joy, Haley Lu Richar...20161177.3157606138.1262.0Cat 41.1805134
34SingAnimation,Comedy,FamilyIn a city of humanoid animals, a hustling thea...Christophe LourdeletMatthew McConaughey,Reese Witherspoon, Seth Ma...20161087.260545270.3259.0Cat 32.5029633
45Suicide SquadAction,Adventure,FantasyA secret government agency recruits some of th...David AyerWill Smith, Jared Leto, Margot Robbie, Viola D...20161236.2393727325.0240.0Cat 42.6424395
\n", + "
" + ], + "text/plain": [ + " Rank Title Genre \\\n", + "0 1 Guardians of the Galaxy Action,Adventure,Sci-Fi \n", + "1 2 Prometheus Adventure,Mystery,Sci-Fi \n", + "2 3 Split Horror,Thriller \n", + "3 4 Sing Animation,Comedy,Family \n", + "4 5 Suicide Squad Action,Adventure,Fantasy \n", + "\n", + " Description Director \\\n", + "0 A group of intergalactic criminals are forced ... James Gunn \n", + "1 Following clues to the origin of mankind, a te... Ridley Scott \n", + "2 Three girls are kidnapped by a man with a diag... M. Night Shyamalan \n", + "3 In a city of humanoid animals, a hustling thea... Christophe Lourdelet \n", + "4 A secret government agency recruits some of th... David Ayer \n", + "\n", + " Actors Year Runtime (Minutes) \\\n", + "0 Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S... 2014 121 \n", + "1 Noomi Rapace, Logan Marshall-Green, Michael Fa... 2012 124 \n", + "2 James McAvoy, Anya Taylor-Joy, Haley Lu Richar... 2016 117 \n", + "3 Matthew McConaughey,Reese Witherspoon, Seth Ma... 2016 108 \n", + "4 Will Smith, Jared Leto, Margot Robbie, Viola D... 2016 123 \n", + "\n", + " Rating Votes Revenue (Millions) Metascore bin Revenue per minutes \\\n", + "0 8.1 757074 333.13 76.0 Cat 4 2.753140 \n", + "1 7.0 485820 126.46 65.0 Cat 4 1.019839 \n", + "2 7.3 157606 138.12 62.0 Cat 4 1.180513 \n", + "3 7.2 60545 270.32 59.0 Cat 3 2.502963 \n", + "4 6.2 393727 325.02 40.0 Cat 4 2.642439 \n", + "\n", + " New Rating \n", + "0 1 \n", + "1 2 \n", + "2 4 \n", + "3 3 \n", + "4 5 " + ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head(5)" + ] + }, + { + "cell_type": "markdown", + "id": "48b8a3eb", + "metadata": {}, + "source": [ + "### Challenge 4. A bit too weird...\n", + "We want to know whether the integer part of the number resulting from the sum of the ASCII value of every character of the movie title divided by the number of votes, is a prime number (remember that prime numbers are integers)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 105, + "id": "0ae32695", + "metadata": {}, + "outputs": [], + "source": [ + "# Código ASCII: https://elcodigoascii.com.ar/\n", + "\n", + "# Your code here (https://docs.python.org/3/library/functions.html#ord) & (https://foro.elhacker.net/scripting/pythonsumar_valor_numerico_de_cada_caracter_de_una_cadenaascii-t338102.0.html)\n", + "\n", + "# Pseudocode: Ej. 13 se divide entre 1 y así mismo\n", + "\n", + "# Guardians of the Galaxy(sum(ASCII characters) / number_of_votes) = prime_number\n", + "\n", + " # x for title\n", + "def sum_ascii(x):\n", + " sum_char = 0\n", + " for char in x:\n", + " sum_char += ord(char)\n", + " return sum_char\n", + "# sum_ascii('abc')" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "id": "0cdda8d9", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0 0\n", + "1 0\n", + "2 0\n", + "3 0\n", + "4 0\n", + " ..\n", + "995 0\n", + "996 0\n", + "997 0\n", + "998 0\n", + "999 0\n", + "Length: 1000, dtype: int64\n" + ] + } + ], + "source": [ + "# Weird results\n", + "\n", + "division = df.apply(lambda row: int(sum_ascii(row['Title']) / row['Votes']), axis=1)\n", + "print(division)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 112, + "id": "d39c5e64", + "metadata": {}, + "outputs": [], + "source": [ + "def prime(y):\n", + " for n in range(2, y):\n", + " if y % 2 == 0:\n", + " return False\n", + " else:\n", + " return True\n", + "prime(2)" + ] + }, { "cell_type": "code", "execution_count": null, - "id": "34d35e6c", + "id": "ebb25997", + "metadata": {}, + "outputs": [], + "source": [ + "def es_primo(num):\n", + " for n in range(2, num):\n", + " if num % n == 0:\n", + " print(\"No es primo\", n, \"es divisor\")\n", + " return False\n", + " print(\"Es primo\")\n", + " return True" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "64e98cd4", + "metadata": {}, + "outputs": [], + "source": [ + "# 
Dummy code\n", + "\n", + "df['New Rating'] = df.apply(lambda row: new_rating_genre(row['Genre'], row['Rank']), axis=1)" + ] + }, + { + "cell_type": "markdown", + "id": "714c22c8", + "metadata": {}, + "source": [ + "### Challenge 5. And finally some fantasy\n", + "Feel free to propose your own ranking based in aggregations of at least 3 columns of the dataset." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "751cad88", "metadata": {}, "outputs": [], "source": [] + }, + { + "cell_type": "markdown", + "id": "73d23e4d", + "metadata": {}, + "source": [ + "### Bonus challenge. Freaky bonus\n", + "We want to know which movies might have hidden paterns in their description. A way to know that is finding those movies which the sum of all numeric values of the string description hash (SHA256) are between their revenue and their number of votes." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1109beb3", + "metadata": {}, + "outputs": [], + "source": [ + "# Your code here" + ] } ], "metadata": { From 591be02612b42a06095a0c48d0866c74e234ffc7 Mon Sep 17 00:00:00 2001 From: Andrew Bavuels Date: Sun, 29 Oct 2023 17:14:08 +0100 Subject: [PATCH 3/4] Voy por el Challenge 5 --- pandas_apply_lambda.ipynb | 463 ++++++++++++++++++++++++++++++++++---- 1 file changed, 418 insertions(+), 45 deletions(-) diff --git a/pandas_apply_lambda.ipynb b/pandas_apply_lambda.ipynb index 8471bf0..15fc674 100644 --- a/pandas_apply_lambda.ipynb +++ b/pandas_apply_lambda.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 47, + "execution_count": 20, "id": "5bd1b48d", "metadata": {}, "outputs": [], @@ -12,7 +12,7 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 21, "id": "c31b6a09", "metadata": {}, "outputs": [], @@ -22,7 +22,7 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 22, "id": "793c484b", "metadata": {}, "outputs": [ @@ -58,7 +58,7 @@ }, { "cell_type": "code", - 
"execution_count": 50, + "execution_count": 23, "id": "0a56b116", "metadata": {}, "outputs": [ @@ -199,7 +199,7 @@ "max 936.630000 100.000000 " ] }, - "execution_count": 50, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -210,7 +210,7 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 24, "id": "548491fc", "metadata": {}, "outputs": [ @@ -359,7 +359,7 @@ "4 6.2 393727 325.02 40.0 " ] }, - "execution_count": 51, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } @@ -385,7 +385,7 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 25, "id": "d6a3a52f", "metadata": {}, "outputs": [], @@ -405,7 +405,7 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 26, "id": "64f89a69", "metadata": {}, "outputs": [], @@ -415,7 +415,7 @@ }, { "cell_type": "code", - "execution_count": 54, + "execution_count": 27, "id": "c1b5d114", "metadata": {}, "outputs": [ @@ -570,7 +570,7 @@ "4 6.2 393727 325.02 40.0 Cat 4 " ] }, - "execution_count": 54, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } @@ -590,7 +590,7 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 28, "id": "d47bcaad", "metadata": {}, "outputs": [], @@ -600,7 +600,7 @@ }, { "cell_type": "code", - "execution_count": 56, + "execution_count": 29, "id": "a3665a70", "metadata": {}, "outputs": [ @@ -761,7 +761,7 @@ "4 6.2 393727 325.02 40.0 Cat 4 2.642439 " ] }, - "execution_count": 56, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } @@ -781,7 +781,7 @@ }, { "cell_type": "code", - "execution_count": 57, + "execution_count": 30, "id": "b404ff9e", "metadata": {}, "outputs": [], @@ -796,7 +796,7 @@ }, { "cell_type": "code", - "execution_count": 58, + "execution_count": 31, "id": "556f58eb", "metadata": {}, "outputs": [], @@ -806,7 +806,7 @@ }, { "cell_type": "code", - "execution_count": 59, + "execution_count": 32, "id": "c6d960b3", 
"metadata": {}, "outputs": [ @@ -980,7 +980,7 @@ "4 5 " ] }, - "execution_count": 59, + "execution_count": 32, "metadata": {}, "output_type": "execute_result" } @@ -1000,7 +1000,7 @@ }, { "cell_type": "code", - "execution_count": 105, + "execution_count": 33, "id": "0ae32695", "metadata": {}, "outputs": [], @@ -1024,7 +1024,7 @@ }, { "cell_type": "code", - "execution_count": 107, + "execution_count": 39, "id": "0cdda8d9", "metadata": {}, "outputs": [ @@ -1051,51 +1051,226 @@ "# Weird results\n", "\n", "division = df.apply(lambda row: int(sum_ascii(row['Title']) / row['Votes']), axis=1)\n", - "print(division)\n" + "print(division)" ] }, { "cell_type": "code", - "execution_count": 112, + "execution_count": 63, "id": "d39c5e64", "metadata": {}, "outputs": [], "source": [ - "def prime(y):\n", - " for n in range(2, y):\n", - " if y % 2 == 0:\n", + "def prime(number):\n", + " if type(number) == float:\n", + " number = int(number)\n", + " if number < 2:\n", + " return False\n", + " for i in range(2, number):\n", + " if number % i == 0:\n", " return False\n", - " else:\n", - " return True\n", - "prime(2)" + " return True" ] }, { "cell_type": "code", - "execution_count": null, - "id": "ebb25997", + "execution_count": 66, + "id": "03b911a2", "metadata": {}, "outputs": [], "source": [ - "def es_primo(num):\n", - " for n in range(2, num):\n", - " if num % n == 0:\n", - " print(\"No es primo\", n, \"es divisor\")\n", - " return False\n", - " print(\"Es primo\")\n", - " return True" + "df['Is prime?'] = division.apply(lambda division_result: 'Yes' if prime(division_result) else 'No')" ] }, { "cell_type": "code", - "execution_count": null, - "id": "64e98cd4", + "execution_count": 68, + "id": "ebb25997", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
RankTitleGenreDescriptionDirectorActorsYearRuntime (Minutes)RatingVotesRevenue (Millions)MetascorebinRevenue per minutesNew RatingIs prime?
4445LowridersDramaA young street artist in East Los Angeles is c...Ricardo de MontreuilGabriel Chavarria, Demián Bichir, Theo Rossi,T...2016996.32794.2157.0Cat 10.04252545Yes
6869WakefieldDramaA man's nervous breakdown causes him to leave ...Robin SwicordBryan Cranston, Jennifer Garner, Beverly D'Ang...20161067.52910.0161.0Cat 10.00009469Yes
112113The Bad BatchRomance,Sci-FiA dystopian love story in a Texas wasteland an...Ana Lily AmirpourKeanu Reeves, Jason Momoa, Jim Carrey, Diego Luna20161186.1512NaN65.0Cat 1NaN113Yes
293294The ExceptionDramaA German soldier tries to determine if the Dut...David LeveauxLily James, Jai Courtney, Christopher Plummer,...20161077.796NaNNaNCat 1NaN294Yes
307308Vincent N RoxxyCrime,Drama,ThrillerA small town loner and a rebellious punk rocke...Gary Michael SchultzEmile Hirsch, Zoë Kravitz, Zoey Deutch,Emory C...20161105.5403NaNNaNCat 1NaN309Yes
\n", + "
" + ], + "text/plain": [ + " Rank Title Genre \\\n", + "44 45 Lowriders Drama \n", + "68 69 Wakefield Drama \n", + "112 113 The Bad Batch Romance,Sci-Fi \n", + "293 294 The Exception Drama \n", + "307 308 Vincent N Roxxy Crime,Drama,Thriller \n", + "\n", + " Description Director \\\n", + "44 A young street artist in East Los Angeles is c... Ricardo de Montreuil \n", + "68 A man's nervous breakdown causes him to leave ... Robin Swicord \n", + "112 A dystopian love story in a Texas wasteland an... Ana Lily Amirpour \n", + "293 A German soldier tries to determine if the Dut... David Leveaux \n", + "307 A small town loner and a rebellious punk rocke... Gary Michael Schultz \n", + "\n", + " Actors Year \\\n", + "44 Gabriel Chavarria, Demián Bichir, Theo Rossi,T... 2016 \n", + "68 Bryan Cranston, Jennifer Garner, Beverly D'Ang... 2016 \n", + "112 Keanu Reeves, Jason Momoa, Jim Carrey, Diego Luna 2016 \n", + "293 Lily James, Jai Courtney, Christopher Plummer,... 2016 \n", + "307 Emile Hirsch, Zoë Kravitz, Zoey Deutch,Emory C... 2016 \n", + "\n", + " Runtime (Minutes) Rating Votes Revenue (Millions) Metascore bin \\\n", + "44 99 6.3 279 4.21 57.0 Cat 1 \n", + "68 106 7.5 291 0.01 61.0 Cat 1 \n", + "112 118 6.1 512 NaN 65.0 Cat 1 \n", + "293 107 7.7 96 NaN NaN Cat 1 \n", + "307 110 5.5 403 NaN NaN Cat 1 \n", + "\n", + " Revenue per minutes New Rating Is prime? \n", + "44 0.042525 45 Yes \n", + "68 0.000094 69 Yes \n", + "112 NaN 113 Yes \n", + "293 NaN 294 Yes \n", + "307 NaN 309 Yes " + ] + }, + "execution_count": 68, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Dummy code\n", - "\n", - "df['New Rating'] = df.apply(lambda row: new_rating_genre(row['Genre'], row['Rank']), axis=1)" + "df[df['Is prime?'] == 'Yes'].head()" ] }, { @@ -1109,11 +1284,209 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 77, "id": "751cad88", "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
RankTitleGenreDescriptionDirectorActorsYearRuntime (Minutes)RatingVotesRevenue (Millions)MetascorebinRevenue per minutesNew RatingIs prime?
01Guardians of the GalaxyAction,Adventure,Sci-FiA group of intergalactic criminals are forced ...James GunnChris Pratt, Vin Diesel, Bradley Cooper, Zoe S...20141218.1757074333.1376.0Cat 42.7531401No
12PrometheusAdventure,Mystery,Sci-FiFollowing clues to the origin of mankind, a te...Ridley ScottNoomi Rapace, Logan Marshall-Green, Michael Fa...20121247.0485820126.4665.0Cat 41.0198392No
23SplitHorror,ThrillerThree girls are kidnapped by a man with a diag...M. Night ShyamalanJames McAvoy, Anya Taylor-Joy, Haley Lu Richar...20161177.3157606138.1262.0Cat 41.1805134No
34SingAnimation,Comedy,FamilyIn a city of humanoid animals, a hustling thea...Christophe LourdeletMatthew McConaughey,Reese Witherspoon, Seth Ma...20161087.260545270.3259.0Cat 32.5029633No
45Suicide SquadAction,Adventure,FantasyA secret government agency recruits some of th...David AyerWill Smith, Jared Leto, Margot Robbie, Viola D...20161236.2393727325.0240.0Cat 42.6424395No
\n", + "
" + ], + "text/plain": [ + " Rank Title Genre \\\n", + "0 1 Guardians of the Galaxy Action,Adventure,Sci-Fi \n", + "1 2 Prometheus Adventure,Mystery,Sci-Fi \n", + "2 3 Split Horror,Thriller \n", + "3 4 Sing Animation,Comedy,Family \n", + "4 5 Suicide Squad Action,Adventure,Fantasy \n", + "\n", + " Description Director \\\n", + "0 A group of intergalactic criminals are forced ... James Gunn \n", + "1 Following clues to the origin of mankind, a te... Ridley Scott \n", + "2 Three girls are kidnapped by a man with a diag... M. Night Shyamalan \n", + "3 In a city of humanoid animals, a hustling thea... Christophe Lourdelet \n", + "4 A secret government agency recruits some of th... David Ayer \n", + "\n", + " Actors Year Runtime (Minutes) \\\n", + "0 Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S... 2014 121 \n", + "1 Noomi Rapace, Logan Marshall-Green, Michael Fa... 2012 124 \n", + "2 James McAvoy, Anya Taylor-Joy, Haley Lu Richar... 2016 117 \n", + "3 Matthew McConaughey,Reese Witherspoon, Seth Ma... 2016 108 \n", + "4 Will Smith, Jared Leto, Margot Robbie, Viola D... 2016 123 \n", + "\n", + " Rating Votes Revenue (Millions) Metascore bin Revenue per minutes \\\n", + "0 8.1 757074 333.13 76.0 Cat 4 2.753140 \n", + "1 7.0 485820 126.46 65.0 Cat 4 1.019839 \n", + "2 7.3 157606 138.12 62.0 Cat 4 1.180513 \n", + "3 7.2 60545 270.32 59.0 Cat 3 2.502963 \n", + "4 6.2 393727 325.02 40.0 Cat 4 2.642439 \n", + "\n", + " New Rating Is prime? \n", + "0 1 No \n", + "1 2 No \n", + "2 4 No \n", + "3 3 No \n", + "4 5 No " + ] + }, + "execution_count": 77, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7e1a8767", + "metadata": {}, "outputs": [], - "source": [] + "source": [ + "'''\n", + "Categorías:\n", + "\n", + "1. Oppenheimer: 926 Million Dollars\n", + "2. 
Barbie: 1380 Million Dollars\n", + "'''" + ] }, { "cell_type": "markdown", From d048f6a6a9f4ce1bb283b03175496a2e3d2d9ed7 Mon Sep 17 00:00:00 2001 From: Andrew Bavuels Date: Thu, 2 Nov 2023 14:51:52 +0100 Subject: [PATCH 4/4] Workshop finished --- pandas_apply_lambda.ipynb | 967 ++++++++++++++++++++++++++++++++++---- 1 file changed, 885 insertions(+), 82 deletions(-) diff --git a/pandas_apply_lambda.ipynb b/pandas_apply_lambda.ipynb index 15fc674..526b75d 100644 --- a/pandas_apply_lambda.ipynb +++ b/pandas_apply_lambda.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 20, + "execution_count": 158, "id": "5bd1b48d", "metadata": {}, "outputs": [], @@ -12,7 +12,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 101, "id": "c31b6a09", "metadata": {}, "outputs": [], @@ -22,7 +22,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 102, "id": "793c484b", "metadata": {}, "outputs": [ @@ -58,7 +58,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 103, "id": "0a56b116", "metadata": {}, "outputs": [ @@ -199,7 +199,7 @@ "max 936.630000 100.000000 " ] }, - "execution_count": 23, + "execution_count": 103, "metadata": {}, "output_type": "execute_result" } @@ -210,7 +210,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 104, "id": "548491fc", "metadata": {}, "outputs": [ @@ -359,7 +359,7 @@ "4 6.2 393727 325.02 40.0 " ] }, - "execution_count": 24, + "execution_count": 104, "metadata": {}, "output_type": "execute_result" } @@ -385,7 +385,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 105, "id": "d6a3a52f", "metadata": {}, "outputs": [], @@ -405,7 +405,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 106, "id": "64f89a69", "metadata": {}, "outputs": [], @@ -415,7 +415,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 107, "id": "c1b5d114", "metadata": {}, "outputs": [ @@ 
-570,7 +570,7 @@ "4 6.2 393727 325.02 40.0 Cat 4 " ] }, - "execution_count": 27, + "execution_count": 107, "metadata": {}, "output_type": "execute_result" } @@ -590,7 +590,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 108, "id": "d47bcaad", "metadata": {}, "outputs": [], @@ -600,7 +600,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 109, "id": "a3665a70", "metadata": {}, "outputs": [ @@ -761,7 +761,7 @@ "4 6.2 393727 325.02 40.0 Cat 4 2.642439 " ] }, - "execution_count": 29, + "execution_count": 109, "metadata": {}, "output_type": "execute_result" } @@ -781,7 +781,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 110, "id": "b404ff9e", "metadata": {}, "outputs": [], @@ -796,7 +796,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 111, "id": "556f58eb", "metadata": {}, "outputs": [], @@ -806,7 +806,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 112, "id": "c6d960b3", "metadata": {}, "outputs": [ @@ -980,7 +980,7 @@ "4 5 " ] }, - "execution_count": 32, + "execution_count": 112, "metadata": {}, "output_type": "execute_result" } @@ -1000,7 +1000,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 113, "id": "0ae32695", "metadata": {}, "outputs": [], @@ -1018,13 +1018,12 @@ " sum_char = 0\n", " for char in x:\n", " sum_char += ord(char)\n", - " return sum_char\n", - "# sum_ascii('abc')" + " return sum_char" ] }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 114, "id": "0cdda8d9", "metadata": {}, "outputs": [ @@ -1056,7 +1055,7 @@ }, { "cell_type": "code", - "execution_count": 63, + "execution_count": 115, "id": "d39c5e64", "metadata": {}, "outputs": [], @@ -1074,7 +1073,7 @@ }, { "cell_type": "code", - "execution_count": 66, + "execution_count": 116, "id": "03b911a2", "metadata": {}, "outputs": [], @@ -1084,7 +1083,7 @@ }, { "cell_type": "code", - "execution_count": 68, + 
"execution_count": 117, "id": "ebb25997", "metadata": {}, "outputs": [ @@ -1264,7 +1263,7 @@ "307 NaN 309 Yes " ] }, - "execution_count": 68, + "execution_count": 117, "metadata": {}, "output_type": "execute_result" } @@ -1284,8 +1283,21 @@ }, { "cell_type": "code", - "execution_count": 77, - "id": "751cad88", + "execution_count": 118, + "id": "bce40f18", + "metadata": {}, + "outputs": [], + "source": [ + "fantasy_ranking.loc[:, 'Is prime?'] = 'Yes'\n", + "fantasy_ranking.loc[:, 'bin'] = 'Cat 5'\n", + "fantasy_ranking.loc[:, 'New Rating'] = fantasy_ranking['New Rating'].max()\n", + "fantasy_ranking.loc[:, 'Revenue (Millions)'] = fantasy_ranking['Revenue (Millions)'].max()" + ] + }, + { + "cell_type": "code", + "execution_count": 119, + "id": "cf05f740", "metadata": {}, "outputs": [ { @@ -1329,6 +1341,202 @@ " \n", " \n", " \n", + " 36\n", + " 37\n", + " Interstellar\n", + " Adventure,Drama,Sci-Fi\n", + " A team of explorers travel through a wormhole ...\n", + " Christopher Nolan\n", + " Matthew McConaughey, Anne Hathaway, Jessica Ch...\n", + " 2014\n", + " 169\n", + " 8.6\n", + " 1047747\n", + " 623.28\n", + " 74.0\n", + " Cat 5\n", + " 1.112367\n", + " 145\n", + " Yes\n", + " \n", + " \n", + " 54\n", + " 55\n", + " The Dark Knight\n", + " Action,Crime,Drama\n", + " When the menace known as the Joker wreaks havo...\n", + " Christopher Nolan\n", + " Christian Bale, Heath Ledger, Aaron Eckhart,Mi...\n", + " 2008\n", + " 152\n", + " 9.0\n", + " 1791916\n", + " 623.28\n", + " 82.0\n", + " Cat 5\n", + " 3.508684\n", + " 145\n", + " Yes\n", + " \n", + " \n", + " 76\n", + " 77\n", + " The Avengers\n", + " Action,Sci-Fi\n", + " Earth's mightiest heroes must come together an...\n", + " Joss Whedon\n", + " Robert Downey Jr., Chris Evans, Scarlett Johan...\n", + " 2012\n", + " 143\n", + " 8.1\n", + " 1045588\n", + " 623.28\n", + " 69.0\n", + " Cat 5\n", + " 4.358601\n", + " 145\n", + " Yes\n", + " \n", + " \n", + " 80\n", + " 81\n", + " Inception\n", + " 
Action,Adventure,Sci-Fi\n", + " A thief, who steals corporate secrets through ...\n", + " Christopher Nolan\n", + " Leonardo DiCaprio, Joseph Gordon-Levitt, Ellen...\n", + " 2010\n", + " 148\n", + " 8.8\n", + " 1583625\n", + " 623.28\n", + " 74.0\n", + " Cat 5\n", + " 1.976824\n", + " 145\n", + " Yes\n", + " \n", + " \n", + " 124\n", + " 125\n", + " The Dark Knight Rises\n", + " Action,Thriller\n", + " Eight years after the Joker's reign of anarchy...\n", + " Christopher Nolan\n", + " Christian Bale, Tom Hardy, Anne Hathaway,Gary ...\n", + " 2012\n", + " 164\n", + " 8.5\n", + " 1222645\n", + " 623.28\n", + " 78.0\n", + " Cat 5\n", + " 2.732500\n", + " 145\n", + " Yes\n", + " \n", + " \n", + "\n", + "" + ], + "text/plain": [ + " Rank Title Genre \\\n", + "36 37 Interstellar Adventure,Drama,Sci-Fi \n", + "54 55 The Dark Knight Action,Crime,Drama \n", + "76 77 The Avengers Action,Sci-Fi \n", + "80 81 Inception Action,Adventure,Sci-Fi \n", + "124 125 The Dark Knight Rises Action,Thriller \n", + "\n", + " Description Director \\\n", + "36 A team of explorers travel through a wormhole ... Christopher Nolan \n", + "54 When the menace known as the Joker wreaks havo... Christopher Nolan \n", + "76 Earth's mightiest heroes must come together an... Joss Whedon \n", + "80 A thief, who steals corporate secrets through ... Christopher Nolan \n", + "124 Eight years after the Joker's reign of anarchy... Christopher Nolan \n", + "\n", + " Actors Year \\\n", + "36 Matthew McConaughey, Anne Hathaway, Jessica Ch... 2014 \n", + "54 Christian Bale, Heath Ledger, Aaron Eckhart,Mi... 2008 \n", + "76 Robert Downey Jr., Chris Evans, Scarlett Johan... 2012 \n", + "80 Leonardo DiCaprio, Joseph Gordon-Levitt, Ellen... 2010 \n", + "124 Christian Bale, Tom Hardy, Anne Hathaway,Gary ... 
2012 \n", + "\n", + " Runtime (Minutes) Rating Votes Revenue (Millions) Metascore bin \\\n", + "36 169 8.6 1047747 623.28 74.0 Cat 5 \n", + "54 152 9.0 1791916 623.28 82.0 Cat 5 \n", + "76 143 8.1 1045588 623.28 69.0 Cat 5 \n", + "80 148 8.8 1583625 623.28 74.0 Cat 5 \n", + "124 164 8.5 1222645 623.28 78.0 Cat 5 \n", + "\n", + " Revenue per minutes New Rating Is prime? \n", + "36 1.112367 145 Yes \n", + "54 3.508684 145 Yes \n", + "76 4.358601 145 Yes \n", + "80 1.976824 145 Yes \n", + "124 2.732500 145 Yes " + ] + }, + "execution_count": 119, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fantasy_ranking.head()" + ] + }, + { + "cell_type": "markdown", + "id": "73d23e4d", + "metadata": {}, + "source": [ + "### Bonus challenge. Freaky bonus\n", + "We want to know which movies might have hidden paterns in their description. A way to know that is finding those movies which the sum of all numeric values of the string description hash (SHA256) are between their revenue and their number of votes." + ] + }, + { + "cell_type": "code", + "execution_count": 138, + "id": "1109beb3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -1342,10 +1550,6 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", " \n", " \n", " \n", @@ -1361,10 +1565,6 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", " \n", " \n", " \n", @@ -1380,10 +1580,6 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", " \n", " \n", " \n", @@ -1399,10 +1595,6 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", " \n", " \n", " \n", @@ -1418,10 +1610,6 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", " \n", " \n", "
RankTitleGenreDescriptionDirectorActorsYearRuntime (Minutes)RatingVotesRevenue (Millions)Metascore
01Guardians of the Galaxy757074333.1376.0Cat 42.7531401No
1485820126.4665.0Cat 41.0198392No
2157606138.1262.0Cat 41.1805134No
360545270.3259.0Cat 32.5029633No
4393727325.0240.0Cat 42.6424395No
\n", @@ -1449,62 +1637,677 @@ "3 Matthew McConaughey,Reese Witherspoon, Seth Ma... 2016 108 \n", "4 Will Smith, Jared Leto, Margot Robbie, Viola D... 2016 123 \n", "\n", - " Rating Votes Revenue (Millions) Metascore bin Revenue per minutes \\\n", - "0 8.1 757074 333.13 76.0 Cat 4 2.753140 \n", - "1 7.0 485820 126.46 65.0 Cat 4 1.019839 \n", - "2 7.3 157606 138.12 62.0 Cat 4 1.180513 \n", - "3 7.2 60545 270.32 59.0 Cat 3 2.502963 \n", - "4 6.2 393727 325.02 40.0 Cat 4 2.642439 \n", - "\n", - " New Rating Is prime? \n", - "0 1 No \n", - "1 2 No \n", - "2 4 No \n", - "3 3 No \n", - "4 5 No " + " Rating Votes Revenue (Millions) Metascore \n", + "0 8.1 757074 333.13 76.0 \n", + "1 7.0 485820 126.46 65.0 \n", + "2 7.3 157606 138.12 62.0 \n", + "3 7.2 60545 270.32 59.0 \n", + "4 6.2 393727 325.02 40.0 " ] }, - "execution_count": 77, + "execution_count": 138, "metadata": {}, "output_type": "execute_result" } ], "source": [ + "df = pd.read_csv('data/input/IMDB-Movie-Data.csv')\n", "df.head()" ] }, { "cell_type": "code", - "execution_count": null, - "id": "7e1a8767", + "execution_count": 150, + "id": "d984d716", "metadata": {}, "outputs": [], "source": [ - "'''\n", - "Categorías:\n", + "# Your code here (https://stackoverflow.com/questions/70711801/how-to-hash-dataframe-column-to-sha256)\n", "\n", - "1. Oppenheimer: 926 Million Dollars\n", - "2. Barbie: 1380 Million Dollars\n", - "'''" - ] - }, - { - "cell_type": "markdown", - "id": "73d23e4d", - "metadata": {}, - "source": [ - "### Bonus challenge. Freaky bonus\n", - "We want to know which movies might have hidden paterns in their description. A way to know that is finding those movies which the sum of all numeric values of the string description hash (SHA256) are between their revenue and their number of votes." + "# 1. 
Hashear la descripción de la película\n", + "import hashlib\n", + "\n", + "def sha_256(x):\n", + " return hashlib.sha256(x.encode('utf-8')).hexdigest()" ] }, { "cell_type": "code", - "execution_count": null, - "id": "1109beb3", + "execution_count": 153, + "id": "423fbad7", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
RankTitleGenreDescriptionDirectorActorsYearRuntime (Minutes)RatingVotesRevenue (Millions)MetascoreDescription SHA256
01Guardians of the GalaxyAction,Adventure,Sci-FiA group of intergalactic criminals are forced ...James GunnChris Pratt, Vin Diesel, Bradley Cooper, Zoe S...20141218.1757074333.1376.089e55ad0e7d96003c037ac4fc2a4ebb0717338fdb9ce20...
12PrometheusAdventure,Mystery,Sci-FiFollowing clues to the origin of mankind, a te...Ridley ScottNoomi Rapace, Logan Marshall-Green, Michael Fa...20121247.0485820126.4665.01d8d06e5f9ee801692a1c8dc80f71fc0cd268afeb13115...
23SplitHorror,ThrillerThree girls are kidnapped by a man with a diag...M. Night ShyamalanJames McAvoy, Anya Taylor-Joy, Haley Lu Richar...20161177.3157606138.1262.0f4149191d2a3f8a6effb0e5812a15cf84d82e70dd566e9...
34SingAnimation,Comedy,FamilyIn a city of humanoid animals, a hustling thea...Christophe LourdeletMatthew McConaughey,Reese Witherspoon, Seth Ma...20161087.260545270.3259.08ace2c87f3f06d9d0b1a8cae5f2d049e86ff7e52528e83...
45Suicide SquadAction,Adventure,FantasyA secret government agency recruits some of th...David AyerWill Smith, Jared Leto, Margot Robbie, Viola D...20161236.2393727325.0240.075fc4955bd5d81e20cd79b1be17f07d5571e4d07a07492...
\n", + "
" + ], + "text/plain": [ + " Rank Title Genre \\\n", + "0 1 Guardians of the Galaxy Action,Adventure,Sci-Fi \n", + "1 2 Prometheus Adventure,Mystery,Sci-Fi \n", + "2 3 Split Horror,Thriller \n", + "3 4 Sing Animation,Comedy,Family \n", + "4 5 Suicide Squad Action,Adventure,Fantasy \n", + "\n", + " Description Director \\\n", + "0 A group of intergalactic criminals are forced ... James Gunn \n", + "1 Following clues to the origin of mankind, a te... Ridley Scott \n", + "2 Three girls are kidnapped by a man with a diag... M. Night Shyamalan \n", + "3 In a city of humanoid animals, a hustling thea... Christophe Lourdelet \n", + "4 A secret government agency recruits some of th... David Ayer \n", + "\n", + " Actors Year Runtime (Minutes) \\\n", + "0 Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S... 2014 121 \n", + "1 Noomi Rapace, Logan Marshall-Green, Michael Fa... 2012 124 \n", + "2 James McAvoy, Anya Taylor-Joy, Haley Lu Richar... 2016 117 \n", + "3 Matthew McConaughey,Reese Witherspoon, Seth Ma... 2016 108 \n", + "4 Will Smith, Jared Leto, Margot Robbie, Viola D... 2016 123 \n", + "\n", + " Rating Votes Revenue (Millions) Metascore \\\n", + "0 8.1 757074 333.13 76.0 \n", + "1 7.0 485820 126.46 65.0 \n", + "2 7.3 157606 138.12 62.0 \n", + "3 7.2 60545 270.32 59.0 \n", + "4 6.2 393727 325.02 40.0 \n", + "\n", + " Description SHA256 \n", + "0 89e55ad0e7d96003c037ac4fc2a4ebb0717338fdb9ce20... \n", + "1 1d8d06e5f9ee801692a1c8dc80f71fc0cd268afeb13115... \n", + "2 f4149191d2a3f8a6effb0e5812a15cf84d82e70dd566e9... \n", + "3 8ace2c87f3f06d9d0b1a8cae5f2d049e86ff7e52528e83... \n", + "4 75fc4955bd5d81e20cd79b1be17f07d5571e4d07a07492... " + ] + }, + "execution_count": 153, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 2. 
Duplicar columna (Movie description)\n", + "\n", + "df['Description SHA256'] = df['Description'].apply(lambda x: sha_256(x))\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 154, + "id": "f11acfe6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
RankTitleGenreDescriptionDirectorActorsYearRuntime (Minutes)RatingVotesRevenue (Millions)MetascoreDescription SHA256
01Guardians of the GalaxyAction,Adventure,Sci-FiA group of intergalactic criminals are forced ...James GunnChris Pratt, Vin Diesel, Bradley Cooper, Zoe S...20141218.1757074333.1376.04531
12PrometheusAdventure,Mystery,Sci-FiFollowing clues to the origin of mankind, a te...Ridley ScottNoomi Rapace, Logan Marshall-Green, Michael Fa...20121247.0485820126.4665.04580
23SplitHorror,ThrillerThree girls are kidnapped by a man with a diag...M. Night ShyamalanJames McAvoy, Anya Taylor-Joy, Haley Lu Richar...20161177.3157606138.1262.04367
34SingAnimation,Comedy,FamilyIn a city of humanoid animals, a hustling thea...Christophe LourdeletMatthew McConaughey,Reese Witherspoon, Seth Ma...20161087.260545270.3259.04645
45Suicide SquadAction,Adventure,FantasyA secret government agency recruits some of th...David AyerWill Smith, Jared Leto, Margot Robbie, Viola D...20161236.2393727325.0240.04408
\n", + "
" + ], + "text/plain": [ + " Rank Title Genre \\\n", + "0 1 Guardians of the Galaxy Action,Adventure,Sci-Fi \n", + "1 2 Prometheus Adventure,Mystery,Sci-Fi \n", + "2 3 Split Horror,Thriller \n", + "3 4 Sing Animation,Comedy,Family \n", + "4 5 Suicide Squad Action,Adventure,Fantasy \n", + "\n", + " Description Director \\\n", + "0 A group of intergalactic criminals are forced ... James Gunn \n", + "1 Following clues to the origin of mankind, a te... Ridley Scott \n", + "2 Three girls are kidnapped by a man with a diag... M. Night Shyamalan \n", + "3 In a city of humanoid animals, a hustling thea... Christophe Lourdelet \n", + "4 A secret government agency recruits some of th... David Ayer \n", + "\n", + " Actors Year Runtime (Minutes) \\\n", + "0 Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S... 2014 121 \n", + "1 Noomi Rapace, Logan Marshall-Green, Michael Fa... 2012 124 \n", + "2 James McAvoy, Anya Taylor-Joy, Haley Lu Richar... 2016 117 \n", + "3 Matthew McConaughey,Reese Witherspoon, Seth Ma... 2016 108 \n", + "4 Will Smith, Jared Leto, Margot Robbie, Viola D... 2016 123 \n", + "\n", + " Rating Votes Revenue (Millions) Metascore Description SHA256 \n", + "0 8.1 757074 333.13 76.0 4531 \n", + "1 7.0 485820 126.46 65.0 4580 \n", + "2 7.3 157606 138.12 62.0 4367 \n", + "3 7.2 60545 270.32 59.0 4645 \n", + "4 6.2 393727 325.02 40.0 4408 " + ] + }, + "execution_count": 154, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 3. 
Reemplazar los datos de la columna 'Description SHA256' con el resultado de la función def sum_ascii(x):\n", + "\n", + "def sum_ascii(x):\n", + " sum_char = 0\n", + " for char in x:\n", + " sum_char += ord(char)\n", + " return sum_char\n", + "\n", + "df['Description SHA256'] = df['Description SHA256'].apply(lambda x: sum_ascii(x))\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 163, + "id": "adb8b2a6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 1000 entries, 0 to 999\n", + "Data columns (total 13 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 Rank 1000 non-null int64 \n", + " 1 Title 1000 non-null object \n", + " 2 Genre 1000 non-null object \n", + " 3 Description 1000 non-null object \n", + " 4 Director 1000 non-null object \n", + " 5 Actors 1000 non-null object \n", + " 6 Year 1000 non-null int64 \n", + " 7 Runtime (Minutes) 1000 non-null int64 \n", + " 8 Rating 1000 non-null float64\n", + " 9 Votes 1000 non-null int64 \n", + " 10 Revenue (Millions) 872 non-null float64\n", + " 11 Metascore 936 non-null float64\n", + " 12 Description SHA256 1000 non-null int64 \n", + "dtypes: float64(3), int64(5), object(5)\n", + "memory usage: 101.7+ KB\n" + ] + } + ], + "source": [ + "df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 167, + "id": "73af02ae", + "metadata": {}, + "outputs": [], + "source": [ + "# Transformar df['Revenue (Millions)'] a enteros:\n", + "\n", + "df['Revenue (Millions)'] = df['Revenue (Millions)'].fillna(0).astype('int64')" + ] + }, + { + "cell_type": "code", + "execution_count": 168, + "id": "47bfd56f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 1000 entries, 0 to 999\n", + "Data columns (total 13 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 Rank 1000 
non-null int64 \n", + " 1 Title 1000 non-null object \n", + " 2 Genre 1000 non-null object \n", + " 3 Description 1000 non-null object \n", + " 4 Director 1000 non-null object \n", + " 5 Actors 1000 non-null object \n", + " 6 Year 1000 non-null int64 \n", + " 7 Runtime (Minutes) 1000 non-null int64 \n", + " 8 Rating 1000 non-null float64\n", + " 9 Votes 1000 non-null int64 \n", + " 10 Revenue (Millions) 1000 non-null int64 \n", + " 11 Metascore 936 non-null float64\n", + " 12 Description SHA256 1000 non-null int64 \n", + "dtypes: float64(2), int64(6), object(5)\n", + "memory usage: 101.7+ KB\n" + ] + } + ], + "source": [ + "df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 172, + "id": "62843cea", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
RankTitleGenreDescriptionDirectorActorsYearRuntime (Minutes)RatingVotesRevenue (Millions)MetascoreDescription SHA256Freaky Movie
01Guardians of the GalaxyAction,Adventure,Sci-FiA group of intergalactic criminals are forced ...James GunnChris Pratt, Vin Diesel, Bradley Cooper, Zoe S...20141218.175707433376.04531True
12PrometheusAdventure,Mystery,Sci-FiFollowing clues to the origin of mankind, a te...Ridley ScottNoomi Rapace, Logan Marshall-Green, Michael Fa...20121247.048582012665.04580True
23SplitHorror,ThrillerThree girls are kidnapped by a man with a diag...M. Night ShyamalanJames McAvoy, Anya Taylor-Joy, Haley Lu Richar...20161177.315760613862.04367True
34SingAnimation,Comedy,FamilyIn a city of humanoid animals, a hustling thea...Christophe LourdeletMatthew McConaughey,Reese Witherspoon, Seth Ma...20161087.26054527059.04645True
45Suicide SquadAction,Adventure,FantasyA secret government agency recruits some of th...David AyerWill Smith, Jared Leto, Margot Robbie, Viola D...20161236.239372732540.04408True
\n", + "
" + ], + "text/plain": [ + " Rank Title Genre \\\n", + "0 1 Guardians of the Galaxy Action,Adventure,Sci-Fi \n", + "1 2 Prometheus Adventure,Mystery,Sci-Fi \n", + "2 3 Split Horror,Thriller \n", + "3 4 Sing Animation,Comedy,Family \n", + "4 5 Suicide Squad Action,Adventure,Fantasy \n", + "\n", + " Description Director \\\n", + "0 A group of intergalactic criminals are forced ... James Gunn \n", + "1 Following clues to the origin of mankind, a te... Ridley Scott \n", + "2 Three girls are kidnapped by a man with a diag... M. Night Shyamalan \n", + "3 In a city of humanoid animals, a hustling thea... Christophe Lourdelet \n", + "4 A secret government agency recruits some of th... David Ayer \n", + "\n", + " Actors Year Runtime (Minutes) \\\n", + "0 Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S... 2014 121 \n", + "1 Noomi Rapace, Logan Marshall-Green, Michael Fa... 2012 124 \n", + "2 James McAvoy, Anya Taylor-Joy, Haley Lu Richar... 2016 117 \n", + "3 Matthew McConaughey,Reese Witherspoon, Seth Ma... 2016 108 \n", + "4 Will Smith, Jared Leto, Margot Robbie, Viola D... 2016 123 \n", + "\n", + " Rating Votes Revenue (Millions) Metascore Description SHA256 \\\n", + "0 8.1 757074 333 76.0 4531 \n", + "1 7.0 485820 126 65.0 4580 \n", + "2 7.3 157606 138 62.0 4367 \n", + "3 7.2 60545 270 59.0 4645 \n", + "4 6.2 393727 325 40.0 4408 \n", + "\n", + " Freaky Movie \n", + "0 True \n", + "1 True \n", + "2 True \n", + "3 True \n", + "4 True " + ] + }, + "execution_count": 172, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 4. 
Revisar con Booleanos, si 'Description SHA256' está entre los votos y los revenues (Millones)\n", + "df['Freaky Movie'] = df.apply(lambda row: row['Description SHA256'] >= row['Revenue (Millions)'] and row['Description SHA256'] <= row['Votes'], axis=1)\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 176, + "id": "8350bf3d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "There is at least one freaky movie in the DataFrame.\n" + ] + } + ], "source": [ - "# Your code here" + "if df['Freaky Movie'].any():\n", + " print(\"There is at least one freaky movie in the DataFrame.\")\n", + "else:\n", + " print(\"There are no freaky movies in the DataFrame.\")" ] } ],