From 95a685b3505088922752ab20349692b4257f2611 Mon Sep 17 00:00:00 2001 From: teresacardenosa Date: Sat, 28 Oct 2023 14:23:11 +0200 Subject: [PATCH 1/4] First commit - Completed challenge 1 and 2 --- notebooks/your_code_here.ipynb | 968 ++++++++++++++++++++++++++++++++- 1 file changed, 964 insertions(+), 4 deletions(-) diff --git a/notebooks/your_code_here.ipynb b/notebooks/your_code_here.ipynb index f60a09e..309a002 100644 --- a/notebooks/your_code_here.ipynb +++ b/notebooks/your_code_here.ipynb @@ -50,18 +50,978 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "d3c3dd76", "metadata": {}, "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "6881a066", + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv('../data/input/IMDB-Movie-Data.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "dff70543", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
RankTitleGenreDescriptionDirectorActorsYearRuntime (Minutes)RatingVotesRevenue (Millions)Metascore
01Guardians of the GalaxyAction,Adventure,Sci-FiA group of intergalactic criminals are forced ...James GunnChris Pratt, Vin Diesel, Bradley Cooper, Zoe S...20141218.1757074333.1376.0
12PrometheusAdventure,Mystery,Sci-FiFollowing clues to the origin of mankind, a te...Ridley ScottNoomi Rapace, Logan Marshall-Green, Michael Fa...20121247.0485820126.4665.0
23SplitHorror,ThrillerThree girls are kidnapped by a man with a diag...M. Night ShyamalanJames McAvoy, Anya Taylor-Joy, Haley Lu Richar...20161177.3157606138.1262.0
34SingAnimation,Comedy,FamilyIn a city of humanoid animals, a hustling thea...Christophe LourdeletMatthew McConaughey,Reese Witherspoon, Seth Ma...20161087.260545270.3259.0
45Suicide SquadAction,Adventure,FantasyA secret government agency recruits some of th...David AyerWill Smith, Jared Leto, Margot Robbie, Viola D...20161236.2393727325.0240.0
\n", + "
" + ], + "text/plain": [ + " Rank Title Genre \\\n", + "0 1 Guardians of the Galaxy Action,Adventure,Sci-Fi \n", + "1 2 Prometheus Adventure,Mystery,Sci-Fi \n", + "2 3 Split Horror,Thriller \n", + "3 4 Sing Animation,Comedy,Family \n", + "4 5 Suicide Squad Action,Adventure,Fantasy \n", + "\n", + " Description Director \\\n", + "0 A group of intergalactic criminals are forced ... James Gunn \n", + "1 Following clues to the origin of mankind, a te... Ridley Scott \n", + "2 Three girls are kidnapped by a man with a diag... M. Night Shyamalan \n", + "3 In a city of humanoid animals, a hustling thea... Christophe Lourdelet \n", + "4 A secret government agency recruits some of th... David Ayer \n", + "\n", + " Actors Year Runtime (Minutes) \\\n", + "0 Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S... 2014 121 \n", + "1 Noomi Rapace, Logan Marshall-Green, Michael Fa... 2012 124 \n", + "2 James McAvoy, Anya Taylor-Joy, Haley Lu Richar... 2016 117 \n", + "3 Matthew McConaughey,Reese Witherspoon, Seth Ma... 2016 108 \n", + "4 Will Smith, Jared Leto, Margot Robbie, Viola D... 
2016 123 \n", + "\n", + " Rating Votes Revenue (Millions) Metascore \n", + "0 8.1 757074 333.13 76.0 \n", + "1 7.0 485820 126.46 65.0 \n", + "2 7.3 157606 138.12 62.0 \n", + "3 7.2 60545 270.32 59.0 \n", + "4 6.2 393727 325.02 40.0 " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "37abf7a7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 1000 entries, 0 to 999\n", + "Data columns (total 12 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 Rank 1000 non-null int64 \n", + " 1 Title 1000 non-null object \n", + " 2 Genre 1000 non-null object \n", + " 3 Description 1000 non-null object \n", + " 4 Director 1000 non-null object \n", + " 5 Actors 1000 non-null object \n", + " 6 Year 1000 non-null int64 \n", + " 7 Runtime (Minutes) 1000 non-null int64 \n", + " 8 Rating 1000 non-null float64\n", + " 9 Votes 1000 non-null int64 \n", + " 10 Revenue (Millions) 872 non-null float64\n", + " 11 Metascore 936 non-null float64\n", + "dtypes: float64(3), int64(4), object(5)\n", + "memory usage: 93.9+ KB\n" + ] + } + ], + "source": [ + "df.info()" + ] + }, + { + "cell_type": "markdown", + "id": "efcadb28", + "metadata": {}, + "source": [ + "# 🐼 Challenge 1. Using a single argument " + ] + }, + { + "cell_type": "markdown", + "id": "4868d208", + "metadata": {}, + "source": [ + "We want to create bins of movies according to the number of votes they've received. 
def categoria(v):
    """Tag a vote count with its order-of-magnitude bin.

    Bins (lower bound inclusive, upper bound exclusive):
        below 1,000            -> "cat_1"
        1,000 to 9,999         -> "cat_2"
        10,000 to 99,999       -> "cat_3"
        100,000 to 999,999     -> "cat_4"
        1,000,000 and above    -> "cat_5"

    Parameters
    ----------
    v : int or float
        Number of votes received by a movie.

    Returns
    -------
    str or None
        The bin label, or None when ``v`` cannot be ordered against the
        thresholds (for example NaN, for which every comparison is False).
    """
    # Cascading exclusive upper bounds: each test only runs when the previous
    # one failed, so the lower bound is implied and no value can fall in a gap
    # between two bins (e.g. 999.5 is "cat_1").
    if v < 1_000:
        return "cat_1"
    if v < 10_000:
        return "cat_2"
    if v < 100_000:
        return "cat_3"
    if v < 1_000_000:
        return "cat_4"
    # Explicit test instead of a bare fall-through so NaN is not mislabelled
    # as the top bin.
    if v >= 1_000_000:
        return "cat_5"
    return None
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
RankTitleGenreDescriptionDirectorActorsYearRuntime (Minutes)RatingVotesRevenue (Millions)MetascoreCategory
01Guardians of the GalaxyAction,Adventure,Sci-FiA group of intergalactic criminals are forced ...James GunnChris Pratt, Vin Diesel, Bradley Cooper, Zoe S...20141218.1757074333.1376.0cat_4
12PrometheusAdventure,Mystery,Sci-FiFollowing clues to the origin of mankind, a te...Ridley ScottNoomi Rapace, Logan Marshall-Green, Michael Fa...20121247.0485820126.4665.0cat_4
23SplitHorror,ThrillerThree girls are kidnapped by a man with a diag...M. Night ShyamalanJames McAvoy, Anya Taylor-Joy, Haley Lu Richar...20161177.3157606138.1262.0cat_4
34SingAnimation,Comedy,FamilyIn a city of humanoid animals, a hustling thea...Christophe LourdeletMatthew McConaughey,Reese Witherspoon, Seth Ma...20161087.260545270.3259.0cat_3
45Suicide SquadAction,Adventure,FantasyA secret government agency recruits some of th...David AyerWill Smith, Jared Leto, Margot Robbie, Viola D...20161236.2393727325.0240.0cat_4
\n", + "
" + ], + "text/plain": [ + " Rank Title Genre \\\n", + "0 1 Guardians of the Galaxy Action,Adventure,Sci-Fi \n", + "1 2 Prometheus Adventure,Mystery,Sci-Fi \n", + "2 3 Split Horror,Thriller \n", + "3 4 Sing Animation,Comedy,Family \n", + "4 5 Suicide Squad Action,Adventure,Fantasy \n", + "\n", + " Description Director \\\n", + "0 A group of intergalactic criminals are forced ... James Gunn \n", + "1 Following clues to the origin of mankind, a te... Ridley Scott \n", + "2 Three girls are kidnapped by a man with a diag... M. Night Shyamalan \n", + "3 In a city of humanoid animals, a hustling thea... Christophe Lourdelet \n", + "4 A secret government agency recruits some of th... David Ayer \n", + "\n", + " Actors Year Runtime (Minutes) \\\n", + "0 Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S... 2014 121 \n", + "1 Noomi Rapace, Logan Marshall-Green, Michael Fa... 2012 124 \n", + "2 James McAvoy, Anya Taylor-Joy, Haley Lu Richar... 2016 117 \n", + "3 Matthew McConaughey,Reese Witherspoon, Seth Ma... 2016 108 \n", + "4 Will Smith, Jared Leto, Margot Robbie, Viola D... 2016 123 \n", + "\n", + " Rating Votes Revenue (Millions) Metascore Category \n", + "0 8.1 757074 333.13 76.0 cat_4 \n", + "1 7.0 485820 126.46 65.0 cat_4 \n", + "2 7.3 157606 138.12 62.0 cat_4 \n", + "3 7.2 60545 270.32 59.0 cat_3 \n", + "4 6.2 393727 325.02 40.0 cat_4 " + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head() # Compruebas" + ] + }, + { + "cell_type": "markdown", + "id": "9e49c357", + "metadata": {}, + "source": [ + "## 🐼 🐼 Challenge 2. Using two arguments" + ] + }, + { + "cell_type": "markdown", + "id": "21aa0dcc", + "metadata": {}, + "source": [ + "We want to know how much is the revenue per minute for every movie." 
def division(min, rev):
    """Return revenue per minute of runtime.

    Parameters
    ----------
    min : int or float
        Runtime in minutes. The name shadows the ``min`` builtin inside this
        function; it is kept so existing positional and keyword callers keep
        working.
    rev : int or float
        Revenue, in whatever unit the caller uses (the result is in that same
        unit per minute).

    Returns
    -------
    float
        ``rev / min``. NaN when ``min`` is 0, because the ratio is undefined.
        A NaN ``rev`` (missing revenue) propagates to a NaN result.
    """
    # Guard the undefined case explicitly: a zero runtime would otherwise raise
    # ZeroDivisionError for Python numbers and abort a row-wise apply.
    if min == 0:
        return float("nan")
    return rev / min
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
RankTitleGenreDescriptionDirectorActorsYearRuntime (Minutes)RatingVotesRevenue (Millions)MetascoreCategoryRevenue per minuteRevenue per minute (Millions)
01Guardians of the GalaxyAction,Adventure,Sci-FiA group of intergalactic criminals are forced ...James GunnChris Pratt, Vin Diesel, Bradley Cooper, Zoe S...20141218.1757074333.1376.0cat_42.7531402.753140
12PrometheusAdventure,Mystery,Sci-FiFollowing clues to the origin of mankind, a te...Ridley ScottNoomi Rapace, Logan Marshall-Green, Michael Fa...20121247.0485820126.4665.0cat_41.0198391.019839
23SplitHorror,ThrillerThree girls are kidnapped by a man with a diag...M. Night ShyamalanJames McAvoy, Anya Taylor-Joy, Haley Lu Richar...20161177.3157606138.1262.0cat_41.1805131.180513
34SingAnimation,Comedy,FamilyIn a city of humanoid animals, a hustling thea...Christophe LourdeletMatthew McConaughey,Reese Witherspoon, Seth Ma...20161087.260545270.3259.0cat_32.5029632.502963
45Suicide SquadAction,Adventure,FantasyA secret government agency recruits some of th...David AyerWill Smith, Jared Leto, Margot Robbie, Viola D...20161236.2393727325.0240.0cat_42.6424392.642439
\n", + "
" + ], + "text/plain": [ + " Rank Title Genre \\\n", + "0 1 Guardians of the Galaxy Action,Adventure,Sci-Fi \n", + "1 2 Prometheus Adventure,Mystery,Sci-Fi \n", + "2 3 Split Horror,Thriller \n", + "3 4 Sing Animation,Comedy,Family \n", + "4 5 Suicide Squad Action,Adventure,Fantasy \n", + "\n", + " Description Director \\\n", + "0 A group of intergalactic criminals are forced ... James Gunn \n", + "1 Following clues to the origin of mankind, a te... Ridley Scott \n", + "2 Three girls are kidnapped by a man with a diag... M. Night Shyamalan \n", + "3 In a city of humanoid animals, a hustling thea... Christophe Lourdelet \n", + "4 A secret government agency recruits some of th... David Ayer \n", + "\n", + " Actors Year Runtime (Minutes) \\\n", + "0 Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S... 2014 121 \n", + "1 Noomi Rapace, Logan Marshall-Green, Michael Fa... 2012 124 \n", + "2 James McAvoy, Anya Taylor-Joy, Haley Lu Richar... 2016 117 \n", + "3 Matthew McConaughey,Reese Witherspoon, Seth Ma... 2016 108 \n", + "4 Will Smith, Jared Leto, Margot Robbie, Viola D... 2016 123 \n", + "\n", + " Rating Votes Revenue (Millions) Metascore Category Revenue per minute \\\n", + "0 8.1 757074 333.13 76.0 cat_4 2.753140 \n", + "1 7.0 485820 126.46 65.0 cat_4 1.019839 \n", + "2 7.3 157606 138.12 62.0 cat_4 1.180513 \n", + "3 7.2 60545 270.32 59.0 cat_3 2.502963 \n", + "4 6.2 393727 325.02 40.0 cat_4 2.642439 \n", + "\n", + " Revenue per minute (Millions) \n", + "0 2.753140 \n", + "1 1.019839 \n", + "2 1.180513 \n", + "3 2.502963 \n", + "4 2.642439 " + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head() # Antes apliqué la lamba a una columna llamada \"Revenue per minute\", al hacerlo de nuevo \n", + " # para que se muestre el nombre de la columna \"Revenue per minute (Millions)\", dejé creada la columna anterior. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "6ea514df", + "metadata": {}, + "outputs": [], + "source": [ + "df = df.drop(['Revenue per minute'], axis=1) # Elimino la columna que cree antes por error" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "f22258a6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
RankTitleGenreDescriptionDirectorActorsYearRuntime (Minutes)RatingVotesRevenue (Millions)MetascoreCategoryRevenue per minute (Millions)
01Guardians of the GalaxyAction,Adventure,Sci-FiA group of intergalactic criminals are forced ...James GunnChris Pratt, Vin Diesel, Bradley Cooper, Zoe S...20141218.1757074333.1376.0cat_42.753140
12PrometheusAdventure,Mystery,Sci-FiFollowing clues to the origin of mankind, a te...Ridley ScottNoomi Rapace, Logan Marshall-Green, Michael Fa...20121247.0485820126.4665.0cat_41.019839
23SplitHorror,ThrillerThree girls are kidnapped by a man with a diag...M. Night ShyamalanJames McAvoy, Anya Taylor-Joy, Haley Lu Richar...20161177.3157606138.1262.0cat_41.180513
34SingAnimation,Comedy,FamilyIn a city of humanoid animals, a hustling thea...Christophe LourdeletMatthew McConaughey,Reese Witherspoon, Seth Ma...20161087.260545270.3259.0cat_32.502963
45Suicide SquadAction,Adventure,FantasyA secret government agency recruits some of th...David AyerWill Smith, Jared Leto, Margot Robbie, Viola D...20161236.2393727325.0240.0cat_42.642439
\n", + "
" + ], + "text/plain": [ + " Rank Title Genre \\\n", + "0 1 Guardians of the Galaxy Action,Adventure,Sci-Fi \n", + "1 2 Prometheus Adventure,Mystery,Sci-Fi \n", + "2 3 Split Horror,Thriller \n", + "3 4 Sing Animation,Comedy,Family \n", + "4 5 Suicide Squad Action,Adventure,Fantasy \n", + "\n", + " Description Director \\\n", + "0 A group of intergalactic criminals are forced ... James Gunn \n", + "1 Following clues to the origin of mankind, a te... Ridley Scott \n", + "2 Three girls are kidnapped by a man with a diag... M. Night Shyamalan \n", + "3 In a city of humanoid animals, a hustling thea... Christophe Lourdelet \n", + "4 A secret government agency recruits some of th... David Ayer \n", + "\n", + " Actors Year Runtime (Minutes) \\\n", + "0 Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S... 2014 121 \n", + "1 Noomi Rapace, Logan Marshall-Green, Michael Fa... 2012 124 \n", + "2 James McAvoy, Anya Taylor-Joy, Haley Lu Richar... 2016 117 \n", + "3 Matthew McConaughey,Reese Witherspoon, Seth Ma... 2016 108 \n", + "4 Will Smith, Jared Leto, Margot Robbie, Viola D... 2016 123 \n", + "\n", + " Rating Votes Revenue (Millions) Metascore Category \\\n", + "0 8.1 757074 333.13 76.0 cat_4 \n", + "1 7.0 485820 126.46 65.0 cat_4 \n", + "2 7.3 157606 138.12 62.0 cat_4 \n", + "3 7.2 60545 270.32 59.0 cat_3 \n", + "4 6.2 393727 325.02 40.0 cat_4 \n", + "\n", + " Revenue per minute (Millions) \n", + "0 2.753140 \n", + "1 1.019839 \n", + "2 1.180513 \n", + "3 2.502963 \n", + "4 2.642439 " + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "markdown", + "id": "a97b6ad4", + "metadata": {}, + "source": [ + "## 🐼 🐼 🐼 Challenge 3. A bit more complicated" + ] + }, + { + "cell_type": "markdown", + "id": "b0f804a6", + "metadata": {}, + "source": [ + "We want to create a new rating where we add 1 point if the genre is thriller but subtract 1 point if the genre is comedy." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ce647d8f", + "metadata": {}, + "outputs": [], + "source": [ + "# Nueva columna que se llame 'New rating'\n", + "# Recorrer cada row de \"Genre\" y si aparece \"Thriller\" entonces es +1\n", + "# Si en el recorrido aparece \"Comedy\" entonces es -1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "65b39cf5", + "metadata": {}, + "outputs": [], + "source": [ + "def ranqueo(texto):\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "83a890aa", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f73f93f5", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b7a6dcb9", + "metadata": {}, + "outputs": [], "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Python [conda env:.conda-m1_env]", + "display_name": "Python (m1_env)", "language": "python", - "name": "conda-env-.conda-m1_env-py" + "name": "m1_env" }, "language_info": { "codemirror_mode": { @@ -73,7 +1033,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.16" + "version": "3.10.13" } }, "nbformat": 4, From 572f93274afa5c2e683733e19f6ffdb8c00d73a3 Mon Sep 17 00:00:00 2001 From: teresacardenosa Date: Sat, 28 Oct 2023 23:49:52 +0200 Subject: [PATCH 2/4] Finished challenge 3, 4 y 5. 
Pdt bonus challenge --- notebooks/your_code_here.ipynb | 789 ++++++++++++++++++++++++++++++--- 1 file changed, 722 insertions(+), 67 deletions(-) diff --git a/notebooks/your_code_here.ipynb b/notebooks/your_code_here.ipynb index 309a002..d1feef0 100644 --- a/notebooks/your_code_here.ipynb +++ b/notebooks/your_code_here.ipynb @@ -50,7 +50,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "id": "d3c3dd76", "metadata": {}, "outputs": [], @@ -61,7 +61,7 @@ { "cell_type": "code", "execution_count": 3, - "id": "6881a066", + "id": "ad4a4de9", "metadata": {}, "outputs": [], "source": [ @@ -71,7 +71,7 @@ { "cell_type": "code", "execution_count": 4, - "id": "dff70543", + "id": "40424996", "metadata": {}, "outputs": [ { @@ -231,7 +231,7 @@ { "cell_type": "code", "execution_count": 5, - "id": "37abf7a7", + "id": "2f40188d", "metadata": {}, "outputs": [ { @@ -266,7 +266,7 @@ }, { "cell_type": "markdown", - "id": "efcadb28", + "id": "9d9fd88a", "metadata": {}, "source": [ "# 🐼 Challenge 1. Using a single argument " @@ -274,7 +274,7 @@ }, { "cell_type": "markdown", - "id": "4868d208", + "id": "b67d8d9e", "metadata": {}, "source": [ "We want to create bins of movies according to the number of votes they've received. 
For that matter, we will create a new column named 'bin' which will tag every movie as follow:\n", @@ -292,8 +292,8 @@ }, { "cell_type": "code", - "execution_count": 10, - "id": "58e89ec9", + "execution_count": 6, + "id": "9179f405", "metadata": {}, "outputs": [], "source": [ @@ -315,8 +315,8 @@ }, { "cell_type": "code", - "execution_count": 12, - "id": "f150e8ba", + "execution_count": 7, + "id": "04bcd2b6", "metadata": {}, "outputs": [ { @@ -336,8 +336,8 @@ }, { "cell_type": "code", - "execution_count": 14, - "id": "c0e3f5ad", + "execution_count": 8, + "id": "9d304b55", "metadata": {}, "outputs": [], "source": [ @@ -348,8 +348,8 @@ }, { "cell_type": "code", - "execution_count": 15, - "id": "c7c374a3", + "execution_count": 9, + "id": "221798ee", "metadata": {}, "outputs": [ { @@ -503,7 +503,7 @@ "4 6.2 393727 325.02 40.0 cat_4 " ] }, - "execution_count": 15, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -514,7 +514,7 @@ }, { "cell_type": "markdown", - "id": "9e49c357", + "id": "50e71ca8", "metadata": {}, "source": [ "## 🐼 🐼 Challenge 2. Using two arguments" @@ -522,7 +522,7 @@ }, { "cell_type": "markdown", - "id": "21aa0dcc", + "id": "afc85557", "metadata": {}, "source": [ "We want to know how much is the revenue per minute for every movie." 
@@ -530,8 +530,8 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "335bdece", + "execution_count": 10, + "id": "9cd78a45", "metadata": {}, "outputs": [], "source": [ @@ -540,8 +540,8 @@ }, { "cell_type": "code", - "execution_count": 16, - "id": "d79baf2e", + "execution_count": 11, + "id": "ecd6d1c5", "metadata": {}, "outputs": [], "source": [ @@ -551,8 +551,8 @@ }, { "cell_type": "code", - "execution_count": 18, - "id": "c383169c", + "execution_count": 12, + "id": "5478c7f4", "metadata": {}, "outputs": [ { @@ -570,8 +570,8 @@ }, { "cell_type": "code", - "execution_count": 21, - "id": "df67e893", + "execution_count": 13, + "id": "995fae18", "metadata": {}, "outputs": [], "source": [ @@ -580,8 +580,8 @@ }, { "cell_type": "code", - "execution_count": 22, - "id": "26375cfb", + "execution_count": 14, + "id": "fff7b892", "metadata": {}, "outputs": [ { @@ -618,7 +618,6 @@ " Revenue (Millions)\n", " Metascore\n", " Category\n", - " Revenue per minute\n", " Revenue per minute (Millions)\n", " \n", " \n", @@ -639,7 +638,6 @@ " 76.0\n", " cat_4\n", " 2.753140\n", - " 2.753140\n", " \n", " \n", " 1\n", @@ -657,7 +655,6 @@ " 65.0\n", " cat_4\n", " 1.019839\n", - " 1.019839\n", " \n", " \n", " 2\n", @@ -675,7 +672,6 @@ " 62.0\n", " cat_4\n", " 1.180513\n", - " 1.180513\n", " \n", " \n", " 3\n", @@ -693,7 +689,6 @@ " 59.0\n", " cat_3\n", " 2.502963\n", - " 2.502963\n", " \n", " \n", " 4\n", @@ -711,7 +706,6 @@ " 40.0\n", " cat_4\n", " 2.642439\n", - " 2.642439\n", " \n", " \n", "\n", @@ -739,12 +733,12 @@ "3 Matthew McConaughey,Reese Witherspoon, Seth Ma... 2016 108 \n", "4 Will Smith, Jared Leto, Margot Robbie, Viola D... 
2016 123 \n", "\n", - " Rating Votes Revenue (Millions) Metascore Category Revenue per minute \\\n", - "0 8.1 757074 333.13 76.0 cat_4 2.753140 \n", - "1 7.0 485820 126.46 65.0 cat_4 1.019839 \n", - "2 7.3 157606 138.12 62.0 cat_4 1.180513 \n", - "3 7.2 60545 270.32 59.0 cat_3 2.502963 \n", - "4 6.2 393727 325.02 40.0 cat_4 2.642439 \n", + " Rating Votes Revenue (Millions) Metascore Category \\\n", + "0 8.1 757074 333.13 76.0 cat_4 \n", + "1 7.0 485820 126.46 65.0 cat_4 \n", + "2 7.3 157606 138.12 62.0 cat_4 \n", + "3 7.2 60545 270.32 59.0 cat_3 \n", + "4 6.2 393727 325.02 40.0 cat_4 \n", "\n", " Revenue per minute (Millions) \n", "0 2.753140 \n", @@ -754,7 +748,7 @@ "4 2.642439 " ] }, - "execution_count": 22, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -766,19 +760,120 @@ }, { "cell_type": "code", - "execution_count": 23, - "id": "6ea514df", + "execution_count": 15, + "id": "ffb84bb0", "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "KeyError", + "evalue": "\"['Revenue per minute'] not found in axis\"", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[15], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m df \u001b[38;5;241m=\u001b[39m \u001b[43mdf\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdrop\u001b[49m\u001b[43m(\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mRevenue per minute\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m\n", + "File \u001b[1;32m~\\miniconda3\\envs\\m1_env\\lib\\site-packages\\pandas\\core\\frame.py:5258\u001b[0m, in \u001b[0;36mDataFrame.drop\u001b[1;34m(self, labels, axis, index, columns, level, 
inplace, errors)\u001b[0m\n\u001b[0;32m 5110\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdrop\u001b[39m(\n\u001b[0;32m 5111\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[0;32m 5112\u001b[0m labels: IndexLabel \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 5119\u001b[0m errors: IgnoreRaise \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mraise\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 5120\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m DataFrame \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 5121\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 5122\u001b[0m \u001b[38;5;124;03m Drop specified labels from rows or columns.\u001b[39;00m\n\u001b[0;32m 5123\u001b[0m \n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 5256\u001b[0m \u001b[38;5;124;03m weight 1.0 0.8\u001b[39;00m\n\u001b[0;32m 5257\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m-> 5258\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdrop\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 5259\u001b[0m \u001b[43m \u001b[49m\u001b[43mlabels\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlabels\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 5260\u001b[0m \u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maxis\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 5261\u001b[0m \u001b[43m \u001b[49m\u001b[43mindex\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mindex\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 5262\u001b[0m \u001b[43m \u001b[49m\u001b[43mcolumns\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcolumns\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 5263\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mlevel\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlevel\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 5264\u001b[0m \u001b[43m \u001b[49m\u001b[43minplace\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minplace\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 5265\u001b[0m \u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 5266\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[1;32m~\\miniconda3\\envs\\m1_env\\lib\\site-packages\\pandas\\core\\generic.py:4549\u001b[0m, in \u001b[0;36mNDFrame.drop\u001b[1;34m(self, labels, axis, index, columns, level, inplace, errors)\u001b[0m\n\u001b[0;32m 4547\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m axis, labels \u001b[38;5;129;01min\u001b[39;00m axes\u001b[38;5;241m.\u001b[39mitems():\n\u001b[0;32m 4548\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m labels \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m-> 4549\u001b[0m obj \u001b[38;5;241m=\u001b[39m \u001b[43mobj\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_drop_axis\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlabels\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlevel\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlevel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 4551\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m inplace:\n\u001b[0;32m 4552\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_update_inplace(obj)\n", + "File \u001b[1;32m~\\miniconda3\\envs\\m1_env\\lib\\site-packages\\pandas\\core\\generic.py:4591\u001b[0m, in \u001b[0;36mNDFrame._drop_axis\u001b[1;34m(self, labels, axis, level, errors, only_slice)\u001b[0m\n\u001b[0;32m 
4589\u001b[0m new_axis \u001b[38;5;241m=\u001b[39m axis\u001b[38;5;241m.\u001b[39mdrop(labels, level\u001b[38;5;241m=\u001b[39mlevel, errors\u001b[38;5;241m=\u001b[39merrors)\n\u001b[0;32m 4590\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m-> 4591\u001b[0m new_axis \u001b[38;5;241m=\u001b[39m \u001b[43maxis\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdrop\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlabels\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 4592\u001b[0m indexer \u001b[38;5;241m=\u001b[39m axis\u001b[38;5;241m.\u001b[39mget_indexer(new_axis)\n\u001b[0;32m 4594\u001b[0m \u001b[38;5;66;03m# Case for non-unique axis\u001b[39;00m\n\u001b[0;32m 4595\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", + "File \u001b[1;32m~\\miniconda3\\envs\\m1_env\\lib\\site-packages\\pandas\\core\\indexes\\base.py:6699\u001b[0m, in \u001b[0;36mIndex.drop\u001b[1;34m(self, labels, errors)\u001b[0m\n\u001b[0;32m 6697\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m mask\u001b[38;5;241m.\u001b[39many():\n\u001b[0;32m 6698\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m errors \u001b[38;5;241m!=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mignore\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m-> 6699\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mlist\u001b[39m(labels[mask])\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m not found in axis\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 6700\u001b[0m indexer \u001b[38;5;241m=\u001b[39m indexer[\u001b[38;5;241m~\u001b[39mmask]\n\u001b[0;32m 6701\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdelete(indexer)\n", + "\u001b[1;31mKeyError\u001b[0m: \"['Revenue per minute'] not found in 
axis\"" + ] + } + ], "source": [ "df = df.drop(['Revenue per minute'], axis=1) # Elimino la columna que cree antes por error" ] }, { "cell_type": "code", - "execution_count": 24, - "id": "f22258a6", + "execution_count": null, + "id": "9db273dc", + "metadata": {}, + "outputs": [], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "markdown", + "id": "34ed0add", + "metadata": {}, + "source": [ + "## 🐼 🐼 🐼 Challenge 3. A bit more complicated" + ] + }, + { + "cell_type": "markdown", + "id": "08aaf74e", + "metadata": {}, + "source": [ + "We want to create a new rating where we add 1 point if the genre is thriller but subtract 1 point if the genre is comedy." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e83914c2", + "metadata": {}, + "outputs": [], + "source": [ + "# Nueva columna que se llame 'New rating'\n", + "# Recorrer cada row de \"Genre\" y si aparece \"Thriller\" entonces es +1\n", + "# Si en el recorrido aparece \"Comedy\" entonces es -1" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "b7de0ab5", + "metadata": {}, + "outputs": [], + "source": [ + "def ranqueo(texto, value):\n", + " if \"Thriller\" in texto:\n", + " value += 1\n", + " elif \"Comedy\" in texto:\n", + " value -= 1\n", + " return value" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "e938c756", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-1\n" + ] + } + ], + "source": [ + "x = \"Animation, Comedy, Family\"\n", + "y = ranqueo(x, 0)\n", + "print(y)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "2496e301", "metadata": {}, + "outputs": [], + "source": [ + "df[\"New Rating\"] = df.apply(lambda row: ranqueo(row[\"Genre\"], row[\"Rank\"]), axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "86dc9e2b", + "metadata": { + "scrolled": true + }, "outputs": [ { "data": { @@ -815,6 +910,7 @@ " Metascore\n", " Category\n", " Revenue 
per minute (Millions)\n", + " New Rating\n", " \n", " \n", " \n", @@ -834,6 +930,7 @@ " 76.0\n", " cat_4\n", " 2.753140\n", + " 1\n", " \n", " \n", " 1\n", @@ -851,6 +948,7 @@ " 65.0\n", " cat_4\n", " 1.019839\n", + " 2\n", " \n", " \n", " 2\n", @@ -868,6 +966,7 @@ " 62.0\n", " cat_4\n", " 1.180513\n", + " 4\n", " \n", " \n", " 3\n", @@ -885,6 +984,7 @@ " 59.0\n", " cat_3\n", " 2.502963\n", + " 3\n", " \n", " \n", " 4\n", @@ -902,6 +1002,7 @@ " 40.0\n", " cat_4\n", " 2.642439\n", + " 5\n", " \n", " \n", "\n", @@ -936,15 +1037,15 @@ "3 7.2 60545 270.32 59.0 cat_3 \n", "4 6.2 393727 325.02 40.0 cat_4 \n", "\n", - " Revenue per minute (Millions) \n", - "0 2.753140 \n", - "1 1.019839 \n", - "2 1.180513 \n", - "3 2.502963 \n", - "4 2.642439 " + " Revenue per minute (Millions) New Rating \n", + "0 2.753140 1 \n", + "1 1.019839 2 \n", + "2 1.180513 4 \n", + "3 2.502963 3 \n", + "4 2.642439 5 " ] }, - "execution_count": 24, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -955,47 +1056,601 @@ }, { "cell_type": "markdown", - "id": "a97b6ad4", + "id": "e3648f78", "metadata": {}, "source": [ - "## 🐼 🐼 🐼 Challenge 3. A bit more complicated" + "## 🐼 🐼 🐼 🐼 Challenge 4. A bit too weird..." ] }, { "cell_type": "markdown", - "id": "b0f804a6", + "id": "48f98c88", "metadata": {}, "source": [ - "We want to create a new rating where we add 1 point if the genre is thriller but subtract 1 point if the genre is comedy." + "We want to know whether the integer part of the number resulting from the sum of the ASCII value of every character of the movie title divided by the number of votes, is a prime number (remember that prime numbers are integers)." 
] }, { "cell_type": "code", "execution_count": null, - "id": "ce647d8f", + "id": "b6799b89", "metadata": {}, "outputs": [], "source": [ - "# Nueva columna que se llame 'New rating'\n", - "# Recorrer cada row de \"Genre\" y si aparece \"Thriller\" entonces es +1\n", - "# Si en el recorrido aparece \"Comedy\" entonces es -1" + "# La suma del valor ASCII de cada caracter del título es un entero\n", + "# Comprobar si la suma dividida por el número de votos, obtiene un resultado número primo " ] }, { "cell_type": "code", - "execution_count": null, - "id": "65b39cf5", + "execution_count": 43, + "id": "99ea5f99", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2170\n" + ] + } + ], + "source": [ + "# Prueba - Obtener la suma total del valor ASCII \n", + "\n", + "string = \"Guardians of the Galaxy\" \n", + "ascii_value = sum(ord(ch) for ch in string) # Suma el valor ascii del caracter por cada caracter en string\n", + "print(ascii_value)" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "id": "42b10366", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.002866298406760766\n" + ] + } + ], + "source": [ + "votes = 757074\n", + "diff = ascii_value / votes # == número primo\n", + "print(diff)" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "id": "c677d263", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "False\n" + ] + } + ], + "source": [ + "# Función para comprobar si un número es primo\n", + "def esprimo(n):\n", + " # Divisible entre él mismo y 1 únicamente\n", + " if n<= 1:\n", + " return False\n", + " elif n == 2:\n", + " return True\n", + " else: \n", + " for i in range(2, n): \n", + " if n % i == 0:\n", + " return False\n", + " return True \n", + " \n", + "print(esprimo(diff)) # Prueba con el resultado anterior, que el primero sería False" + ] + }, + { + "cell_type": "code", + 
"execution_count": 52, + "id": "e7b1872f", + "metadata": {}, + "outputs": [], + "source": [ + "# Función para comprobar si el resultado de la división entre la suma ascii y el voto es primo\n", + "def ascii_primo(texto, voto): \n", + " ascii_value = sum(ord(ch) for ch in texto) # Obtener el ascii del texto que le dé\n", + " diff = ascii_value / voto # Obtener la diferencia entre los dos \n", + " return esprimo(int(diff)) # A la hora de devolver llamo a la función anterior así me devuelve True o False\n" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "id": "9e470770", "metadata": {}, "outputs": [], "source": [ - "def ranqueo(texto):\n", - " " + "df[\"Prime number?\"] = df.apply(lambda row: ascii_primo(row[\"Title\"], row[\"Votes\"]), axis=1)\n", + "# Aplico al df creando una nueva columna Prime number? \n", + "# La lambda ejecuta la función ascii_primo que, a su vez, lleva dentro otra función aunque no se vea. " + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "a7f17207", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
RankTitleGenreDescriptionDirectorActorsYearRuntime (Minutes)RatingVotesRevenue (Millions)MetascoreCategoryRevenue per minute (Millions)New RatingPrime number?
01Guardians of the GalaxyAction,Adventure,Sci-FiA group of intergalactic criminals are forced ...James GunnChris Pratt, Vin Diesel, Bradley Cooper, Zoe S...20141218.1757074333.1376.0cat_42.7531401False
12PrometheusAdventure,Mystery,Sci-FiFollowing clues to the origin of mankind, a te...Ridley ScottNoomi Rapace, Logan Marshall-Green, Michael Fa...20121247.0485820126.4665.0cat_41.0198392False
23SplitHorror,ThrillerThree girls are kidnapped by a man with a diag...M. Night ShyamalanJames McAvoy, Anya Taylor-Joy, Haley Lu Richar...20161177.3157606138.1262.0cat_41.1805134False
34SingAnimation,Comedy,FamilyIn a city of humanoid animals, a hustling thea...Christophe LourdeletMatthew McConaughey,Reese Witherspoon, Seth Ma...20161087.260545270.3259.0cat_32.5029633False
45Suicide SquadAction,Adventure,FantasyA secret government agency recruits some of th...David AyerWill Smith, Jared Leto, Margot Robbie, Viola D...20161236.2393727325.0240.0cat_42.6424395False
\n", + "
" + ], + "text/plain": [ + " Rank Title Genre \\\n", + "0 1 Guardians of the Galaxy Action,Adventure,Sci-Fi \n", + "1 2 Prometheus Adventure,Mystery,Sci-Fi \n", + "2 3 Split Horror,Thriller \n", + "3 4 Sing Animation,Comedy,Family \n", + "4 5 Suicide Squad Action,Adventure,Fantasy \n", + "\n", + " Description Director \\\n", + "0 A group of intergalactic criminals are forced ... James Gunn \n", + "1 Following clues to the origin of mankind, a te... Ridley Scott \n", + "2 Three girls are kidnapped by a man with a diag... M. Night Shyamalan \n", + "3 In a city of humanoid animals, a hustling thea... Christophe Lourdelet \n", + "4 A secret government agency recruits some of th... David Ayer \n", + "\n", + " Actors Year Runtime (Minutes) \\\n", + "0 Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S... 2014 121 \n", + "1 Noomi Rapace, Logan Marshall-Green, Michael Fa... 2012 124 \n", + "2 James McAvoy, Anya Taylor-Joy, Haley Lu Richar... 2016 117 \n", + "3 Matthew McConaughey,Reese Witherspoon, Seth Ma... 2016 108 \n", + "4 Will Smith, Jared Leto, Margot Robbie, Viola D... 2016 123 \n", + "\n", + " Rating Votes Revenue (Millions) Metascore Category \\\n", + "0 8.1 757074 333.13 76.0 cat_4 \n", + "1 7.0 485820 126.46 65.0 cat_4 \n", + "2 7.3 157606 138.12 62.0 cat_4 \n", + "3 7.2 60545 270.32 59.0 cat_3 \n", + "4 6.2 393727 325.02 40.0 cat_4 \n", + "\n", + " Revenue per minute (Millions) New Rating Prime number? \n", + "0 2.753140 1 False \n", + "1 1.019839 2 False \n", + "2 1.180513 4 False \n", + "3 2.502963 3 False \n", + "4 2.642439 5 False " + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "markdown", + "id": "807025c1", + "metadata": {}, + "source": [ + "## 🐼 🐼 🐼 🐼 🐼 Challenge 5. 
And finally some fantasy" + ] + }, + { + "cell_type": "markdown", + "id": "7470382b", + "metadata": {}, + "source": [ + "Feel free to propose your own ranking based in aggregations of at least 3 columns of the dataset." + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "id": "e96f4a4d", + "metadata": {}, + "outputs": [], + "source": [ + "# ¿Qué generos son los que más votos y más revenue producen? \n", + "# Agrupo el df por la columna Genre y le indico que quiero la col Votes y Revenue(Millions)\n", + "# También, para los que sean del mismo género, le aplico la .sum y lo ordeno descendente por Revenue\n", + "df1 = df.groupby(['Genre'])[['Votes', 'Revenue (Millions)']].sum().sort_values(by='Revenue (Millions)', ascending=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "id": "180b4102", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
VotesRevenue (Millions)
Genre
Action,Adventure,Sci-Fi1858207610461.51
Animation,Adventure,Comedy59130655754.75
Action,Adventure,Fantasy78168515248.29
Adventure,Family,Fantasy26406492201.47
Comedy36855291941.81
\n", + "
" + ], + "text/plain": [ + " Votes Revenue (Millions)\n", + "Genre \n", + "Action,Adventure,Sci-Fi 18582076 10461.51\n", + "Animation,Adventure,Comedy 5913065 5754.75\n", + "Action,Adventure,Fantasy 7816851 5248.29\n", + "Adventure,Family,Fantasy 2640649 2201.47\n", + "Comedy 3685529 1941.81" + ] + }, + "execution_count": 75, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "id": "3e3797ab", + "metadata": {}, + "outputs": [], + "source": [ + "# ¿Qué media de votes y revenue genera cada director? \n", + "# Aquí agrupo el df por Director y mantengo Votes y Revenue (Millions)\n", + "df2 = df.groupby(['Director'])[['Votes', 'Revenue (Millions)']].mean().sort_values(by='Revenue (Millions)', ascending=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "id": "88fa8c96", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
VotesRevenue (Millions)
Director
James Cameron935408.0760.510
Colin Trevorrow455169.0652.180
Joss Whedon781241.5541.135
Lee Unkrich586669.0414.980
Gary Ross382749.5408.000
\n", + "
" + ], + "text/plain": [ + " Votes Revenue (Millions)\n", + "Director \n", + "James Cameron 935408.0 760.510\n", + "Colin Trevorrow 455169.0 652.180\n", + "Joss Whedon 781241.5 541.135\n", + "Lee Unkrich 586669.0 414.980\n", + "Gary Ross 382749.5 408.000" + ] + }, + "execution_count": 78, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df2.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "id": "4430c6e4", + "metadata": {}, + "outputs": [], + "source": [ + "# Spoiler no utilizo tres columnas pero quería trastear con .idxmax\n", + "# ¿Qué director genera mayor revenue?\n", + "df3 = df.groupby(['Director'])[['Revenue (Millions)']].sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "id": "4a43b094", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "El director que genera mayor revenue es: J.J. Abrams\n" + ] + } + ], + "source": [ + "print(\"El director que genera mayor revenue es: \",df3['Revenue (Millions)'].idxmax())\n", + "# Como el df3 ya está agrupado por director, aquí llamo a la col Revenue con idx max" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "id": "a199b7bb", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "El director que genera menor revenue es: Adam Leon\n" + ] + } + ], + "source": [ + "print(\"El director que genera menor revenue es: \",df3['Revenue (Millions)'].idxmin())" + ] + }, + { + "cell_type": "markdown", + "id": "9b22be78", + "metadata": {}, + "source": [ + "## 🐼 🐼 🐼 🐼 🐼 🐼 Bonus challenge. Freaky bonus" + ] + }, + { + "cell_type": "markdown", + "id": "3d7de95a", + "metadata": {}, + "source": [ + "We want to know which movies might have hidden paterns in their description. A way to know that is finding those movies which the sum of all numeric values of the string description hash (SHA256) are between their revenue and their number of votes." 
] }, { "cell_type": "code", "execution_count": null, - "id": "83a890aa", + "id": "256473b2", "metadata": {}, "outputs": [], "source": [] @@ -1003,7 +1658,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f73f93f5", + "id": "23ea4a3a", "metadata": {}, "outputs": [], "source": [] @@ -1011,7 +1666,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b7a6dcb9", + "id": "58c8db32", "metadata": {}, "outputs": [], "source": [] From d953cf7073bfb69a2703758763ecb6104dad401e Mon Sep 17 00:00:00 2001 From: teresacardenosa Date: Mon, 30 Oct 2023 18:33:42 +0100 Subject: [PATCH 3/4] Nuevo commit antes de clase --- notebooks/your_code_here.ipynb | 308 ++++++++++----------------------- 1 file changed, 89 insertions(+), 219 deletions(-) diff --git a/notebooks/your_code_here.ipynb b/notebooks/your_code_here.ipynb index d1feef0..ad53416 100644 --- a/notebooks/your_code_here.ipynb +++ b/notebooks/your_code_here.ipynb @@ -50,7 +50,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "id": "d3c3dd76", "metadata": {}, "outputs": [], @@ -60,7 +60,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "id": "ad4a4de9", "metadata": {}, "outputs": [], @@ -70,7 +70,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "id": "40424996", "metadata": {}, "outputs": [ @@ -219,7 +219,7 @@ "4 6.2 393727 325.02 40.0 " ] }, - "execution_count": 4, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -230,7 +230,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "id": "2f40188d", "metadata": {}, "outputs": [ @@ -292,7 +292,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "id": "9179f405", "metadata": {}, "outputs": [], @@ -315,7 +315,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "id": "04bcd2b6", "metadata": {}, "outputs": [ @@ -336,7 +336,7 @@ }, { "cell_type": "code", - "execution_count": 8, 
+ "execution_count": 7, "id": "9d304b55", "metadata": {}, "outputs": [], @@ -348,7 +348,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "id": "221798ee", "metadata": {}, "outputs": [ @@ -503,7 +503,7 @@ "4 6.2 393727 325.02 40.0 cat_4 " ] }, - "execution_count": 9, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -570,189 +570,37 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 9, "id": "995fae18", "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'division' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[9], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m df[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRevenue per minute (Millions)\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[43mdf\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43;01mlambda\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mrow\u001b[49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mdivision\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrow\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mRuntime (Minutes)\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrow\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mRevenue (Millions)\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m\n", + "File 
\u001b[1;32m~\\miniconda3\\envs\\m1_env\\lib\\site-packages\\pandas\\core\\frame.py:9423\u001b[0m, in \u001b[0;36mDataFrame.apply\u001b[1;34m(self, func, axis, raw, result_type, args, **kwargs)\u001b[0m\n\u001b[0;32m 9412\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcore\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mapply\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m frame_apply\n\u001b[0;32m 9414\u001b[0m op \u001b[38;5;241m=\u001b[39m frame_apply(\n\u001b[0;32m 9415\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[0;32m 9416\u001b[0m func\u001b[38;5;241m=\u001b[39mfunc,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 9421\u001b[0m kwargs\u001b[38;5;241m=\u001b[39mkwargs,\n\u001b[0;32m 9422\u001b[0m )\n\u001b[1;32m-> 9423\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mop\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39m__finalize__(\u001b[38;5;28mself\u001b[39m, method\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mapply\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", + "File \u001b[1;32m~\\miniconda3\\envs\\m1_env\\lib\\site-packages\\pandas\\core\\apply.py:678\u001b[0m, in \u001b[0;36mFrameApply.apply\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 675\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mraw:\n\u001b[0;32m 676\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mapply_raw()\n\u001b[1;32m--> 678\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply_standard\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[1;32m~\\miniconda3\\envs\\m1_env\\lib\\site-packages\\pandas\\core\\apply.py:798\u001b[0m, in 
\u001b[0;36mFrameApply.apply_standard\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 797\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mapply_standard\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[1;32m--> 798\u001b[0m results, res_index \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply_series_generator\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 800\u001b[0m \u001b[38;5;66;03m# wrap results\u001b[39;00m\n\u001b[0;32m 801\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mwrap_results(results, res_index)\n", + "File \u001b[1;32m~\\miniconda3\\envs\\m1_env\\lib\\site-packages\\pandas\\core\\apply.py:814\u001b[0m, in \u001b[0;36mFrameApply.apply_series_generator\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 811\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m option_context(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmode.chained_assignment\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[0;32m 812\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i, v \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(series_gen):\n\u001b[0;32m 813\u001b[0m \u001b[38;5;66;03m# ignore SettingWithCopy here in case the user mutates\u001b[39;00m\n\u001b[1;32m--> 814\u001b[0m results[i] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mf\u001b[49m\u001b[43m(\u001b[49m\u001b[43mv\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 815\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(results[i], ABCSeries):\n\u001b[0;32m 816\u001b[0m \u001b[38;5;66;03m# If we have a view on v, we need to make a copy because\u001b[39;00m\n\u001b[0;32m 817\u001b[0m \u001b[38;5;66;03m# series_generator will swap out the underlying data\u001b[39;00m\n\u001b[0;32m 818\u001b[0m results[i] \u001b[38;5;241m=\u001b[39m 
results[i]\u001b[38;5;241m.\u001b[39mcopy(deep\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n", + "Cell \u001b[1;32mIn[9], line 1\u001b[0m, in \u001b[0;36m\u001b[1;34m(row)\u001b[0m\n\u001b[1;32m----> 1\u001b[0m df[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRevenue per minute (Millions)\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m df\u001b[38;5;241m.\u001b[39mapply(\u001b[38;5;28;01mlambda\u001b[39;00m row: \u001b[43mdivision\u001b[49m(row[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRuntime (Minutes)\u001b[39m\u001b[38;5;124m\"\u001b[39m], row[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRevenue (Millions)\u001b[39m\u001b[38;5;124m\"\u001b[39m]), axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m)\n", + "\u001b[1;31mNameError\u001b[0m: name 'division' is not defined" + ] + } + ], "source": [ "df[\"Revenue per minute (Millions)\"] = df.apply(lambda row: division(row[\"Runtime (Minutes)\"], row[\"Revenue (Millions)\"]), axis=1)" ] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "id": "fff7b892", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
RankTitleGenreDescriptionDirectorActorsYearRuntime (Minutes)RatingVotesRevenue (Millions)MetascoreCategoryRevenue per minute (Millions)
01Guardians of the GalaxyAction,Adventure,Sci-FiA group of intergalactic criminals are forced ...James GunnChris Pratt, Vin Diesel, Bradley Cooper, Zoe S...20141218.1757074333.1376.0cat_42.753140
12PrometheusAdventure,Mystery,Sci-FiFollowing clues to the origin of mankind, a te...Ridley ScottNoomi Rapace, Logan Marshall-Green, Michael Fa...20121247.0485820126.4665.0cat_41.019839
23SplitHorror,ThrillerThree girls are kidnapped by a man with a diag...M. Night ShyamalanJames McAvoy, Anya Taylor-Joy, Haley Lu Richar...20161177.3157606138.1262.0cat_41.180513
34SingAnimation,Comedy,FamilyIn a city of humanoid animals, a hustling thea...Christophe LourdeletMatthew McConaughey,Reese Witherspoon, Seth Ma...20161087.260545270.3259.0cat_32.502963
45Suicide SquadAction,Adventure,FantasyA secret government agency recruits some of th...David AyerWill Smith, Jared Leto, Margot Robbie, Viola D...20161236.2393727325.0240.0cat_42.642439
\n", - "
" - ], - "text/plain": [ - " Rank Title Genre \\\n", - "0 1 Guardians of the Galaxy Action,Adventure,Sci-Fi \n", - "1 2 Prometheus Adventure,Mystery,Sci-Fi \n", - "2 3 Split Horror,Thriller \n", - "3 4 Sing Animation,Comedy,Family \n", - "4 5 Suicide Squad Action,Adventure,Fantasy \n", - "\n", - " Description Director \\\n", - "0 A group of intergalactic criminals are forced ... James Gunn \n", - "1 Following clues to the origin of mankind, a te... Ridley Scott \n", - "2 Three girls are kidnapped by a man with a diag... M. Night Shyamalan \n", - "3 In a city of humanoid animals, a hustling thea... Christophe Lourdelet \n", - "4 A secret government agency recruits some of th... David Ayer \n", - "\n", - " Actors Year Runtime (Minutes) \\\n", - "0 Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S... 2014 121 \n", - "1 Noomi Rapace, Logan Marshall-Green, Michael Fa... 2012 124 \n", - "2 James McAvoy, Anya Taylor-Joy, Haley Lu Richar... 2016 117 \n", - "3 Matthew McConaughey,Reese Witherspoon, Seth Ma... 2016 108 \n", - "4 Will Smith, Jared Leto, Margot Robbie, Viola D... 2016 123 \n", - "\n", - " Rating Votes Revenue (Millions) Metascore Category \\\n", - "0 8.1 757074 333.13 76.0 cat_4 \n", - "1 7.0 485820 126.46 65.0 cat_4 \n", - "2 7.3 157606 138.12 62.0 cat_4 \n", - "3 7.2 60545 270.32 59.0 cat_3 \n", - "4 6.2 393727 325.02 40.0 cat_4 \n", - "\n", - " Revenue per minute (Millions) \n", - "0 2.753140 \n", - "1 1.019839 \n", - "2 1.180513 \n", - "3 2.502963 \n", - "4 2.642439 " - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df.head() # Antes apliqué la lamba a una columna llamada \"Revenue per minute\", al hacerlo de nuevo \n", " # para que se muestre el nombre de la columna \"Revenue per minute (Millions)\", dejé creada la columna anterior. 
" @@ -1083,7 +931,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 10, "id": "99ea5f99", "metadata": {}, "outputs": [ @@ -1105,7 +953,7 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 11, "id": "42b10366", "metadata": {}, "outputs": [ @@ -1125,7 +973,7 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 12, "id": "c677d263", "metadata": {}, "outputs": [ @@ -1156,7 +1004,7 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 13, "id": "e7b1872f", "metadata": {}, "outputs": [], @@ -1170,7 +1018,7 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 14, "id": "9e470770", "metadata": {}, "outputs": [], @@ -1182,7 +1030,7 @@ }, { "cell_type": "code", - "execution_count": 57, + "execution_count": 16, "id": "a7f17207", "metadata": {}, "outputs": [ @@ -1220,8 +1068,6 @@ " Revenue (Millions)\n", " Metascore\n", " Category\n", - " Revenue per minute (Millions)\n", - " New Rating\n", " Prime number?\n", " \n", " \n", @@ -1241,8 +1087,6 @@ " 333.13\n", " 76.0\n", " cat_4\n", - " 2.753140\n", - " 1\n", " False\n", " \n", " \n", @@ -1260,8 +1104,6 @@ " 126.46\n", " 65.0\n", " cat_4\n", - " 1.019839\n", - " 2\n", " False\n", " \n", " \n", @@ -1279,8 +1121,6 @@ " 138.12\n", " 62.0\n", " cat_4\n", - " 1.180513\n", - " 4\n", " False\n", " \n", " \n", @@ -1298,8 +1138,6 @@ " 270.32\n", " 59.0\n", " cat_3\n", - " 2.502963\n", - " 3\n", " False\n", " \n", " \n", @@ -1317,8 +1155,6 @@ " 325.02\n", " 40.0\n", " cat_4\n", - " 2.642439\n", - " 5\n", " False\n", " \n", " \n", @@ -1347,22 +1183,15 @@ "3 Matthew McConaughey,Reese Witherspoon, Seth Ma... 2016 108 \n", "4 Will Smith, Jared Leto, Margot Robbie, Viola D... 
2016 123 \n", "\n", - " Rating Votes Revenue (Millions) Metascore Category \\\n", - "0 8.1 757074 333.13 76.0 cat_4 \n", - "1 7.0 485820 126.46 65.0 cat_4 \n", - "2 7.3 157606 138.12 62.0 cat_4 \n", - "3 7.2 60545 270.32 59.0 cat_3 \n", - "4 6.2 393727 325.02 40.0 cat_4 \n", - "\n", - " Revenue per minute (Millions) New Rating Prime number? \n", - "0 2.753140 1 False \n", - "1 1.019839 2 False \n", - "2 1.180513 4 False \n", - "3 2.502963 3 False \n", - "4 2.642439 5 False " + " Rating Votes Revenue (Millions) Metascore Category Prime number? \n", + "0 8.1 757074 333.13 76.0 cat_4 False \n", + "1 7.0 485820 126.46 65.0 cat_4 False \n", + "2 7.3 157606 138.12 62.0 cat_4 False \n", + "3 7.2 60545 270.32 59.0 cat_3 False \n", + "4 6.2 393727 325.02 40.0 cat_4 False " ] }, - "execution_count": 57, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -1371,6 +1200,27 @@ "df.head()" ] }, + { + "cell_type": "code", + "execution_count": 21, + "id": "afe7bb94", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Prime number?\n", + "False 981\n", + "True 19\n", + "Name: count, dtype: int64\n" + ] + } + ], + "source": [ + "print(df[\"Prime number?\"].value_counts())" + ] + }, { "cell_type": "markdown", "id": "807025c1", @@ -1653,15 +1503,35 @@ "id": "256473b2", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "# 1. Encontrar una función que te traduzca la descripción a hash SHA256 (está en internet)\n", + "# 2. Hacer bucle for por el SHA256 y si el valor es un número te lo vaya sumando. \n", + "# 3. Haces un if para comprobar si ese número está entre el revenue y el número de votos. Devuelve True/False. 
" + ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "id": "23ea4a3a", "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'hashlib' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[22], line 2\u001b[0m\n\u001b[0;32m 1\u001b[0m string \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mA group of intergalactic criminals are forced\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m----> 2\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mhashlib\u001b[49m\u001b[38;5;241m.\u001b[39msha256(\u001b[38;5;28mstr\u001b[39m\u001b[38;5;241m.\u001b[39mencode())\n\u001b[0;32m 3\u001b[0m \u001b[38;5;28mprint\u001b[39m(result\u001b[38;5;241m.\u001b[39mhexdigest())\n", + "\u001b[1;31mNameError\u001b[0m: name 'hashlib' is not defined" + ] + } + ], + "source": [ + "string = \"A group of intergalactic criminals are forced\"\n", + "result = hashlib.sha256(str.encode())\n", + "print(result.hexdigest())" + ] }, { "cell_type": "code", From 3ad3b0796af93d7902a6f2d5d342a03243c92a8d Mon Sep 17 00:00:00 2001 From: teresacardenosa Date: Tue, 31 Oct 2023 14:43:58 +0100 Subject: [PATCH 4/4] Workshop finished! 
--- notebooks/your_code_here.ipynb | 286 ++++++++++++++++++++++++++++++++- 1 file changed, 285 insertions(+), 1 deletion(-) diff --git a/notebooks/your_code_here.ipynb b/notebooks/your_code_here.ipynb index ad53416..82355df 100644 --- a/notebooks/your_code_here.ipynb +++ b/notebooks/your_code_here.ipynb @@ -1535,10 +1535,294 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "58c8db32", "metadata": {}, "outputs": [], + "source": [ + "import hashlib" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "b1fc57c8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "a115f3845b4c7ea8756062a745904cc1eb0bfdfec06550d1940b0dd64b50811d\n" + ] + } + ], + "source": [ + "text = 'A group of intergalactic criminals are forced'\n", + "m = hashlib.sha256(text.encode('UTF-8')) # Codificas el string que quieres en hash con el equivalente encode\n", + " # Sobre esa string codificada le pasas la función hashlib.sha256\n", + "print(m.hexdigest()) # Print valor hexdigest\n", + "v = m.hexdigest()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "486ac68a", + "metadata": {}, + "outputs": [], + "source": [ + "def sum_values(num): # Creo una función para sumar los valores sha256\n", + " sum = 0\n", + " for ch in v: \n", + " if ch.isdecimal(): # Tienes que indicar caracter que sean dígitos enteros para sumarlos como tal\n", + " sum+= int(ch)\n", + " return sum" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "97938749", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "157\n" + ] + } + ], + "source": [ + "j = sum_values(v) # Funciona\n", + "print(j)" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "b44f462e", + "metadata": {}, + "outputs": [], + "source": [ + "def comprob(num, col1, col2):\n", + " if (sum_values(num) >= col1) & (sum_values(num) <= col2):\n", + " return True\n", + " 
else:\n", + " return False\n" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "1134cd6e", + "metadata": {}, + "outputs": [], + "source": [ + "prueba = comprob(j, int(10), 50)" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "fe5ed3cf", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "False\n" + ] + } + ], + "source": [ + "print(prueba)" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "98b192d2", + "metadata": {}, + "outputs": [], + "source": [ + "df[\"Hidden pattern?\"] = df.apply(lambda row: comprob(sum_values(row['Description']), row[\"Revenue (Millions)\"], row[\"Votes\"]), axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "5f58684d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
RankTitleGenreDescriptionDirectorActorsYearRuntime (Minutes)RatingVotesRevenue (Millions)MetascoreHidden pattern?
01Guardians of the GalaxyAction,Adventure,Sci-FiA group of intergalactic criminals are forced ...James GunnChris Pratt, Vin Diesel, Bradley Cooper, Zoe S...20141218.1757074333.1376.0False
12PrometheusAdventure,Mystery,Sci-FiFollowing clues to the origin of mankind, a te...Ridley ScottNoomi Rapace, Logan Marshall-Green, Michael Fa...20121247.0485820126.4665.0True
23SplitHorror,ThrillerThree girls are kidnapped by a man with a diag...M. Night ShyamalanJames McAvoy, Anya Taylor-Joy, Haley Lu Richar...20161177.3157606138.1262.0True
34SingAnimation,Comedy,FamilyIn a city of humanoid animals, a hustling thea...Christophe LourdeletMatthew McConaughey,Reese Witherspoon, Seth Ma...20161087.260545270.3259.0False
45Suicide SquadAction,Adventure,FantasyA secret government agency recruits some of th...David AyerWill Smith, Jared Leto, Margot Robbie, Viola D...20161236.2393727325.0240.0False
\n", + "
" + ], + "text/plain": [ + " Rank Title Genre \\\n", + "0 1 Guardians of the Galaxy Action,Adventure,Sci-Fi \n", + "1 2 Prometheus Adventure,Mystery,Sci-Fi \n", + "2 3 Split Horror,Thriller \n", + "3 4 Sing Animation,Comedy,Family \n", + "4 5 Suicide Squad Action,Adventure,Fantasy \n", + "\n", + " Description Director \\\n", + "0 A group of intergalactic criminals are forced ... James Gunn \n", + "1 Following clues to the origin of mankind, a te... Ridley Scott \n", + "2 Three girls are kidnapped by a man with a diag... M. Night Shyamalan \n", + "3 In a city of humanoid animals, a hustling thea... Christophe Lourdelet \n", + "4 A secret government agency recruits some of th... David Ayer \n", + "\n", + " Actors Year Runtime (Minutes) \\\n", + "0 Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S... 2014 121 \n", + "1 Noomi Rapace, Logan Marshall-Green, Michael Fa... 2012 124 \n", + "2 James McAvoy, Anya Taylor-Joy, Haley Lu Richar... 2016 117 \n", + "3 Matthew McConaughey,Reese Witherspoon, Seth Ma... 2016 108 \n", + "4 Will Smith, Jared Leto, Margot Robbie, Viola D... 2016 123 \n", + "\n", + " Rating Votes Revenue (Millions) Metascore Hidden pattern? \n", + "0 8.1 757074 333.13 76.0 False \n", + "1 7.0 485820 126.46 65.0 True \n", + "2 7.3 157606 138.12 62.0 True \n", + "3 7.2 60545 270.32 59.0 False \n", + "4 6.2 393727 325.02 40.0 False " + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0c20f51b", + "metadata": {}, + "outputs": [], "source": [] } ],