From 56b1aad3bafc76c0ceaef410a21b7d20986370e9 Mon Sep 17 00:00:00 2001 From: CarlosSanchezVicente Date: Sat, 28 Oct 2023 14:23:08 +0200 Subject: [PATCH 1/4] Challenge_1y2 --- notebooks/your_code_here.ipynb | 837 +++++++++++++++++++++++++++++++-- 1 file changed, 810 insertions(+), 27 deletions(-) diff --git a/notebooks/your_code_here.ipynb b/notebooks/your_code_here.ipynb index f60a09e..d20b1b0 100644 --- a/notebooks/your_code_here.ipynb +++ b/notebooks/your_code_here.ipynb @@ -10,58 +10,841 @@ "Always remember the Zen of Python!!!" ] }, + { + "cell_type": "markdown", + "id": "b8bbeb3d", + "metadata": {}, + "source": [ + "### Challenge 1" + ] + }, { "cell_type": "code", "execution_count": 1, "id": "ace6da70", "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "d3c3dd76", + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv(\"../data/input/IMDB-Movie-Data.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "501ef8f5", + "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "The Zen of Python, by Tim Peters\n", - "\n", - "Beautiful is better than ugly.\n", - "Explicit is better than implicit.\n", - "Simple is better than complex.\n", - "Complex is better than complicated.\n", - "Flat is better than nested.\n", - "Sparse is better than dense.\n", - "Readability counts.\n", - "Special cases aren't special enough to break the rules.\n", - "Although practicality beats purity.\n", - "Errors should never pass silently.\n", - "Unless explicitly silenced.\n", - "In the face of ambiguity, refuse the temptation to guess.\n", - "There should be one-- and preferably only one --obvious way to do it.\n", - "Although that way may not be obvious at first unless you're Dutch.\n", - "Now is better than never.\n", - "Although never is often better than *right* now.\n", - "If the implementation is hard to 
explain, it's a bad idea.\n", - "If the implementation is easy to explain, it may be a good idea.\n", - "Namespaces are one honking great idea -- let's do more of those!\n" + "\n", + "RangeIndex: 1000 entries, 0 to 999\n", + "Data columns (total 12 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 Rank 1000 non-null int64 \n", + " 1 Title 1000 non-null object \n", + " 2 Genre 1000 non-null object \n", + " 3 Description 1000 non-null object \n", + " 4 Director 1000 non-null object \n", + " 5 Actors 1000 non-null object \n", + " 6 Year 1000 non-null int64 \n", + " 7 Runtime (Minutes) 1000 non-null int64 \n", + " 8 Rating 1000 non-null float64\n", + " 9 Votes 1000 non-null int64 \n", + " 10 Revenue (Millions) 872 non-null float64\n", + " 11 Metascore 936 non-null float64\n", + "dtypes: float64(3), int64(4), object(5)\n", + "memory usage: 93.9+ KB\n" ] } ], "source": [ - "import this" + "df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "3111282d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
RankYearRuntime (Minutes)RatingVotesRevenue (Millions)Metascore
count1000.0000001000.0000001000.0000001000.0000001.000000e+03872.000000936.000000
mean500.5000002012.783000113.1720006.7232001.698083e+0582.95637658.985043
std288.8194363.20596218.8109080.9454291.887626e+05103.25354017.194757
min1.0000002006.00000066.0000001.9000006.100000e+010.00000011.000000
25%250.7500002010.000000100.0000006.2000003.630900e+0413.27000047.000000
50%500.5000002014.000000111.0000006.8000001.107990e+0547.98500059.500000
75%750.2500002016.000000123.0000007.4000002.399098e+05113.71500072.000000
max1000.0000002016.000000191.0000009.0000001.791916e+06936.630000100.000000
\n", + "
" + ], + "text/plain": [ + " Rank Year Runtime (Minutes) Rating Votes \\\n", + "count 1000.000000 1000.000000 1000.000000 1000.000000 1.000000e+03 \n", + "mean 500.500000 2012.783000 113.172000 6.723200 1.698083e+05 \n", + "std 288.819436 3.205962 18.810908 0.945429 1.887626e+05 \n", + "min 1.000000 2006.000000 66.000000 1.900000 6.100000e+01 \n", + "25% 250.750000 2010.000000 100.000000 6.200000 3.630900e+04 \n", + "50% 500.500000 2014.000000 111.000000 6.800000 1.107990e+05 \n", + "75% 750.250000 2016.000000 123.000000 7.400000 2.399098e+05 \n", + "max 1000.000000 2016.000000 191.000000 9.000000 1.791916e+06 \n", + "\n", + " Revenue (Millions) Metascore \n", + "count 872.000000 936.000000 \n", + "mean 82.956376 58.985043 \n", + "std 103.253540 17.194757 \n", + "min 0.000000 11.000000 \n", + "25% 13.270000 47.000000 \n", + "50% 47.985000 59.500000 \n", + "75% 113.715000 72.000000 \n", + "max 936.630000 100.000000 " + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "f88f9e24", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
RankTitleGenreDescriptionDirectorActorsYearRuntime (Minutes)RatingVotesRevenue (Millions)Metascore
01Guardians of the GalaxyAction,Adventure,Sci-FiA group of intergalactic criminals are forced ...James GunnChris Pratt, Vin Diesel, Bradley Cooper, Zoe S...20141218.1757074333.1376.0
12PrometheusAdventure,Mystery,Sci-FiFollowing clues to the origin of mankind, a te...Ridley ScottNoomi Rapace, Logan Marshall-Green, Michael Fa...20121247.0485820126.4665.0
23SplitHorror,ThrillerThree girls are kidnapped by a man with a diag...M. Night ShyamalanJames McAvoy, Anya Taylor-Joy, Haley Lu Richar...20161177.3157606138.1262.0
34SingAnimation,Comedy,FamilyIn a city of humanoid animals, a hustling thea...Christophe LourdeletMatthew McConaughey,Reese Witherspoon, Seth Ma...20161087.260545270.3259.0
45Suicide SquadAction,Adventure,FantasyA secret government agency recruits some of th...David AyerWill Smith, Jared Leto, Margot Robbie, Viola D...20161236.2393727325.0240.0
\n", + "
" + ], + "text/plain": [ + " Rank Title Genre \\\n", + "0 1 Guardians of the Galaxy Action,Adventure,Sci-Fi \n", + "1 2 Prometheus Adventure,Mystery,Sci-Fi \n", + "2 3 Split Horror,Thriller \n", + "3 4 Sing Animation,Comedy,Family \n", + "4 5 Suicide Squad Action,Adventure,Fantasy \n", + "\n", + " Description Director \\\n", + "0 A group of intergalactic criminals are forced ... James Gunn \n", + "1 Following clues to the origin of mankind, a te... Ridley Scott \n", + "2 Three girls are kidnapped by a man with a diag... M. Night Shyamalan \n", + "3 In a city of humanoid animals, a hustling thea... Christophe Lourdelet \n", + "4 A secret government agency recruits some of th... David Ayer \n", + "\n", + " Actors Year Runtime (Minutes) \\\n", + "0 Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S... 2014 121 \n", + "1 Noomi Rapace, Logan Marshall-Green, Michael Fa... 2012 124 \n", + "2 James McAvoy, Anya Taylor-Joy, Haley Lu Richar... 2016 117 \n", + "3 Matthew McConaughey,Reese Witherspoon, Seth Ma... 2016 108 \n", + "4 Will Smith, Jared Leto, Margot Robbie, Viola D... 
2016 123 \n", + "\n", + " Rating Votes Revenue (Millions) Metascore \n", + "0 8.1 757074 333.13 76.0 \n", + "1 7.0 485820 126.46 65.0 \n", + "2 7.3 157606 138.12 62.0 \n", + "3 7.2 60545 270.32 59.0 \n", + "4 6.2 393727 325.02 40.0 " + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "d2594c72", + "metadata": {}, + "outputs": [], + "source": [ + "# Definition of function\n", + "def bin_votes(votes):\n", + " if (votes >= 0) and (votes <= 999):\n", + " tag = 'cat_1'\n", + " elif (votes >= 1000) and (votes <= 9999):\n", + " tag = 'cat_2'\n", + " elif (votes >= 10000) and (votes <= 99999):\n", + " tag = 'cat_3'\n", + " elif (votes >= 100000) and (votes <= 999999):\n", + " tag = 'cat_4'\n", + " elif (votes >= 1000000) and (votes <= 9999999):\n", + " tag = 'cat_5'\n", + " \n", + " return tag" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "7390a386", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'cat_2'" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Test the function 'bin'\n", + "bin_votes(1000)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "ec157801", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
RankTitleGenreDescriptionDirectorActorsYearRuntime (Minutes)RatingVotesRevenue (Millions)MetascorebinRevenue/minrevenue_per_min
01Guardians of the GalaxyAction,Adventure,Sci-FiA group of intergalactic criminals are forced ...James GunnChris Pratt, Vin Diesel, Bradley Cooper, Zoe S...20141218.1757074333.1376.0cat_42.7531402.753140
12PrometheusAdventure,Mystery,Sci-FiFollowing clues to the origin of mankind, a te...Ridley ScottNoomi Rapace, Logan Marshall-Green, Michael Fa...20121247.0485820126.4665.0cat_41.0198391.019839
23SplitHorror,ThrillerThree girls are kidnapped by a man with a diag...M. Night ShyamalanJames McAvoy, Anya Taylor-Joy, Haley Lu Richar...20161177.3157606138.1262.0cat_41.1805131.180513
34SingAnimation,Comedy,FamilyIn a city of humanoid animals, a hustling thea...Christophe LourdeletMatthew McConaughey,Reese Witherspoon, Seth Ma...20161087.260545270.3259.0cat_32.5029632.502963
45Suicide SquadAction,Adventure,FantasyA secret government agency recruits some of th...David AyerWill Smith, Jared Leto, Margot Robbie, Viola D...20161236.2393727325.0240.0cat_42.6424392.642439
\n", + "
" + ], + "text/plain": [ + " Rank Title Genre \\\n", + "0 1 Guardians of the Galaxy Action,Adventure,Sci-Fi \n", + "1 2 Prometheus Adventure,Mystery,Sci-Fi \n", + "2 3 Split Horror,Thriller \n", + "3 4 Sing Animation,Comedy,Family \n", + "4 5 Suicide Squad Action,Adventure,Fantasy \n", + "\n", + " Description Director \\\n", + "0 A group of intergalactic criminals are forced ... James Gunn \n", + "1 Following clues to the origin of mankind, a te... Ridley Scott \n", + "2 Three girls are kidnapped by a man with a diag... M. Night Shyamalan \n", + "3 In a city of humanoid animals, a hustling thea... Christophe Lourdelet \n", + "4 A secret government agency recruits some of th... David Ayer \n", + "\n", + " Actors Year Runtime (Minutes) \\\n", + "0 Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S... 2014 121 \n", + "1 Noomi Rapace, Logan Marshall-Green, Michael Fa... 2012 124 \n", + "2 James McAvoy, Anya Taylor-Joy, Haley Lu Richar... 2016 117 \n", + "3 Matthew McConaughey,Reese Witherspoon, Seth Ma... 2016 108 \n", + "4 Will Smith, Jared Leto, Margot Robbie, Viola D... 2016 123 \n", + "\n", + " Rating Votes Revenue (Millions) Metascore bin Revenue/min \\\n", + "0 8.1 757074 333.13 76.0 cat_4 2.753140 \n", + "1 7.0 485820 126.46 65.0 cat_4 1.019839 \n", + "2 7.3 157606 138.12 62.0 cat_4 1.180513 \n", + "3 7.2 60545 270.32 59.0 cat_3 2.502963 \n", + "4 6.2 393727 325.02 40.0 cat_4 2.642439 \n", + "\n", + " revenue_per_min \n", + "0 2.753140 \n", + "1 1.019839 \n", + "2 1.180513 \n", + "3 2.502963 \n", + "4 2.642439 " + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Create new column called 'bin', and apply the function 'bin_votes' to column 'Votes'. 
Note that 'axis=1' means that the \n", + "# function is applied to each row\n", + "df['bin'] = df.apply(lambda row: bin_votes(row['Votes']), axis=1)\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "id": "3ca6dc7e", + "metadata": {}, + "source": [ + "### Challenge 2" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "a2215cfd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
RankTitleGenreDescriptionDirectorActorsYearRuntime (Minutes)RatingVotesRevenue (Millions)MetascorebinRevenue/minrevenue_per_min
01Guardians of the GalaxyAction,Adventure,Sci-FiA group of intergalactic criminals are forced ...James GunnChris Pratt, Vin Diesel, Bradley Cooper, Zoe S...20141218.1757074333.1376.0cat_42.7531402.753140
12PrometheusAdventure,Mystery,Sci-FiFollowing clues to the origin of mankind, a te...Ridley ScottNoomi Rapace, Logan Marshall-Green, Michael Fa...20121247.0485820126.4665.0cat_41.0198391.019839
23SplitHorror,ThrillerThree girls are kidnapped by a man with a diag...M. Night ShyamalanJames McAvoy, Anya Taylor-Joy, Haley Lu Richar...20161177.3157606138.1262.0cat_41.1805131.180513
34SingAnimation,Comedy,FamilyIn a city of humanoid animals, a hustling thea...Christophe LourdeletMatthew McConaughey,Reese Witherspoon, Seth Ma...20161087.260545270.3259.0cat_32.5029632.502963
45Suicide SquadAction,Adventure,FantasyA secret government agency recruits some of th...David AyerWill Smith, Jared Leto, Margot Robbie, Viola D...20161236.2393727325.0240.0cat_42.6424392.642439
\n", + "
" + ], + "text/plain": [ + " Rank Title Genre \\\n", + "0 1 Guardians of the Galaxy Action,Adventure,Sci-Fi \n", + "1 2 Prometheus Adventure,Mystery,Sci-Fi \n", + "2 3 Split Horror,Thriller \n", + "3 4 Sing Animation,Comedy,Family \n", + "4 5 Suicide Squad Action,Adventure,Fantasy \n", + "\n", + " Description Director \\\n", + "0 A group of intergalactic criminals are forced ... James Gunn \n", + "1 Following clues to the origin of mankind, a te... Ridley Scott \n", + "2 Three girls are kidnapped by a man with a diag... M. Night Shyamalan \n", + "3 In a city of humanoid animals, a hustling thea... Christophe Lourdelet \n", + "4 A secret government agency recruits some of th... David Ayer \n", + "\n", + " Actors Year Runtime (Minutes) \\\n", + "0 Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S... 2014 121 \n", + "1 Noomi Rapace, Logan Marshall-Green, Michael Fa... 2012 124 \n", + "2 James McAvoy, Anya Taylor-Joy, Haley Lu Richar... 2016 117 \n", + "3 Matthew McConaughey,Reese Witherspoon, Seth Ma... 2016 108 \n", + "4 Will Smith, Jared Leto, Margot Robbie, Viola D... 
2016 123 \n", + "\n", + " Rating Votes Revenue (Millions) Metascore bin Revenue/min \\\n", + "0 8.1 757074 333.13 76.0 cat_4 2.753140 \n", + "1 7.0 485820 126.46 65.0 cat_4 1.019839 \n", + "2 7.3 157606 138.12 62.0 cat_4 1.180513 \n", + "3 7.2 60545 270.32 59.0 cat_3 2.502963 \n", + "4 6.2 393727 325.02 40.0 cat_4 2.642439 \n", + "\n", + " revenue_per_min \n", + "0 2.753140 \n", + "1 1.019839 \n", + "2 1.180513 \n", + "3 2.502963 \n", + "4 2.642439 " + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Obtain a new column to calculate the Revenue per minute by each film\n", + "\n", + "# Direct way\n", + "#df['revenue_per_min'] = df['Revenue (Millions)'] / df['Runtime (Minutes)']\n", + "\n", + "# Using lambda function\n", + "df['revenue_per_min'] = df.apply(lambda row: row['Revenue (Millions)'] / row['Runtime (Minutes)'], axis=1)\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "id": "16f38327", + "metadata": {}, + "source": [ + "### Challenge 3" ] }, { "cell_type": "code", "execution_count": null, - "id": "d3c3dd76", + "id": "ef261d4a", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "# Create function to increment 1 rate if the film is a thriller and decrement 1 if the film is a comedy\n", + "def " + ] } ], "metadata": { "kernelspec": { - "display_name": "Python [conda env:.conda-m1_env]", + "display_name": "Python (m1_env)", "language": "python", - "name": "conda-env-.conda-m1_env-py" + "name": "m1_env" }, "language_info": { "codemirror_mode": { @@ -73,7 +856,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.16" + "version": "3.10.13" } }, "nbformat": 4, From 87e26a5912965a3de3c83ae3e07535406d88929f Mon Sep 17 00:00:00 2001 From: CarlosSanchezVicente Date: Sun, 29 Oct 2023 22:38:21 +0100 Subject: [PATCH 2/4] Challenge_3y4 --- notebooks/your_code_here.ipynb | 726 ++++++++++++++++++++++++++++++--- 1 file changed, 668 
insertions(+), 58 deletions(-) diff --git a/notebooks/your_code_here.ipynb b/notebooks/your_code_here.ipynb index d20b1b0..38dcf3d 100644 --- a/notebooks/your_code_here.ipynb +++ b/notebooks/your_code_here.ipynb @@ -20,13 +20,14 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 79, "id": "ace6da70", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", - "import numpy as np" + "import numpy as np\n", + "import math" ] }, { @@ -41,7 +42,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 6, "id": "501ef8f5", "metadata": {}, "outputs": [ @@ -77,7 +78,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 7, "id": "3111282d", "metadata": {}, "outputs": [ @@ -218,7 +219,7 @@ "max 936.630000 100.000000 " ] }, - "execution_count": 10, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -389,7 +390,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 9, "id": "d2594c72", "metadata": {}, "outputs": [], @@ -412,7 +413,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 10, "id": "7390a386", "metadata": {}, "outputs": [ @@ -422,7 +423,7 @@ "'cat_2'" ] }, - "execution_count": 17, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -434,7 +435,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 11, "id": "ec157801", "metadata": {}, "outputs": [ @@ -472,8 +473,6 @@ " Revenue (Millions)\n", " Metascore\n", " bin\n", - " Revenue/min\n", - " revenue_per_min\n", " \n", " \n", " \n", @@ -492,8 +491,6 @@ " 333.13\n", " 76.0\n", " cat_4\n", - " 2.753140\n", - " 2.753140\n", " \n", " \n", " 1\n", @@ -510,8 +507,6 @@ " 126.46\n", " 65.0\n", " cat_4\n", - " 1.019839\n", - " 1.019839\n", " \n", " \n", " 2\n", @@ -528,8 +523,6 @@ " 138.12\n", " 62.0\n", " cat_4\n", - " 1.180513\n", - " 1.180513\n", " \n", " \n", " 3\n", @@ -546,8 +539,6 @@ " 270.32\n", " 59.0\n", " cat_3\n", - " 
2.502963\n", - " 2.502963\n", " \n", " \n", " 4\n", @@ -564,8 +555,6 @@ " 325.02\n", " 40.0\n", " cat_4\n", - " 2.642439\n", - " 2.642439\n", " \n", " \n", "\n", @@ -593,22 +582,15 @@ "3 Matthew McConaughey,Reese Witherspoon, Seth Ma... 2016 108 \n", "4 Will Smith, Jared Leto, Margot Robbie, Viola D... 2016 123 \n", "\n", - " Rating Votes Revenue (Millions) Metascore bin Revenue/min \\\n", - "0 8.1 757074 333.13 76.0 cat_4 2.753140 \n", - "1 7.0 485820 126.46 65.0 cat_4 1.019839 \n", - "2 7.3 157606 138.12 62.0 cat_4 1.180513 \n", - "3 7.2 60545 270.32 59.0 cat_3 2.502963 \n", - "4 6.2 393727 325.02 40.0 cat_4 2.642439 \n", - "\n", - " revenue_per_min \n", - "0 2.753140 \n", - "1 1.019839 \n", - "2 1.180513 \n", - "3 2.502963 \n", - "4 2.642439 " + " Rating Votes Revenue (Millions) Metascore bin \n", + "0 8.1 757074 333.13 76.0 cat_4 \n", + "1 7.0 485820 126.46 65.0 cat_4 \n", + "2 7.3 157606 138.12 62.0 cat_4 \n", + "3 7.2 60545 270.32 59.0 cat_3 \n", + "4 6.2 393727 325.02 40.0 cat_4 " ] }, - "execution_count": 31, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -630,7 +612,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 12, "id": "a2215cfd", "metadata": {}, "outputs": [ @@ -668,7 +650,6 @@ " Revenue (Millions)\n", " Metascore\n", " bin\n", - " Revenue/min\n", " revenue_per_min\n", " \n", " \n", @@ -689,7 +670,6 @@ " 76.0\n", " cat_4\n", " 2.753140\n", - " 2.753140\n", " \n", " \n", " 1\n", @@ -707,7 +687,6 @@ " 65.0\n", " cat_4\n", " 1.019839\n", - " 1.019839\n", " \n", " \n", " 2\n", @@ -725,7 +704,6 @@ " 62.0\n", " cat_4\n", " 1.180513\n", - " 1.180513\n", " \n", " \n", " 3\n", @@ -743,7 +721,6 @@ " 59.0\n", " cat_3\n", " 2.502963\n", - " 2.502963\n", " \n", " \n", " 4\n", @@ -761,7 +738,6 @@ " 40.0\n", " cat_4\n", " 2.642439\n", - " 2.642439\n", " \n", " \n", "\n", @@ -789,22 +765,15 @@ "3 Matthew McConaughey,Reese Witherspoon, Seth Ma... 
2016 108 \n", "4 Will Smith, Jared Leto, Margot Robbie, Viola D... 2016 123 \n", "\n", - " Rating Votes Revenue (Millions) Metascore bin Revenue/min \\\n", - "0 8.1 757074 333.13 76.0 cat_4 2.753140 \n", - "1 7.0 485820 126.46 65.0 cat_4 1.019839 \n", - "2 7.3 157606 138.12 62.0 cat_4 1.180513 \n", - "3 7.2 60545 270.32 59.0 cat_3 2.502963 \n", - "4 6.2 393727 325.02 40.0 cat_4 2.642439 \n", - "\n", - " revenue_per_min \n", - "0 2.753140 \n", - "1 1.019839 \n", - "2 1.180513 \n", - "3 2.502963 \n", - "4 2.642439 " + " Rating Votes Revenue (Millions) Metascore bin revenue_per_min \n", + "0 8.1 757074 333.13 76.0 cat_4 2.753140 \n", + "1 7.0 485820 126.46 65.0 cat_4 1.019839 \n", + "2 7.3 157606 138.12 62.0 cat_4 1.180513 \n", + "3 7.2 60545 270.32 59.0 cat_3 2.502963 \n", + "4 6.2 393727 325.02 40.0 cat_4 2.642439 " ] }, - "execution_count": 30, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -830,13 +799,654 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "id": "ef261d4a", "metadata": {}, "outputs": [], "source": [ "# Create function to increment 1 rate if the film is a thriller and decrement 1 if the film is a comedy\n", - "def " + "def change_puntuation(genre, metascore):\n", + " if 'Thriller' in genre:\n", + " return metascore + 1\n", + " elif 'Comedy' in genre:\n", + " return metascore - 1\n", + " else:\n", + " return metascore" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "a49bd73c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "77.0" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "genre = 'Horror,Thriller'\n", + "metascore = 76.0\n", + "change_puntuation(genre, metascore)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "b8c47e48", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
RankTitleGenreDescriptionDirectorActorsYearRuntime (Minutes)RatingVotesRevenue (Millions)Metascorebinrevenue_per_minnew_rating
01Guardians of the GalaxyAction,Adventure,Sci-FiA group of intergalactic criminals are forced ...James GunnChris Pratt, Vin Diesel, Bradley Cooper, Zoe S...20141218.1757074333.1376.0cat_42.75314076.0
12PrometheusAdventure,Mystery,Sci-FiFollowing clues to the origin of mankind, a te...Ridley ScottNoomi Rapace, Logan Marshall-Green, Michael Fa...20121247.0485820126.4665.0cat_41.01983965.0
23SplitHorror,ThrillerThree girls are kidnapped by a man with a diag...M. Night ShyamalanJames McAvoy, Anya Taylor-Joy, Haley Lu Richar...20161177.3157606138.1262.0cat_41.18051363.0
34SingAnimation,Comedy,FamilyIn a city of humanoid animals, a hustling thea...Christophe LourdeletMatthew McConaughey,Reese Witherspoon, Seth Ma...20161087.260545270.3259.0cat_32.50296358.0
45Suicide SquadAction,Adventure,FantasyA secret government agency recruits some of th...David AyerWill Smith, Jared Leto, Margot Robbie, Viola D...20161236.2393727325.0240.0cat_42.64243940.0
\n", + "
" + ], + "text/plain": [ + " Rank Title Genre \\\n", + "0 1 Guardians of the Galaxy Action,Adventure,Sci-Fi \n", + "1 2 Prometheus Adventure,Mystery,Sci-Fi \n", + "2 3 Split Horror,Thriller \n", + "3 4 Sing Animation,Comedy,Family \n", + "4 5 Suicide Squad Action,Adventure,Fantasy \n", + "\n", + " Description Director \\\n", + "0 A group of intergalactic criminals are forced ... James Gunn \n", + "1 Following clues to the origin of mankind, a te... Ridley Scott \n", + "2 Three girls are kidnapped by a man with a diag... M. Night Shyamalan \n", + "3 In a city of humanoid animals, a hustling thea... Christophe Lourdelet \n", + "4 A secret government agency recruits some of th... David Ayer \n", + "\n", + " Actors Year Runtime (Minutes) \\\n", + "0 Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S... 2014 121 \n", + "1 Noomi Rapace, Logan Marshall-Green, Michael Fa... 2012 124 \n", + "2 James McAvoy, Anya Taylor-Joy, Haley Lu Richar... 2016 117 \n", + "3 Matthew McConaughey,Reese Witherspoon, Seth Ma... 2016 108 \n", + "4 Will Smith, Jared Leto, Margot Robbie, Viola D... 
2016 123 \n", + "\n", + " Rating Votes Revenue (Millions) Metascore bin revenue_per_min \\\n", + "0 8.1 757074 333.13 76.0 cat_4 2.753140 \n", + "1 7.0 485820 126.46 65.0 cat_4 1.019839 \n", + "2 7.3 157606 138.12 62.0 cat_4 1.180513 \n", + "3 7.2 60545 270.32 59.0 cat_3 2.502963 \n", + "4 6.2 393727 325.02 40.0 cat_4 2.642439 \n", + "\n", + " new_rating \n", + "0 76.0 \n", + "1 65.0 \n", + "2 63.0 \n", + "3 58.0 \n", + "4 40.0 " + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['new_rating'] = df.apply(lambda row: change_puntuation(row['Genre'], row['Metascore']), axis=1)\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "id": "15ec5c62", + "metadata": {}, + "source": [ + "### Challenge 4" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "id": "f0550045", + "metadata": {}, + "outputs": [], + "source": [ + "#This function checks whether a number is prime (NOTE: it also returns True for num < 2, because the loop never runs)\n", + "def is_prime(num):\n", + " for n in range(2, num): #Try every candidate divisor from 2 to num - 1\n", + " if num % n == 0: #If the remainder is 0, n is a divisor of num, so num is not prime\n", + " #print(\"The number: \", num, \" isn't prime, it's divisor of \", n)\n", + " return False\n", + " #print(num, \" is prime\")\n", + " return True\n", + "\n", + "#This function converts the film title to ASCII codes, sums the value of each character and divides the number of\n", + "#votes by this sum. 
After that, it tests whether the result is prime using the 'is_prime' function above.\n", + "def ascii_value(name, votes):\n", + " value = 0 #Initialize the running sum to zero\n", + " for character in name: #Extract each character with this loop\n", + " value += ord(character) #Convert the character to its ASCII code and accumulate it\n", + " \n", + " votes_per_sum = int(votes/value) #Do the division and convert to int to drop the decimal part\n", + " \n", + " return is_prime(votes_per_sum) #Call the function 'is_prime' and return whether the number is prime or not" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "id": "52c01c54", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The number: 348 isn't prime, it's divisor of 2\n" + ] + }, + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 67, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ascii_value('Guardians of the Galaxy', 757074)" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "id": "831fd317", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
RankTitleGenreDescriptionDirectorActorsYearRuntime (Minutes)RatingVotesRevenue (Millions)Metascorebinrevenue_per_minnew_ratingvotes_per_sum
01Guardians of the GalaxyAction,Adventure,Sci-FiA group of intergalactic criminals are forced ...James GunnChris Pratt, Vin Diesel, Bradley Cooper, Zoe S...20141218.1757074333.1376.0cat_42.75314076.0False
12PrometheusAdventure,Mystery,Sci-FiFollowing clues to the origin of mankind, a te...Ridley ScottNoomi Rapace, Logan Marshall-Green, Michael Fa...20121247.0485820126.4665.0cat_41.01983965.0False
23SplitHorror,ThrillerThree girls are kidnapped by a man with a diag...M. Night ShyamalanJames McAvoy, Anya Taylor-Joy, Haley Lu Richar...20161177.3157606138.1262.0cat_41.18051363.0False
34SingAnimation,Comedy,FamilyIn a city of humanoid animals, a hustling thea...Christophe LourdeletMatthew McConaughey,Reese Witherspoon, Seth Ma...20161087.260545270.3259.0cat_32.50296358.0False
45Suicide SquadAction,Adventure,FantasyA secret government agency recruits some of th...David AyerWill Smith, Jared Leto, Margot Robbie, Viola D...20161236.2393727325.0240.0cat_42.64243940.0False
\n", + "
" + ], + "text/plain": [ + " Rank Title Genre \\\n", + "0 1 Guardians of the Galaxy Action,Adventure,Sci-Fi \n", + "1 2 Prometheus Adventure,Mystery,Sci-Fi \n", + "2 3 Split Horror,Thriller \n", + "3 4 Sing Animation,Comedy,Family \n", + "4 5 Suicide Squad Action,Adventure,Fantasy \n", + "\n", + " Description Director \\\n", + "0 A group of intergalactic criminals are forced ... James Gunn \n", + "1 Following clues to the origin of mankind, a te... Ridley Scott \n", + "2 Three girls are kidnapped by a man with a diag... M. Night Shyamalan \n", + "3 In a city of humanoid animals, a hustling thea... Christophe Lourdelet \n", + "4 A secret government agency recruits some of th... David Ayer \n", + "\n", + " Actors Year Runtime (Minutes) \\\n", + "0 Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S... 2014 121 \n", + "1 Noomi Rapace, Logan Marshall-Green, Michael Fa... 2012 124 \n", + "2 James McAvoy, Anya Taylor-Joy, Haley Lu Richar... 2016 117 \n", + "3 Matthew McConaughey,Reese Witherspoon, Seth Ma... 2016 108 \n", + "4 Will Smith, Jared Leto, Margot Robbie, Viola D... 
2016 123 \n", + "\n", + " Rating Votes Revenue (Millions) Metascore bin revenue_per_min \\\n", + "0 8.1 757074 333.13 76.0 cat_4 2.753140 \n", + "1 7.0 485820 126.46 65.0 cat_4 1.019839 \n", + "2 7.3 157606 138.12 62.0 cat_4 1.180513 \n", + "3 7.2 60545 270.32 59.0 cat_3 2.502963 \n", + "4 6.2 393727 325.02 40.0 cat_4 2.642439 \n", + "\n", + " new_rating votes_per_sum \n", + "0 76.0 False \n", + "1 65.0 False \n", + "2 63.0 False \n", + "3 58.0 False \n", + "4 40.0 False " + ] + }, + "execution_count": 60, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Add new column to dataframe and this indicates if the result of votes/sum is prime or not\n", + "df['votes_per_sum'] = df.apply(lambda row: ascii_value(row['Title'], row['Votes']), axis=1)\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "id": "647dd073", + "metadata": {}, + "source": [ + "### Challenge 5" + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "id": "5ea0c60f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Runtime (Minutes)RatingVotesRevenue (Millions)Metascoreeven_or_odd
Director
Aamir Khan165.008.5102697.001.20042.0odd
Abdellatif Kechiche180.007.8103150.002.20088.0odd
Adam McKay110.757.0201706.75109.53565.5even
Adam Shankman120.006.383733.5078.66564.0even
Adam Wingard94.505.948578.5010.53561.5even
\n", + "
" + ], + "text/plain": [ + " Runtime (Minutes) Rating Votes Revenue (Millions) \\\n", + "Director \n", + "Aamir Khan 165.00 8.5 102697.00 1.200 \n", + "Abdellatif Kechiche 180.00 7.8 103150.00 2.200 \n", + "Adam McKay 110.75 7.0 201706.75 109.535 \n", + "Adam Shankman 120.00 6.3 83733.50 78.665 \n", + "Adam Wingard 94.50 5.9 48578.50 10.535 \n", + "\n", + " Metascore even_or_odd \n", + "Director \n", + "Aamir Khan 42.0 odd \n", + "Abdellatif Kechiche 88.0 odd \n", + "Adam McKay 65.5 even \n", + "Adam Shankman 64.0 even \n", + "Adam Wingard 61.5 even " + ] + }, + "execution_count": 97, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Create a new dataframe with some columns\n", + "director_df = df.loc[:, ['Director', 'Runtime (Minutes)', 'Rating', 'Votes', 'Revenue (Millions)', 'Metascore', 'bin']]\n", + "\n", + "#Group new dataframe by director column and do the mean of each colunm\n", + "director_df = new_df.groupby(by = 'Director').mean()\n", + "\n", + "#Delete the rows with NaN values\n", + "director_df = director_df.dropna()\n", + "\n", + "#With this funtion calculate this operation: 'votes / (revenue + metascore' and check if the value is odd or even.\n", + "def puntuation(metascore, votes, revenue):\n", + "\n", + " rev_per_score = int(votes / (revenue + metascore))\n", + " #print(rev_per_score)\n", + " if rev_per_score%2 == 0:\n", + " return 'even'\n", + " else:\n", + " return 'odd'\n", + "\n", + "#Apply this function to the dataframe and store the result in a new column\n", + "director_df['even_or_odd'] = director_df.apply(lambda row: puntuation(row['Metascore'], row['Votes'], \n", + " row['Revenue (Millions)']), axis=1)\n", + "director_df.head()" + ] + }, + { + "cell_type": "markdown", + "id": "3aed073b", + "metadata": {}, + "source": [ + "### Bonus challenge " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "45642149", + "metadata": {}, + "outputs": [], + "source": [ + "# Donwload the library: 
https://docs.python.org/es/3/library/hashlib.html" ] } ], From b10f061498d1306302c7865f30daa42604246b83 Mon Sep 17 00:00:00 2001 From: CarlosSanchezVicente Date: Mon, 30 Oct 2023 18:53:41 +0100 Subject: [PATCH 3/4] workshop_finished --- notebooks/your_code_here.ipynb | 325 +++++++++++++++++++++++++++++---- 1 file changed, 287 insertions(+), 38 deletions(-) diff --git a/notebooks/your_code_here.ipynb b/notebooks/your_code_here.ipynb index 38dcf3d..333781b 100644 --- a/notebooks/your_code_here.ipynb +++ b/notebooks/your_code_here.ipynb @@ -20,19 +20,20 @@ }, { "cell_type": "code", - "execution_count": 79, + "execution_count": 1, "id": "ace6da70", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", - "import math" + "import math\n", + "import hashlib" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 2, "id": "d3c3dd76", "metadata": {}, "outputs": [], @@ -42,7 +43,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 3, "id": "501ef8f5", "metadata": {}, "outputs": [ @@ -78,7 +79,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 4, "id": "3111282d", "metadata": {}, "outputs": [ @@ -219,7 +220,7 @@ "max 936.630000 100.000000 " ] }, - "execution_count": 7, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -230,7 +231,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 5, "id": "f88f9e24", "metadata": {}, "outputs": [ @@ -379,7 +380,7 @@ "4 6.2 393727 325.02 40.0 " ] }, - "execution_count": 8, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -390,7 +391,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 6, "id": "d2594c72", "metadata": {}, "outputs": [], @@ -413,7 +414,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 7, "id": "7390a386", "metadata": {}, "outputs": [ @@ -423,7 +424,7 @@ "'cat_2'" ] }, - "execution_count": 10, + 
"execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -435,7 +436,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 8, "id": "ec157801", "metadata": {}, "outputs": [ @@ -590,7 +591,7 @@ "4 6.2 393727 325.02 40.0 cat_4 " ] }, - "execution_count": 11, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -612,7 +613,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 10, "id": "a2215cfd", "metadata": {}, "outputs": [ @@ -773,7 +774,7 @@ "4 6.2 393727 325.02 40.0 cat_4 2.642439 " ] }, - "execution_count": 12, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -799,7 +800,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 11, "id": "ef261d4a", "metadata": {}, "outputs": [], @@ -816,7 +817,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 12, "id": "a49bd73c", "metadata": {}, "outputs": [ @@ -826,7 +827,7 @@ "77.0" ] }, - "execution_count": 18, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -839,7 +840,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 13, "id": "b8c47e48", "metadata": {}, "outputs": [ @@ -1013,7 +1014,7 @@ "4 40.0 " ] }, - "execution_count": 19, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -1033,7 +1034,7 @@ }, { "cell_type": "code", - "execution_count": 66, + "execution_count": 14, "id": "f0550045", "metadata": {}, "outputs": [], @@ -1061,24 +1062,17 @@ }, { "cell_type": "code", - "execution_count": 67, + "execution_count": 15, "id": "52c01c54", "metadata": {}, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The number: 348 isn't prime, it's divisor of 2\n" - ] - }, { "data": { "text/plain": [ "False" ] }, - "execution_count": 67, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -1089,7 +1083,7 @@ }, { "cell_type": "code", - 
"execution_count": 60, + "execution_count": 16, "id": "831fd317", "metadata": {}, "outputs": [ @@ -1269,7 +1263,7 @@ "4 40.0 False " ] }, - "execution_count": 60, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -1290,7 +1284,7 @@ }, { "cell_type": "code", - "execution_count": 97, + "execution_count": 19, "id": "5ea0c60f", "metadata": {}, "outputs": [ @@ -1400,17 +1394,17 @@ "Adam Wingard 61.5 even " ] }, - "execution_count": 97, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Create a new dataframe with some columns\n", - "director_df = df.loc[:, ['Director', 'Runtime (Minutes)', 'Rating', 'Votes', 'Revenue (Millions)', 'Metascore', 'bin']]\n", + "director_df = df.loc[:, ['Director', 'Runtime (Minutes)', 'Rating', 'Votes', 'Revenue (Millions)', 'Metascore']]\n", "\n", "#Group new dataframe by director column and do the mean of each colunm\n", - "director_df = new_df.groupby(by = 'Director').mean()\n", + "director_df = director_df.groupby(by = 'Director').mean()\n", "\n", "#Delete the rows with NaN values\n", "director_df = director_df.dropna()\n", @@ -1441,13 +1435,268 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, + "id": "2670468d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "157" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Function to convert the text to hash and sum the numbers results\n", + "def search_pattern(texto):\n", + " # With SHA256 it's possible to calculate the hash string\n", + " sha256 = hashlib.sha256(texto.encode()).hexdigest()\n", + " \n", + " # Sumamos los valores numéricos en la cadena hash\n", + " sum_hash = sum(int(number) for number in sha256 if number.isdigit())\n", + " \n", + " return sum_hash\n", + "\n", + "search_pattern('Guardians of the Galaxy')" + ] + }, + { + "cell_type": "code", + "execution_count": 28, "id": "45642149", "metadata": 
{}, "outputs": [], "source": [ - "# Donwload the library: https://docs.python.org/es/3/library/hashlib.html" + "#Donwload the library HASH: https://docs.python.org/es/3/library/hashlib.html\n", + "\n", + "#Function to convert the text to hash and sum the numbers results\n", + "def search_pattern(title, revenue, votes):\n", + " # With SHA256 it's possible to calculate the hash string\n", + " sha256 = hashlib.sha256(title.encode()).hexdigest()\n", + " \n", + " # Sumamos los valores numéricos en la cadena hash\n", + " sum_hash = sum(int(number) for number in sha256 if number.isdigit())\n", + " \n", + " # Check if the sum is between revenue and votes for this film\n", + " if (sum_hash > revenue) and (revenue < votes):\n", + " return True\n", + " else:\n", + " return False\n", + "\n", + "#Apply the function 'sum_hash' to the dataframe\n", + "df['Hash_sum'] = df.apply(lambda row: search_pattern(row['Title'], row['Revenue (Millions)'],\n", + " row['Votes']), axis=1)\n", + "#df['votes_per_sum'] = df.apply(lambda row: ascii_value(row['Title'], row['Votes']), axis=1)" ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "817cb699", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
RankTitleGenreDescriptionDirectorActorsYearRuntime (Minutes)RatingVotesRevenue (Millions)Metascorebinrevenue_per_minnew_ratingvotes_per_sumHash_sum
01Guardians of the GalaxyAction,Adventure,Sci-FiA group of intergalactic criminals are forced ...James GunnChris Pratt, Vin Diesel, Bradley Cooper, Zoe S...20141218.1757074333.1376.0cat_42.75314076.0FalseFalse
12PrometheusAdventure,Mystery,Sci-FiFollowing clues to the origin of mankind, a te...Ridley ScottNoomi Rapace, Logan Marshall-Green, Michael Fa...20121247.0485820126.4665.0cat_41.01983965.0FalseTrue
23SplitHorror,ThrillerThree girls are kidnapped by a man with a diag...M. Night ShyamalanJames McAvoy, Anya Taylor-Joy, Haley Lu Richar...20161177.3157606138.1262.0cat_41.18051363.0FalseTrue
34SingAnimation,Comedy,FamilyIn a city of humanoid animals, a hustling thea...Christophe LourdeletMatthew McConaughey,Reese Witherspoon, Seth Ma...20161087.260545270.3259.0cat_32.50296358.0FalseFalse
45Suicide SquadAction,Adventure,FantasyA secret government agency recruits some of th...David AyerWill Smith, Jared Leto, Margot Robbie, Viola D...20161236.2393727325.0240.0cat_42.64243940.0FalseFalse
\n", + "
" + ], + "text/plain": [ + " Rank Title Genre \\\n", + "0 1 Guardians of the Galaxy Action,Adventure,Sci-Fi \n", + "1 2 Prometheus Adventure,Mystery,Sci-Fi \n", + "2 3 Split Horror,Thriller \n", + "3 4 Sing Animation,Comedy,Family \n", + "4 5 Suicide Squad Action,Adventure,Fantasy \n", + "\n", + " Description Director \\\n", + "0 A group of intergalactic criminals are forced ... James Gunn \n", + "1 Following clues to the origin of mankind, a te... Ridley Scott \n", + "2 Three girls are kidnapped by a man with a diag... M. Night Shyamalan \n", + "3 In a city of humanoid animals, a hustling thea... Christophe Lourdelet \n", + "4 A secret government agency recruits some of th... David Ayer \n", + "\n", + " Actors Year Runtime (Minutes) \\\n", + "0 Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S... 2014 121 \n", + "1 Noomi Rapace, Logan Marshall-Green, Michael Fa... 2012 124 \n", + "2 James McAvoy, Anya Taylor-Joy, Haley Lu Richar... 2016 117 \n", + "3 Matthew McConaughey,Reese Witherspoon, Seth Ma... 2016 108 \n", + "4 Will Smith, Jared Leto, Margot Robbie, Viola D... 
2016 123 \n", + "\n", + " Rating Votes Revenue (Millions) Metascore bin revenue_per_min \\\n", + "0 8.1 757074 333.13 76.0 cat_4 2.753140 \n", + "1 7.0 485820 126.46 65.0 cat_4 1.019839 \n", + "2 7.3 157606 138.12 62.0 cat_4 1.180513 \n", + "3 7.2 60545 270.32 59.0 cat_3 2.502963 \n", + "4 6.2 393727 325.02 40.0 cat_4 2.642439 \n", + "\n", + " new_rating votes_per_sum Hash_sum \n", + "0 76.0 False False \n", + "1 65.0 False True \n", + "2 63.0 False True \n", + "3 58.0 False False \n", + "4 40.0 False False " + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "668e50d4", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { From ad8e814069ee4cfff79a8eb5da93c53b6a417c24 Mon Sep 17 00:00:00 2001 From: CarlosSanchezVicente Date: Wed, 1 Nov 2023 18:15:09 +0100 Subject: [PATCH 4/4] workshop_finished --- notebooks/your_code_here.ipynb | 81 ++++++++++++++++++---------------- 1 file changed, 42 insertions(+), 39 deletions(-) diff --git a/notebooks/your_code_here.ipynb b/notebooks/your_code_here.ipynb index 333781b..88bb6b1 100644 --- a/notebooks/your_code_here.ipynb +++ b/notebooks/your_code_here.ipynb @@ -38,6 +38,7 @@ "metadata": {}, "outputs": [], "source": [ + "#Read the data from the csv and store it in the dataframe.\n", "df = pd.read_csv(\"../data/input/IMDB-Movie-Data.csv\")" ] }, @@ -396,7 +397,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Definition of function\n", + "# Function definition:\n", "def bin_votes(votes):\n", " if (votes >= 0) and (votes <= 999):\n", " tag = 'cat_1'\n", @@ -597,8 +598,8 @@ } ], "source": [ - "# Create new column called 'bin', and apply the function 'bin_votes' to column 'Votes'. Noted that 'axix=1' indicate that the \n", - "# operation is by each column\n", + "#Create new column called 'bin', and apply the function 'bin_votes' to column 'Votes'. 
'axix=1' indicates that the \n", + "#operation is by each column.\n", "df['bin'] = df.apply(lambda row: bin_votes(row['Votes']), axis=1)\n", "df.head()" ] @@ -780,12 +781,12 @@ } ], "source": [ - "# Obtain a new column to calculate the Revenue per minute by each film\n", + "# Obtain a new column to calculate the revenue per minute by each film\n", "\n", - "# Direct way\n", + "# Direct way:\n", "#df['revenue_per_min'] = df['Revenue (Millions)'] / df['Runtime (Minutes)']\n", "\n", - "# Using lambda function\n", + "# Using lambda function:\n", "df['revenue_per_min'] = df.apply(lambda row: row['Revenue (Millions)'] / row['Runtime (Minutes)'], axis=1)\n", "df.head()" ] @@ -805,7 +806,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Create function to increment 1 rate if the film is a thriller and decrement 1 if the film is a comedy\n", + "#Create function to increase by 1 the ratio if the movie is a thriller and decrease by 1 if the movie is a comedy.\n", "def change_puntuation(genre, metascore):\n", " if 'Thriller' in genre:\n", " return metascore + 1\n", @@ -1039,25 +1040,25 @@ "metadata": {}, "outputs": [], "source": [ - "#This function is used to calculate if a number is prime or not\n", + "#This function is used to calculate whether a number is prime or not\n", "def is_prime(num):\n", - " for n in range(2, num): #Create an array from 2 to number to test\n", - " if num % n == 0: #If the module of division is 0, num is divisor of number, if not is prime\n", + " for n in range(2, num): #Create an array from 2 to 'num' and use the loop to get each number\n", + " if num % n == 0: #If the module of division is 0, num is divisor of 'num', if not is prime\n", " #print(\"The number: \", num, \" isn't prime, it's divisor of \", n)\n", " return False\n", " #print(num, \" is prime\")\n", - " return True\n", + " return True #If the module of division is 1, num isn't divisor of 'num', so it's prime and return 'True'\n", "\n", - "#This function is used to convert the film 
title to ASCII code, sum de value of each character and divide the number of\n", - "#votes by this sum. And after that, test if the number is prime using the function below.\n", + "#This function is used to convert the movie title into ASCII code, sum the value of each character and divide the number of \n", + "#movie votes by this sum. And after that, it check if the number is prime using the function defined above ('is_prime').\n", "def ascii_value(name, votes):\n", " value = 0 #Inicialize the value of sum to zero\n", " for character in name: #Extract each character with this loop\n", " value += ord(character) #Convert the character to ASCII code and accumulates it\n", " \n", - " votes_per_sum = int(votes/value) #Do the division and convert to int to delete de decimal number\n", + " votes_per_sum = int(votes/value) #Make the division and convert to 'int' to remove the decimal number.\n", " \n", - " return is_prime(votes_per_sum) #Evaluate the function 'is_prime'and return if the number is prime or not" + " return is_prime(votes_per_sum) #Evaluate the function 'is_prime'and return if the number is prime or not." 
] }, { @@ -1269,7 +1270,7 @@ } ], "source": [ - "#Add new column to dataframe and this indicates if the result of votes/sum is prime or not\n", + "#Add new column to dataframe indicating whether the result of votes/sum is prime or not.\n", "df['votes_per_sum'] = df.apply(lambda row: ascii_value(row['Title'], row['Votes']), axis=1)\n", "df.head()" ] @@ -1284,7 +1285,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 4, "id": "5ea0c60f", "metadata": {}, "outputs": [ @@ -1394,22 +1395,22 @@ "Adam Wingard 61.5 even " ] }, - "execution_count": 19, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "#Create a new dataframe with some columns\n", + "#Create a new dataframe with some columns.\n", "director_df = df.loc[:, ['Director', 'Runtime (Minutes)', 'Rating', 'Votes', 'Revenue (Millions)', 'Metascore']]\n", "\n", - "#Group new dataframe by director column and do the mean of each colunm\n", + "#Group new dataframe by director column and do the mean of each colunm.\n", "director_df = director_df.groupby(by = 'Director').mean()\n", "\n", - "#Delete the rows with NaN values\n", + "#Delete the rows with NaN values.\n", "director_df = director_df.dropna()\n", "\n", - "#With this funtion calculate this operation: 'votes / (revenue + metascore' and check if the value is odd or even.\n", + "#With this funtion calculate this operation: 'votes / (revenue + metascore). 
And check if the value is odd or even.\n", "def puntuation(metascore, votes, revenue):\n", "\n", " rev_per_score = int(votes / (revenue + metascore))\n", @@ -1419,7 +1420,7 @@ " else:\n", " return 'odd'\n", "\n", - "#Apply this function to the dataframe and store the result in a new column\n", + "#Apply this function to the dataframe and store the result in a new column.\n", "director_df['even_or_odd'] = director_df.apply(lambda row: puntuation(row['Metascore'], row['Votes'], \n", " row['Revenue (Millions)']), axis=1)\n", "director_df.head()" @@ -1435,7 +1436,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 8, "id": "2670468d", "metadata": {}, "outputs": [ @@ -1445,52 +1446,54 @@ "157" ] }, - "execution_count": 24, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "#Function to convert the text to hash and sum the numbers results\n", + "#CHECK THE 'SHA256' FUNCTION BEHAVIOR - Function to convert the text to hash and sum the resulting numbers:\n", "def search_pattern(texto):\n", - " # With SHA256 it's possible to calculate the hash string\n", - " sha256 = hashlib.sha256(texto.encode()).hexdigest()\n", + " #With the 'sha256' function it's possible to calculate the hash string.\n", + " sha256_list = hashlib.sha256(texto.encode()).hexdigest()\n", + " #print(sha256_list)\n", + " #print(len(sha256_list))\n", " \n", - " # Sumamos los valores numéricos en la cadena hash\n", - " sum_hash = sum(int(number) for number in sha256 if number.isdigit())\n", + " #With this list comprehension it's possible to extract each character from the above conversion. 
And sum these numbers \n", + " #if the character is a number.\n", + " sum_hash = sum(int(number) for number in sha256_list if number.isdigit())\n", " \n", - " return sum_hash\n", + " return sum_hash #Return the sum\n", "\n", "search_pattern('Guardians of the Galaxy')" ] }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 9, "id": "45642149", "metadata": {}, "outputs": [], "source": [ "#Donwload the library HASH: https://docs.python.org/es/3/library/hashlib.html\n", "\n", - "#Function to convert the text to hash and sum the numbers results\n", + "#Function to convert the text to hash and sum the resulting numbers:\n", "def search_pattern(title, revenue, votes):\n", - " # With SHA256 it's possible to calculate the hash string\n", + " #With the 'sha256' function it's possible to calculate the hash string.\n", " sha256 = hashlib.sha256(title.encode()).hexdigest()\n", " \n", - " # Sumamos los valores numéricos en la cadena hash\n", + " #With this list comprehension it's possible to extract each character from the above conversion. And sum these numbers \n", + " #if the character is a number.\n", " sum_hash = sum(int(number) for number in sha256 if number.isdigit())\n", " \n", - " # Check if the sum is between revenue and votes for this film\n", + " #Check if the sum of the sha256 values is between revenues and votes for this film.\n", " if (sum_hash > revenue) and (revenue < votes):\n", " return True\n", " else:\n", " return False\n", "\n", "#Apply the function 'sum_hash' to the dataframe\n", - "df['Hash_sum'] = df.apply(lambda row: search_pattern(row['Title'], row['Revenue (Millions)'],\n", - " row['Votes']), axis=1)\n", - "#df['votes_per_sum'] = df.apply(lambda row: ascii_value(row['Title'], row['Votes']), axis=1)" + "df['hash_sum'] = df.apply(lambda row: search_pattern(row['Title'], row['Revenue (Millions)'], row['Votes']), axis=1)" ] }, {