From e99c160caac8e987e4f00e77a06b9c4192c88581 Mon Sep 17 00:00:00 2001 From: Alana Castillo Date: Sat, 28 Oct 2023 14:25:30 +0200 Subject: [PATCH] workshop on going --- notebooks/your_code_here.ipynb | 451 +++++++++++++++++++++++++++++++-- 1 file changed, 423 insertions(+), 28 deletions(-) diff --git a/notebooks/your_code_here.ipynb b/notebooks/your_code_here.ipynb index f60a09e..e5a5a17 100644 --- a/notebooks/your_code_here.ipynb +++ b/notebooks/your_code_here.ipynb @@ -12,46 +12,441 @@ }, { "cell_type": "code", - "execution_count": 1, - "id": "ace6da70", + "execution_count": 17, + "id": "9ced0771", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "4fef775b", + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv('../data/input/IMDB-Movie-Data.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "437241fc", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "The Zen of Python, by Tim Peters\n", - "\n", - "Beautiful is better than ugly.\n", - "Explicit is better than implicit.\n", - "Simple is better than complex.\n", - "Complex is better than complicated.\n", - "Flat is better than nested.\n", - "Sparse is better than dense.\n", - "Readability counts.\n", - "Special cases aren't special enough to break the rules.\n", - "Although practicality beats purity.\n", - "Errors should never pass silently.\n", - "Unless explicitly silenced.\n", - "In the face of ambiguity, refuse the temptation to guess.\n", - "There should be one-- and preferably only one --obvious way to do it.\n", - "Although that way may not be obvious at first unless you're Dutch.\n", - "Now is better than never.\n", - "Although never is often better than *right* now.\n", - "If the implementation is hard to explain, it's a bad idea.\n", - "If the implementation is easy to explain, it may be a good idea.\n", - "Namespaces are one honking great idea -- let's do more of those!\n" + "\n", + "RangeIndex: 1000 entries, 0 to 999\n", + "Data columns (total 12 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 Rank 1000 non-null int64 \n", + " 1 Title 1000 non-null object \n", + " 2 Genre 1000 non-null object \n", + " 3 Description 1000 non-null object \n", + " 4 Director 1000 non-null object \n", + " 5 Actors 1000 non-null object \n", + " 6 Year 1000 non-null int64 \n", + " 7 Runtime (Minutes) 1000 non-null int64 \n", + " 8 Rating 1000 non-null float64\n", + " 9 Votes 1000 non-null int64 \n", + " 10 Revenue (Millions) 872 non-null float64\n", + " 11 Metascore 936 non-null float64\n", + "dtypes: float64(3), int64(4), object(5)\n", + "memory usage: 93.9+ KB\n" + ] + } + ], + "source": [ + "df.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "664ceb59", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
RankYearRuntime (Minutes)RatingVotesRevenue (Millions)Metascore
count1000.0000001000.0000001000.0000001000.0000001.000000e+03872.000000936.000000
mean500.5000002012.783000113.1720006.7232001.698083e+0582.95637658.985043
std288.8194363.20596218.8109080.9454291.887626e+05103.25354017.194757
min1.0000002006.00000066.0000001.9000006.100000e+010.00000011.000000
25%250.7500002010.000000100.0000006.2000003.630900e+0413.27000047.000000
50%500.5000002014.000000111.0000006.8000001.107990e+0547.98500059.500000
75%750.2500002016.000000123.0000007.4000002.399098e+05113.71500072.000000
max1000.0000002016.000000191.0000009.0000001.791916e+06936.630000100.000000
\n", + "
" + ], + "text/plain": [ + " Rank Year Runtime (Minutes) Rating Votes \\\n", + "count 1000.000000 1000.000000 1000.000000 1000.000000 1.000000e+03 \n", + "mean 500.500000 2012.783000 113.172000 6.723200 1.698083e+05 \n", + "std 288.819436 3.205962 18.810908 0.945429 1.887626e+05 \n", + "min 1.000000 2006.000000 66.000000 1.900000 6.100000e+01 \n", + "25% 250.750000 2010.000000 100.000000 6.200000 3.630900e+04 \n", + "50% 500.500000 2014.000000 111.000000 6.800000 1.107990e+05 \n", + "75% 750.250000 2016.000000 123.000000 7.400000 2.399098e+05 \n", + "max 1000.000000 2016.000000 191.000000 9.000000 1.791916e+06 \n", + "\n", + " Revenue (Millions) Metascore \n", + "count 872.000000 936.000000 \n", + "mean 82.956376 58.985043 \n", + "std 103.253540 17.194757 \n", + "min 0.000000 11.000000 \n", + "25% 13.270000 47.000000 \n", + "50% 47.985000 59.500000 \n", + "75% 113.715000 72.000000 \n", + "max 936.630000 100.000000 " + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "1a2d72e7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
RankTitleGenreDescriptionDirectorActorsYearRuntime (Minutes)RatingVotesRevenue (Millions)Metascore
01Guardians of the GalaxyAction,Adventure,Sci-FiA group of intergalactic criminals are forced ...James GunnChris Pratt, Vin Diesel, Bradley Cooper, Zoe S...20141218.1757074333.1376.0
12PrometheusAdventure,Mystery,Sci-FiFollowing clues to the origin of mankind, a te...Ridley ScottNoomi Rapace, Logan Marshall-Green, Michael Fa...20121247.0485820126.4665.0
23SplitHorror,ThrillerThree girls are kidnapped by a man with a diag...M. Night ShyamalanJames McAvoy, Anya Taylor-Joy, Haley Lu Richar...20161177.3157606138.1262.0
34SingAnimation,Comedy,FamilyIn a city of humanoid animals, a hustling thea...Christophe LourdeletMatthew McConaughey,Reese Witherspoon, Seth Ma...20161087.260545270.3259.0
45Suicide SquadAction,Adventure,FantasyA secret government agency recruits some of th...David AyerWill Smith, Jared Leto, Margot Robbie, Viola D...20161236.2393727325.0240.0
\n", + "
" + ], + "text/plain": [ + " Rank Title Genre \\\n", + "0 1 Guardians of the Galaxy Action,Adventure,Sci-Fi \n", + "1 2 Prometheus Adventure,Mystery,Sci-Fi \n", + "2 3 Split Horror,Thriller \n", + "3 4 Sing Animation,Comedy,Family \n", + "4 5 Suicide Squad Action,Adventure,Fantasy \n", + "\n", + " Description Director \\\n", + "0 A group of intergalactic criminals are forced ... James Gunn \n", + "1 Following clues to the origin of mankind, a te... Ridley Scott \n", + "2 Three girls are kidnapped by a man with a diag... M. Night Shyamalan \n", + "3 In a city of humanoid animals, a hustling thea... Christophe Lourdelet \n", + "4 A secret government agency recruits some of th... David Ayer \n", + "\n", + " Actors Year Runtime (Minutes) \\\n", + "0 Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S... 2014 121 \n", + "1 Noomi Rapace, Logan Marshall-Green, Michael Fa... 2012 124 \n", + "2 James McAvoy, Anya Taylor-Joy, Haley Lu Richar... 2016 117 \n", + "3 Matthew McConaughey,Reese Witherspoon, Seth Ma... 2016 108 \n", + "4 Will Smith, Jared Leto, Margot Robbie, Viola D... 2016 123 \n", + "\n", + " Rating Votes Revenue (Millions) Metascore \n", + "0 8.1 757074 333.13 76.0 \n", + "1 7.0 485820 126.46 65.0 \n", + "2 7.3 157606 138.12 62.0 \n", + "3 7.2 60545 270.32 59.0 \n", + "4 6.2 393727 325.02 40.0 " + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c8ebe26c", + "metadata": {}, + "outputs": [], + "source": [ + "def guardians(x):\n", + "\n", + " if x >=0 and x <= 999=\n", + " return ='cat_1'\n", + " elif x >=1000 and x <= 9999=\n", + " return ='cat_2'\n", + " elif x >=10000 and x <= 99999=\n", + " return ='cat_3'\n", + " elif x >=10000 and x <= 99999=\n", + " return ='cat_4'" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "71ae2dd7", + "metadata": {}, + "outputs": [ + { + "ename": "SyntaxError", + "evalue": "invalid syntax. Perhaps you forgot a comma? (2610953802.py, line 2)", + "output_type": "error", + "traceback": [ + "\u001b[1;36m Cell \u001b[1;32mIn[26], line 2\u001b[1;36m\u001b[0m\n\u001b[1;33m df_movies['Bins']= df_movies.apply(lamda row: warp_up['Votes'], axis=1)\u001b[0m\n\u001b[1;37m ^\u001b[0m\n\u001b[1;31mSyntaxError\u001b[0m\u001b[1;31m:\u001b[0m invalid syntax. Perhaps you forgot a comma?\n" ] } ], "source": [ - "import this" + "df_movies['Bins']= df_movies.apply(lamda row: warp_up['Votes'], axis=1)" ] }, + { + "cell_type": "code", + "execution_count": 29, + "id": "00b9b2dd", + "metadata": {}, + "outputs": [ + { + "ename": "SyntaxError", + "evalue": "closing parenthesis ')' does not match opening parenthesis '[' (2045122085.py, line 1)", + "output_type": "error", + "traceback": [ + "\u001b[1;36m Cell \u001b[1;32mIn[29], line 1\u001b[1;36m\u001b[0m\n\u001b[1;33m df['revenue_movie_per_minute'] = (df_movies.apply(lamba row : row ['Revenue (Millions)'])/ row [Runtime (Minutes)'], axis=1')\u001b[0m\n\u001b[1;37m ^\u001b[0m\n\u001b[1;31mSyntaxError\u001b[0m\u001b[1;31m:\u001b[0m closing parenthesis ')' does not match opening parenthesis '['\n" + ] + } + ], + "source": [ + "df['revenue_movie_per_minute'] = (df_movies.apply(lamba row : row ['Revenue (Millions)'])/ row [Runtime (Minutes)'], axis=1')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e13d7a4b", + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "code", "execution_count": null, - "id": "d3c3dd76", + "id": "e578c658", "metadata": {}, "outputs": [], "source": [] @@ -59,9 +454,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python [conda env:.conda-m1_env]", + "display_name": "Python (m1_env)", "language": "python", - "name": "conda-env-.conda-m1_env-py" + "name": "myenv" }, "language_info": { "codemirror_mode": { @@ -73,7 +468,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.16" + "version": "3.10.13" } }, "nbformat": 4,