From 4bb715258eb22af39f0cb437845f206be691ea05 Mon Sep 17 00:00:00 2001 From: Sarah Jane Date: Thu, 19 Mar 2026 15:51:14 +0100 Subject: [PATCH] lab solved --- lab-intro-probability.ipynb | 245 ++++++++++++++++++++++++++++++++---- 1 file changed, 221 insertions(+), 24 deletions(-) diff --git a/lab-intro-probability.ipynb b/lab-intro-probability.ipynb index 5893fc1..8865c49 100644 --- a/lab-intro-probability.ipynb +++ b/lab-intro-probability.ipynb @@ -36,13 +36,39 @@ "If the Ironhack Airlines routinely sells 460 tickets, what is the chance that they have a seats for all passenger?" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Binomial distribution" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Probability that all passengers get a seat: 0.8845\n", + "That's about 88.45%\n" + ] + } + ], "source": [ - "#code here" + "from scipy.stats import binom\n", + "#tickets sold\n", + "n = 460\n", + "#chance of catching the flight\n", + "p = 0.97\n", + "seats = 450\n", + "#P(450 or fewer passengers show up)\n", + "probability = binom.cdf(seats, n, p)\n", + "\n", + "print(f\"Probability that all passengers get a seat: {probability:.4f}\")\n", + "print(f\"That's about {probability*100:.2f}%\")" ] }, { @@ -70,13 +96,56 @@ "What is the probability that the representative needs to make at least three attempts before successfully resolving a customer complaint?" 
] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Geometric distribution problem" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Probability of needing at least 3 attempts: 0.4900\n", + "That's about 49.00%\n" + ] + } + ], + "source": [ + "from scipy.stats import geom\n", + "# Parameters\n", + "p = 0.3 # probability of resolving a complaint on any single attempt (the geometric model assumes it is the same on every attempt)\n", + "\n", + "# P(needs AT LEAST 3 attempts) = P(first success happens on attempt 3 or later)\n", + "probability = geom.sf(2, p) # sf(2) = P(X > 2), i.e. \"more than 2 attempts\"; sf = survival function = 1 - cdf\n", + "\n", + "print(f\"Probability of needing at least 3 attempts: {probability:.4f}\")\n", + "print(f\"That's about {probability*100:.2f}%\")" + ] + }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Probability of resolving within 2 attempts: 51.00%\n" + ] + } + ], "source": [ - "#code here" + "# P(resolves on attempt 1 or 2)\n", + "probability_1_or_2 = geom.cdf(2, p) # cdf = cumulative distribution function, i.e. P(X <= 2): \"What is the probability of getting X or LESS?\"\n", + "print(f\"Probability of resolving within 2 attempts: {probability_1_or_2*100:.2f}%\")" ] }, { @@ -105,13 +174,38 @@ "What is the probability of the website server being overwhelmed?" 
] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Poisson distribution" + ] + }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Probability of server being overwhelmed: 0.0129\n", + "That's about 1.29%\n" + ] + } + ], "source": [ - "#code here" + "from scipy.stats import poisson\n", + "# Parameters\n", + "mu = 500 # Poisson rate: average visits per hour\n", + "capacity = 550 # server capacity (visits per hour)\n", + "\n", + "# P(more than 550 visits in one hour) = server overwhelmed; sf(k) = P(X > k)\n", + "p_overwhelmed_one_hour = poisson.sf(capacity, mu)\n", + "\n", + "print(f\"Probability of server being overwhelmed: {p_overwhelmed_one_hour:.4f}\")\n", + "print(f\"That's about {p_overwhelmed_one_hour*100:.2f}%\")" ] }, { @@ -123,11 +217,30 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Probability of being overwhelmed at least once in 24 hours: 0.2677\n", + "That's about 26.77%\n" + ] + } + ], "source": [ - "#code here" + "# Probability of NOT being overwhelmed in a single hour\n", + "p_safe_one_hour = 1 - p_overwhelmed_one_hour # dedicated name from the Poisson cell above; the generic `probability` is reassigned by other cells\n", + "\n", + "# Probability of NOT being overwhelmed in ALL 24 hours (assumes the 24 hours are independent)\n", + "p_safe_24_hours = p_safe_one_hour ** 24\n", + "\n", + "# Probability of being overwhelmed AT LEAST ONCE in 24 hours (complement of \"safe in every hour\")\n", + "p_overwhelmed_24_hours = 1 - p_safe_24_hours\n", + "\n", + "print(f\"Probability of being overwhelmed at least once in 24 hours: {p_overwhelmed_24_hours:.4f}\")\n", + "print(f\"That's about {p_overwhelmed_24_hours*100:.2f}%\")" ] }, { @@ -155,12 +268,38 @@ "What is the probability that the next customer will arrive within the next 5 minutes?" 
] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Exponential distribution" + ] + }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Probability of next customer arriving within 5 minutes: 0.3935\n", + "That's about 39.35%\n" + ] + } + ], + "source": [ + "from scipy.stats import expon\n", + "# Parameters\n", + "mean_arrival = 10 # average time between arrivals (in minutes)\n", + "\n", + "# P(next customer arrives within 5 minutes); scipy's expon takes scale = mean = 1/rate, not the rate itself\n", + "probability = expon.cdf(5, scale=mean_arrival)\n", + "\n", + "print(f\"Probability of next customer arriving within 5 minutes: {probability:.4f}\")\n", + "print(f\"That's about {probability*100:.2f}%\")" + ] }, { "cell_type": "markdown", @@ -173,10 +312,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Probability of an employee taking a break: 0.2231\n", + "That's about 22.31%\n" + ] + } + ], + "source": [ + "# P(no customer for 15 minutes) = P(next arrival takes MORE than 15 minutes) = sf(15)\n", + "probability_break = expon.sf(15, scale=mean_arrival)\n", + "\n", + "print(f\"Probability of an employee taking a break: {probability_break:.4f}\")\n", + "print(f\"That's about {probability_break*100:.2f}%\")" + ] }, { "cell_type": "markdown", @@ -194,13 +348,39 @@ "- If we randomly select a bird, what is the probability that its weight is between 140 and 160 grams?" 
] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Normal distribution" + ] + }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Probability of weight between 140 and 160 grams: 0.6827\n", + "That's about 68.27%\n" + ] + } + ], "source": [ - "#code here" + "from scipy.stats import norm\n", + "\n", + "# Parameters\n", + "mu = 150 # average weight (grams)\n", + "sigma = 10 # standard deviation (grams)\n", + "\n", + "# P(140 <= weight <= 160) = cdf(160) - cdf(140); norm.cdf(x, loc, scale) takes mean and standard deviation\n", + "probability = norm.cdf(160, mu, sigma) - norm.cdf(140, mu, sigma)\n", + "\n", + "print(f\"Probability of weight between 140 and 160 grams: {probability:.4f}\")\n", + "print(f\"That's about {probability*100:.2f}%\")" ] }, { @@ -219,11 +399,28 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Probability of failing within 30 hours: 0.4512\n", + "That's about 45.12%\n" + ] + } + ], "source": [ - "#code here" + "from scipy.stats import expon # imported here so this cell runs without the earlier exponential cell\n", + "# Parameters\n", + "mean_lifetime = 50 # average lifetime in hours\n", + "\n", + "# P(fails within 30 hours) = P(lifetime <= 30); expon's scale is the mean lifetime (1/rate)\n", + "probability = expon.cdf(30, scale=mean_lifetime)\n", + "\n", + "print(f\"Probability of failing within 30 hours: {probability:.4f}\")\n", + "print(f\"That's about {probability*100:.2f}%\")" ] } ], @@ -243,7 +440,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.9" + "version": "3.14.2" } }, "nbformat": 4,