Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
245 changes: 221 additions & 24 deletions lab-intro-probability.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,39 @@
"If the Ironhack Airlines routinely sells 460 tickets, what is the chance that they have a seats for all passenger?"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Binominal challenge"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Probability that all passengers get a seat: 0.8845\n",
"That's about 88.45%\n"
]
}
],
"source": [
"#code here"
"from scipy.stats import binom\n",
"#tickets sold\n",
"n = 460\n",
"#chance of catching the flight\n",
"p = 0.97\n",
"seats = 450\n",
"#P( or fewer passengers show up)\n",
"probability = binom.cdf(seats, n, p)\n",
"\n",
"print(f\"Probability that all passengers get a seat: {probability:.4f}\")\n",
"print(f\"That's about {probability*100:.2f}%\")"
]
},
{
Expand Down Expand Up @@ -70,13 +96,56 @@
"What is the probability that the representative needs to make at least three attempts before successfully resolving a customer complaint?"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Geometric distribution problem"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Probability of needing at least 3 attempts: 0.4900\n",
"That's about 49.00%\n"
]
}
],
"source": [
"from scipy.stats import geom\n",
"# Parameters\n",
"p = 0.3 # probability of resolving on first attempt\n",
"\n",
"# P(needs AT LEAST 3 attempts) = P(first success happens on attempt 3 or later)\n",
"probability = geom.sf(2, p) # sf(2) means \"more than 2 attempts\", sf= survival function\n",
"\n",
"print(f\"Probability of needing at least 3 attempts: {probability:.4f}\")\n",
"print(f\"That's about {probability*100:.2f}%\")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": null,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Probability of resolving within 2 attempts: 51.00%\n"
]
}
],
"source": [
"#code here"
"# P(resolves on attempt 1 or 2)\n",
"probability_1_or_2 = geom.cdf(2, p) #cdf = Cumulative distribution function, For dummies: \"What is the probability of getting X or LESS?\"\n",
"print(f\"Probability of resolving within 2 attempts: {probability_1_or_2*100:.2f}%\")"
]
},
{
Expand Down Expand Up @@ -105,13 +174,38 @@
"What is the probability of the website server being overwhelmed?"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Poisson distribution"
]
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": null,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Probability of server being overwhelmed: 0.0129\n",
"That's about 1.29%\n"
]
}
],
"source": [
"#code here"
"from scipy.stats import poisson\n",
"# Parameters\n",
"mu = 500 # average visits per hour #mu is greek letter for M (weird u)\n",
"capacity = 550 # server capacity\n",
"\n",
"# P(more than 550 visits) = server overwhelmed\n",
"probability = poisson.sf(capacity, mu)\n",
"\n",
"print(f\"Probability of server being overwhelmed: {probability:.4f}\")\n",
"print(f\"That's about {probability*100:.2f}%\")"
]
},
{
Expand All @@ -123,11 +217,30 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 8,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Probability of being overwhelmed at least once in 24 hours: 0.2677\n",
"That's about 26.77%\n"
]
}
],
"source": [
"#code here"
"# Probability of NOT being overwhelmed in a single hour\n",
"p_safe_one_hour = 1 - probability # from previous result\n",
"\n",
"# Probability of NOT being overwhelmed in ALL 24 hours\n",
"p_safe_24_hours = p_safe_one_hour ** 24\n",
"\n",
"# Probability of being overwhelmed AT LEAST ONCE in 24 hours\n",
"p_overwhelmed_24_hours = 1 - p_safe_24_hours\n",
"\n",
"print(f\"Probability of being overwhelmed at least once in 24 hours: {p_overwhelmed_24_hours:.4f}\")\n",
"print(f\"That's about {p_overwhelmed_24_hours*100:.2f}%\")"
]
},
{
Expand Down Expand Up @@ -155,12 +268,38 @@
"What is the probability that the next customer will arrive within the next 5 minutes?"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Exponential distribution"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": []
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Probability of next customer arriving within 5 minutes: 0.3935\n",
"That's about 39.35%\n"
]
}
],
"source": [
"from scipy.stats import expon\n",
"# Parameters\n",
"mean_arrival = 10 # average time between arrivals (in minutes)\n",
"\n",
"# P(next customer arrives within 5 minutes)\n",
"probability = expon.cdf(5, scale=mean_arrival)\n",
"\n",
"print(f\"Probability of next customer arriving within 5 minutes: {probability:.4f}\")\n",
"print(f\"That's about {probability*100:.2f}%\")"
]
},
{
"cell_type": "markdown",
Expand All @@ -173,10 +312,25 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": []
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Probability of an employee taking a break: 0.2231\n",
"That's about 22.31%\n"
]
}
],
"source": [
"# P(no customer for 15 minutes) = P(next arrival takes MORE than 15 minutes)\n",
"probability_break = expon.sf(15, scale=mean_arrival)\n",
"\n",
"print(f\"Probability of an employee taking a break: {probability_break:.4f}\")\n",
"print(f\"That's about {probability_break*100:.2f}%\")"
]
},
{
"cell_type": "markdown",
Expand All @@ -194,13 +348,39 @@
"- If we randomly select a bird, what is the probability that its weight is between 140 and 160 grams?"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Normal distribution"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 11,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Probability of weight between 140 and 160 grams: 0.6827\n",
"That's about 68.27%\n"
]
}
],
"source": [
"#code here"
"from scipy.stats import norm\n",
"\n",
"# Parameters\n",
"mu = 150 # average weight\n",
"sigma = 10 # standard deviation\n",
"\n",
"# P(140 <= weight <= 160)\n",
"probability = norm.cdf(160, mu, sigma) - norm.cdf(140, mu, sigma)\n",
"\n",
"print(f\"Probability of weight between 140 and 160 grams: {probability:.4f}\")\n",
"print(f\"That's about {probability*100:.2f}%\")"
]
},
{
Expand All @@ -219,11 +399,28 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 12,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Probability of failing within 30 hours: 0.4512\n",
"That's about 45.12%\n"
]
}
],
"source": [
"#code here"
"\n",
"# Parameters\n",
"mean_lifetime = 50 # average lifetime in hours\n",
"\n",
"# P(fails within 30 hours) = P(lifetime <= 30)\n",
"probability = expon.cdf(30, scale=mean_lifetime)\n",
"\n",
"print(f\"Probability of failing within 30 hours: {probability:.4f}\")\n",
"print(f\"That's about {probability*100:.2f}%\")"
]
}
],
Expand All @@ -243,7 +440,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
"version": "3.14.2"
}
},
"nbformat": 4,
Expand Down