From d9e7f9220382bb3ff831a948fed77009b382796e Mon Sep 17 00:00:00 2001 From: Ricardo Mendes Date: Sat, 20 May 2023 18:50:41 +0100 Subject: [PATCH] msg --- your-code/main.ipynb | 182 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 163 insertions(+), 19 deletions(-) diff --git a/your-code/main.ipynb b/your-code/main.ipynb index 332f496..f7a2622 100644 --- a/your-code/main.ipynb +++ b/your-code/main.ipynb @@ -9,11 +9,14 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ - "# Libraries" + "# Libraries\n", + "import pandas as pd\n", + "import numpy as np\n", + "import scipy.stats as st" ] }, { @@ -32,11 +35,40 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sample mean: 173.47\n", + "Sample standard deviation: 7.36\n", + "Sample size: 15\n", + "Confidence interval: (170.91, 176.02)\n" + ] + } + ], "source": [ - "# your code here" + "# your code here\n", + "\n", + "heights = [167, 167, 168, 168, 168, 169, 171, 172, 173, 175, 175, 175, 177, 182, 195]\n", + "n = len(heights)\n", + "\n", + "sample_mean = np.mean(heights)\n", + "sample_std = np.std(heights, ddof=1)\n", + "\n", + "#t-value for 80% confidence level with n-1 degrees of freedom\n", + "t_value = st.t.ppf(0.9, n-1)\n", + "\n", + "# confidence interval\n", + "CI = (sample_mean - t_value*(sample_std/np.sqrt(n)), sample_mean + t_value*(sample_std/np.sqrt(n)))\n", + "\n", + "\n", + "print(\"Sample mean: {:.2f}\".format(sample_mean))\n", + "print(\"Sample standard deviation: {:.2f}\".format(sample_std))\n", + "print(\"Sample size: {}\".format(n))\n", + "print(\"Confidence interval: ({:.2f}, {:.2f})\".format(CI[0], CI[1]))\n" ] }, { @@ -51,11 +83,36 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sample proportion: 0.26\n", + "80% Confidence interval: (0.20, 0.31)\n", + "90% Confidence interval: (0.19, 0.33)\n" + ] + } + ], "source": [ - "# your code here" + "# your code here\n", + "\n", + "n = 105\n", + "p = 27/105\n", + "se = np.sqrt(p*(1-p)/n)\n", + "s\n", + "z_80 = st.norm.ppf(0.9)\n", + "z_90 = st.norm.ppf(0.95)\n", + "\n", + "# confidence intervals\n", + "CI_80 = (p - z_80*se, p + z_80*se)\n", + "CI_90 = (p - z_90*se, p + z_90*se)\n", + "\n", + "print(\"Sample proportion: {:.2f}\".format(p))\n", + "print(\"80% Confidence interval: ({:.2f}, {:.2f})\".format(CI_80[0], CI_80[1]))\n", + "print(\"90% Confidence interval: ({:.2f}, {:.2f})\".format(CI_90[0], CI_90[1]))\n" ] }, { @@ -76,11 +133,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sample size required: 425\n" + ] + } + ], "source": [ - "# your code here" + "# your code here\n", + "\n", + "# set up the problem\n", + "sigma = 4 # population standard deviation\n", + "E = 0.5 # desired error level\n", + "alpha = 0.01 # significance level\n", + "z_alpha_2 = st.norm.ppf(1 - alpha/2) # z-value for given significance level\n", + "\n", + "# calculate sample size\n", + "n = np.ceil((z_alpha_2 * sigma / E) ** 2)\n", + "\n", + "# print the result\n", + "print(\"Sample size required: {}\".format(int(n)))\n" ] }, { @@ -94,11 +171,34 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sample size required: 3138\n" + ] + } + ], "source": [ - "# your code here" + "# your code here\n", + "\n", + "# set up the problem\n", + "n = 105 # sample size\n", + "p_hat = 27/105 # sample proportion of businesses with losses\n", + "alpha = 0.2 # significance level (1 - confidence level)\n", + "z_alpha_2 = st.norm.ppf(1 - alpha/2) # z-value for given significance level\n", + "E = 0.01 # desired error level\n", + "\n", + "# calculate sample size\n", + "q_hat = 1 - p_hat\n", + "n_required = ((z_alpha_2**2) * p_hat * q_hat) / (E**2)\n", + "n_required = np.ceil(n_required)\n", + "\n", + "# print the result\n", + "print(\"Sample size required: {}\".format(int(n_required)))\n" ] }, { @@ -121,17 +221,61 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The 94% confidence interval for the difference in mean durations is [6.30, 25.70].\n" + ] + } + ], "source": [ - "# your code here" + "# your code here\n", + "\n", + "#calculate the sample means and standard deviations\n", + "n_x = 40\n", + "x_bar = 418\n", + "sigma_x = 26\n", + "\n", + "n_y = 50\n", + "y_bar = 402\n", + "sigma_y = 22\n", + "\n", + "#calculate the standard error\n", + "se = np.sqrt(sigma_x**2/n_x + sigma_y**2/n_y)\n", + "\n", + "#critical value for a 94% confidence interval\n", + "alpha = 0.06/2 # two-tailed test\n", + "z_critical = st.norm.ppf(1 - alpha)\n", + "\n", + "#construct the confidence interval\n", + "lower_bound = (x_bar - y_bar) - z_critical * se\n", + "upper_bound = (x_bar - y_bar) + z_critical * se\n", + "\n", + "print(f\"The 94% confidence interval for the difference in mean durations is [{lower_bound:.2f}, {upper_bound:.2f}].\")\n" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -145,7 +289,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.10.9" } }, "nbformat": 4,