From 17bc2a4585c7ed939ebb611af56052027f225b75 Mon Sep 17 00:00:00 2001 From: AnaCarvalho84 <131803922+AnaCarvalho84@users.noreply.github.com> Date: Sat, 19 Aug 2023 10:32:50 +0100 Subject: [PATCH] Lab done --- your_code/main.ipynb | 152 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 142 insertions(+), 10 deletions(-) diff --git a/your_code/main.ipynb b/your_code/main.ipynb index 7810ccf..f7502a2 100644 --- a/your_code/main.ipynb +++ b/your_code/main.ipynb @@ -14,11 +14,47 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Chi-squared statistic: 6.491310681109821\n", + "P-value: 0.4836889068537269\n", + "Fail to reject the null hypothesis: The data follows a Poisson distribution.\n" + ] + } + ], "source": [ - "# your answer here" + "import pandas as pd\n", + "import numpy as np\n", + "import scipy.stats as st\n", + "\n", + "from scipy.stats import poisson\n", + "\n", + "significance = 0.05\n", + "mu = 2.435\n", + "f_obs = np.array([35, 99, 104, 110, 62, 25, 10, 3])\n", + "poisson_dist = poisson(mu)\n", + "\n", + "poisson_pmfs = np.array([poisson_dist.pmf(i) for i in range(0, 7)])\n", + "poisson_pmfs_last = np.append(poisson_pmfs, 1 - poisson_pmfs.sum())\n", + "f_exp = poisson_pmfs_last * 448\n", + "\n", + "# Perform the chi-squared goodness-of-fit test\n", + "chi_squared, p_value = st.chisquare(f_obs=f_obs, f_exp=f_exp)\n", + "\n", + "# Compare p-value with significance level\n", + "if p_value < significance:\n", + " result = \"Reject the null hypothesis: The data does not follow a Poisson distribution.\"\n", + "else:\n", + " result = \"Fail to reject the null hypothesis: The data follows a Poisson distribution.\"\n", + "\n", + "print(\"Chi-squared statistic:\", chi_squared)\n", + "print(\"P-value:\", p_value)\n", + "print(result)" ] }, { @@ -41,7 +77,7 @@ }, { "cell_type": "code", - "execution_count": null, + 
"execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -60,11 +96,59 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "We can reject the null hypothesis\n", + "p-value: 0.015715783395950887\n" + ] + } + ], "source": [ - "# your answer here" + "from scipy.stats import binom\n", + "\n", + "# H0: The sample comes from a binomial population (with n = 10 and p = 0.05)\n", + "# H1: The sample does not come from a binomial population (with n = 10 and p = 0.05)\n", + "\n", + "# Observed frequencies of defective tires\n", + "O = np.array([138, 53, 9])\n", + "\n", + "population = O.sum()\n", + "n = 10\n", + "p = 0.05\n", + "alpha = 0.05 \n", + "\n", + "binom_dist = binom(n, p)\n", + "\n", + "\n", + "# Calculate the entire PMF for 0 to 2 or more\n", + "binom_pmfs = np.array([binom_dist.pmf(i) for i in range(0, 2)])\n", + "\n", + "tail = 1 - binom_pmfs.sum()\n", + "\n", + "binom_with_tail = np.append(binom_pmfs, tail)\n", + "\n", + "E = binom_with_tail * population\n", + "\n", + "chisquare_result = st.chisquare(f_obs = O, f_exp = E)\n", + "\n", + "if chisquare_result.pvalue < alpha:\n", + " print(\"We can reject the null hypothesis\")\n", + "else:\n", + " print(\"We can not reject the null hypothesis\")\n", + " \n", + "print(\"p-value:\", chisquare_result.pvalue)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " Since this p-value is less than the chosen significance level (alpha = 0.05), I can reject the null hypothesis. This suggests that there is evidence to believe that the observed data does not follow the assumed binomial distribution (n = 10, p = 0.05). The result indicates that the distribution of defective tires in the sample significantly deviates from what would be expected under the assumed binomial distribution."
] }, { @@ -79,11 +163,59 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Chi-squared statistic: 10.712198008709638\n", + "P-value: 0.004719280137040844\n", + "Degrees of freedom: 2\n", + "Expected frequencies:\n", + " [[24.08421053 19.91578947]\n", + " [19.70526316 16.29473684]\n", + " [ 8.21052632 6.78947368]]\n", + "We can reject the null hypothesis\n" + ] + } + ], + "source": [ + "from scipy.stats import chi2_contingency\n", + "\n", + "#Hypothesis\n", + "\n", + "#H0 : Physical Activity is independent of sugar drinks consumption \n", + "#H1 : Physical Activity is not independent of sugar drinks consumption\n", + "\n", + "alpha = 0.05\n", + "\n", + "\n", + "children = [[32, 12],\n", + " [14, 22],\n", + " [6,9]]\n", + "\n", + "chi2_stat, p_val, dof, expected = chi2_contingency(children)\n", + "\n", + "print(\"Chi-squared statistic:\", chi2_stat)\n", + "print(\"P-value:\", p_val)\n", + "print(\"Degrees of freedom:\", dof)\n", + "print(\"Expected frequencies:\\n\", expected)\n", + "\n", + "if p_val < alpha:\n", + " print(\"We can reject the null hypothesis\")\n", + "else:\n", + " print(\"We cannot reject the null hypothesis\")" + ] + }, + { + "cell_type": "code", + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ - "#your answer here" + "#The rejection of the null hypothesis suggests that there might be a meaningful connection between physical activity and sugar drinks consumption" ] } ], @@ -103,7 +235,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.3" + "version": "3.10.9" } }, "nbformat": 4,