diff --git a/your_code/main.ipynb b/your_code/main.ipynb index 7810ccf..f1e6f37 100644 --- a/your_code/main.ipynb +++ b/your_code/main.ipynb @@ -14,11 +14,54 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ - "# your answer here" + "import numpy as np\n", + "import pandas as pd\n", + "import scipy.stats as st\n", + "from scipy.stats import poisson\n", + "from scipy.stats import binom" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "p_value is: 0.483688906853727\n", + "We can not reject the null hypothesis: there is reason to believe that at a .05 level the number of scores is a Poisson variable\n" + ] + } + ], + "source": [ + "# your answer here\n", + "# our observation O:\n", + "O = [35,99,104,110,62,25,10,3]\n", + "alpha = 0.05\n", + "mu = 2.435\n", + "poisson_dist = poisson(mu)\n", + "\n", + "# poisson pmf for each score from 0 through 6\n", + "poisson_pmfs = np.array([poisson_dist.pmf(i) for i in range(0,7)]) \n", + "# probability of scoring 7 or more is 1 - cumulative probability of scoring less than 7, i.e. 
1 - the sum of all values in poisson_pmfs\n", + "poisson_pmfs_tail = 1-sum(poisson_pmfs) \n", + "# Now our final pmfs will be with the tail value\n", + "with_tail = np.append(poisson_pmfs, poisson_pmfs_tail)\n", + "# Calculate the expected score according to our calculated pmfs where population will be the sum of our observed score\n", + "E = with_tail * sum(O)\n", + "\n", + "stats, p_value = st.chisquare(f_obs = O, f_exp = E)\n", + "print(f\"p_value is: {p_value}\")\n", + "if p_value < alpha:\n", + " print(\"We can reject the null hypothesis : there is no reason to believe that at a .05 level the number of scores is a Poisson variable\")\n", + "else: \n", + " print(\"We can not reject the null hypothesis: there is reason to believe that at a .05 level the number of scores is a Poisson variable\")\n" ] }, { @@ -41,11 +84,50 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 34, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "p_value is: 3.008342324775625e-07\n", + "We can reject the null hypothesis : there is no reason to believe that at a .05 level the number of defective items follow Poisson distribution\n" + ] + } + ], "source": [ - "# your code here" + "# your code here\n", + "defective_quantity = np.array([0, 1, 2, 3, 4])\n", + "O = np.array([32,15, 0, 9, 4])\n", + "# mu = ∑(x * f(x)) / n\n", + "mu = np.sum(defective_quantity * O) / np.sum(O)\n", + "alpha = 0.05\n", + "poisson_dist = poisson(mu)\n", + "\n", + "# poisson distribution for each observed defective items\n", + "pmf_0 = poisson_dist.pmf(0)\n", + "pmf_1 = poisson_dist.pmf(1)\n", + "pmf_2 = poisson_dist.pmf(2)\n", + "pmf_3 = poisson_dist.pmf(3)\n", + "pmf_4 = poisson_dist.pmf(4)\n", + "\n", + "\n", + "# collect the pmf of each defect count\n", + "pmfs = np.array([pmf_0, pmf_1, pmf_2, pmf_3, pmf_4])\n", + "# Calculate the expected frequency according to our calculated pmfs where population will be the sum of our observed frequency\n", + 
"E = pmfs * sum(O)\n", + "# normalize the expected frequency with the observed frequency\n", + "E_norm = E / np.sum(E) * np.sum(O)\n", + "\n", + "stats, p_value = st.chisquare(f_obs = O, f_exp = E_norm, ddof = 1)\n", + "\n", + "print(f\"p_value is: {p_value}\")\n", + "if p_value < alpha:\n", + " print(\"We can reject the null hypothesis : there is no reason to believe that at a .05 level the number of defective items follow Poisson distribution\")\n", + "else: \n", + " print(\"We can not reject the null hypothesis: there is reason to believe that at a .05 level the number of defective items follow Poisson distribution\")\n", + "\n" ] }, { @@ -60,11 +142,46 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 40, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "8.306179519542757\n", + "P_value: 0.015715783395951262\n", + "We can reject the null hypothesis\n" + ] + } + ], "source": [ - "# your answer here" + "# your answer here\n", + "n = 10\n", + "p = 0.05\n", + "O = np.array([138, 53, 9])\n", + "Population = O.sum()\n", + "\n", + "binom_dist = binom(n,p)\n", + "binom_pmfs = np.array([binom_dist.pmf(i) for i in range(2)])\n", + "pmf_tail = 1 - binom_pmfs.sum()\n", + "with_tail = np.append(binom_pmfs, pmf_tail)\n", + "\n", + "E = with_tail * Population\n", + "\n", + "stats, p_value = st.chisquare(O, E)\n", + "\n", + "print(stats)\n", + "print(f\"P_value: {p_value}\")\n", + "\n", + "if p_value < alpha:\n", + " print(\"We can reject the null hypothesis\")\n", + "else:\n", + " print(\"We can not reject the null hypothesis\")\n", + "\n", + "\n", + "\n", + "\n" ] }, { @@ -79,17 +196,47 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 47, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.004719280137040844\n", + "We can reject the null hypothesis: There is association between physical activity 
and sugary drinks consumption\n" + ] + } + ], "source": [ - "#your answer here" + "#your answer here\n", + "\n", + "# H0: there is no association between physical activity and the consumption of sugary drinks\n", + "# H1: there is an association between physical activity and the consumption of sugary drinks\n", + "\n", + "alpha = 0.05\n", + "category = np.array([[32, 12],\n", + " [14, 22],\n", + " [6, 9]])\n", + "stats, p_value, df, E = st.chi2_contingency(category)\n", + "print(p_value)\n", + "if p_value < alpha:\n", + " print(\"We can reject the null hypothesis: There is association between physical activity and sugary drinks consumption\")\n", + "else:\n", + " print(\"We can not reject the null hypothesis. We can not say that there is association between phsical activity and sugary drinks consumption\")" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -103,7 +250,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.3" + "version": "3.9.13" } }, "nbformat": 4,