diff --git a/your_code/main.ipynb b/your_code/main.ipynb index 7810ccf..71d0bc3 100644 --- a/your_code/main.ipynb +++ b/your_code/main.ipynb @@ -12,13 +12,66 @@ "Based on these results, we create a Poisson distribution with the sample mean parameter = 2.435. Is there any reason to believe that at a .05 level the number of scores is a Poisson variable?" ] }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import scipy.stats as st\n", + "from scipy.stats import bernoulli, binom, geom, poisson, uniform, expon, norm,chisquare\n" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "#H0: follows poisson\n", + "#H1: does not follow poisson\n", + "\n", + "points = np.array([35, 99, 104, 110, 62, 25, 10, 3])\n", + "\n", + "mu = 2.435\n", + "poisson_dist = poisson(mu)\n", + "\n", + "poisson_pmf = np.array([poisson_dist.pmf(i) for i in range(7)])\n", + "\n", + "with_tail = np.append(poisson_pmf, 1-poisson_pmf.sum())\n", + "f_exp = with_tail*sum(points)\n", + "\n", + "\n", + "output = st.chisquare(f_obs = f_obs, f_exp = f_exp)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The p_value is: 0.4836889068537269\n" + ] + } + ], + "source": [ + "print(\"The p_value is:\", output[1])" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "# your answer here" + "# we can not reject the hypothesis ==> it might be a poisson distribution" ] }, { @@ -60,11 +113,52 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 34, "metadata": {}, "outputs": [], "source": [ - "# your answer here" + "#H0: follows Binominal\n", + "#H1: Does not follow Binominal\n", + "\n", + "f_obs = np.array([138,53,9])\n", + "\n", + "n = 10\n", + "p = 0.05\n", + "\n", + "binom_dist = binom(n,p)\n", + "pmfs = np.array([binom_dist.pmf(i) for i in range(2)])\n", + "\n", + "with_tail = np.append(pmfs, 1 - pmfs.sum())\n", + "f_exp = with_tail*sum(f_obs)\n", + "\n", + "output = st.chisquare(f_obs = f_obs, f_exp = f_exp)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "P_value 0.015715783395950946\n" + ] + } + ], + "source": [ + "print(\"P_value\", output[1])" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [], + "source": [ + "#can reject the hypothesis\n", + "# it does not follow a binomial distribution with 0.05" ] }, { @@ -77,19 +171,145 @@ "" ] }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
| \n", + " | Yes | \n", + "No | \n", + "Total | \n", + "
|---|---|---|---|
| Physical Activity | \n", + "\n", + " | \n", + " | \n", + " |
| low | \n", + "32 | \n", + "12 | \n", + "44 | \n", + "
| medium | \n", + "14 | \n", + "22 | \n", + "36 | \n", + "
| high | \n", + "6 | \n", + "9 | \n", + "15 | \n", + "
| total | \n", + "51 | \n", + "43 | \n", + "95 | \n", + "