From 5f92be50beff6ba0b346a69ed46db85e975c8bce Mon Sep 17 00:00:00 2001 From: HenrikSoeder Date: Wed, 22 Nov 2023 16:10:19 +0000 Subject: [PATCH] Lab Done --- your_code/main.ipynb | 232 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 226 insertions(+), 6 deletions(-) diff --git a/your_code/main.ipynb b/your_code/main.ipynb index 7810ccf..71d0bc3 100644 --- a/your_code/main.ipynb +++ b/your_code/main.ipynb @@ -12,13 +12,66 @@ "Based on these results, we create a Poisson distribution with the sample mean parameter = 2.435. Is there any reason to believe that at a .05 level the number of scores is a Poisson variable?" ] }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import scipy.stats as st\n", + "from scipy.stats import bernoulli, binom, geom, poisson, uniform, expon, norm,chisquare\n" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "#H0: the number of scores follows a Poisson distribution with mu = 2.435\n", + "#H1: the number of scores does not follow that Poisson distribution\n", + "\n", + "points = np.array([35, 99, 104, 110, 62, 25, 10, 3])\n", + "\n", + "mu = 2.435\n", + "poisson_dist = poisson(mu)\n", + "\n", + "poisson_pmf = np.array([poisson_dist.pmf(i) for i in range(7)])\n", + "\n", + "with_tail = np.append(poisson_pmf, 1-poisson_pmf.sum())  # last bin = P(X >= 7)\n", + "f_exp = with_tail*sum(points)\n", + "\n", + "\n", + "output = st.chisquare(f_obs = points, f_exp = f_exp)  # NOTE(review): mu is a sample estimate, consider ddof=1" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The p_value is: 0.4836889068537269\n" + ] + } + ], + "source": [ + "print(\"The p_value is:\", output[1])" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "# your answer here" + "# p-value 0.48 > 0.05: we cannot reject H0 ==> the scores are consistent with a Poisson distribution" ] }, { @@ 
-60,11 +113,52 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 34, "metadata": {}, "outputs": [], "source": [ - "# your answer here" + "#H0: the data follow a Binomial(n=10, p=0.05) distribution\n", + "#H1: the data do not follow a Binomial(n=10, p=0.05) distribution\n", + "\n", + "f_obs = np.array([138,53,9])\n", + "\n", + "n = 10\n", + "p = 0.05\n", + "\n", + "binom_dist = binom(n,p)\n", + "pmfs = np.array([binom_dist.pmf(i) for i in range(2)])\n", + "\n", + "with_tail = np.append(pmfs, 1 - pmfs.sum())\n", + "f_exp = with_tail*sum(f_obs)\n", + "\n", + "output = st.chisquare(f_obs = f_obs, f_exp = f_exp)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "P_value 0.015715783395950946\n" + ] + } + ], + "source": [ + "print(\"P_value\", output[1])" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [], + "source": [ + "# p-value 0.016 < 0.05: we can reject H0\n", + "# the data do not follow a binomial distribution with n = 10 and p = 0.05" ] }, { @@ -77,19 +171,145 @@ "![](table4.png)" ] }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
YesNoTotal
Physical Activity
low321244
medium142236
high6915
total514395
\n", + "
" + ], + "text/plain": [ + " Yes No Total\n", + "Physical Activity \n", + "low 32 12 44\n", + "medium 14 22 36\n", + "high 6 9 15\n", + "total 51 43 95" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df =pd.DataFrame({\"Yes\":[32,14,6,51], \"No\":[12,22,9,43], \"Total\":[44,36,15,95], \"Physical Activity\":[\"low\", \"medium\", \"high\", \"total\"]})\n", + "df = df.set_index(\"Physical Activity\")\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "#H0: physical activity and the Yes/No response are independent (no association)\n", + "#H1: physical activity and the Yes/No response are associated\n", + "\n", + "from scipy import stats\n", + "\n", + "f_obs = [\n", + " [32, 12],\n", + " [14, 22],\n", + " [6, 9]\n", + "]\n", + "\n", + "output = stats.chi2_contingency(f_obs)" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.004719280137040844\n" + ] + } + ], + "source": [ + "print(output[1])" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "#your answer here" + "# p-value 0.0047 < 0.05: we can reject H0\n", + "# there is an association between physical activity and the Yes/No response" ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -103,7 +323,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.3" + "version": "3.11.4" } }, "nbformat": 4,