ta-data-lis · HenrikSoeder · Nov 22, 2023
diff --git a/your_code/main.ipynb b/your_code/main.ipynb
@@ -12,13 +12,66 @@
     "Based on these results, we create a Poisson distribution with the sample mean parameter  = 2.435. Is there any reason to believe that at a .05 level the number of scores is a Poisson variable?"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "import scipy.stats as st\n",
+    "from scipy.stats import bernoulli, binom, geom, poisson, uniform, expon, norm,chisquare\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# H0: the number of scores follows a Poisson distribution\n",
+    "# H1: the number of scores does not follow a Poisson distribution\n",
+    "\n",
+    "# Observed frequencies: eight categories, the last one being the pooled upper tail.\n",
+    "points = np.array([35, 99, 104, 110, 62, 25, 10, 3])\n",
+    "\n",
+    "mu = 2.435\n",
+    "poisson_dist = poisson(mu)\n",
+    "\n",
+    "# P(X = k) for k = 0..6, then P(X >= 7) as the remaining probability mass.\n",
+    "poisson_pmf = poisson_dist.pmf(np.arange(7))\n",
+    "with_tail = np.append(poisson_pmf, 1 - poisson_pmf.sum())\n",
+    "\n",
+    "# Expected counts must add up to the same total as the observed counts.\n",
+    "f_exp = with_tail * points.sum()\n",
+    "\n",
+    "# Pass `points` explicitly: f_obs is only assigned by later cells, so using it here\n",
+    "# fails on a fresh kernel (or silently tests another exercise's data).\n",
+    "# NOTE(review): if mu was estimated from these same data, consider ddof=1 -- confirm.\n",
+    "output = st.chisquare(f_obs=points, f_exp=f_exp)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "The p_value is: 0.4836889068537269\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Index 1 of the chisquare result is the p-value.\n",
+    "print(\"The p_value is: \" + str(output[1]))"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# your answer here"
+    "# p-value (0.48) > 0.05: we cannot reject H0, so the data are consistent with a Poisson distribution"
    ]
   },
   {
@@ -60,11 +113,52 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 34,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# your answer here"
+    "# H0: the data follow a Binomial(n = 10, p = 0.05) distribution\n",
+    "# H1: the data do not follow that binomial distribution\n",
+    "\n",
+    "n = 10\n",
+    "p = 0.05\n",
+    "binom_dist = binom(n, p)\n",
+    "\n",
+    "# Observed counts for the outcomes 0, 1 and the pooled tail (2 or more).\n",
+    "f_obs = np.array([138, 53, 9])\n",
+    "\n",
+    "# P(X = 0) and P(X = 1), followed by the leftover probability P(X >= 2).\n",
+    "pmfs = np.array(list(map(binom_dist.pmf, range(2))))\n",
+    "with_tail = np.append(pmfs, 1 - pmfs.sum())\n",
+    "\n",
+    "# Scale the probabilities to expected counts over the observed sample size.\n",
+    "f_exp = f_obs.sum() * with_tail\n",
+    "\n",
+    "output = st.chisquare(f_obs=f_obs, f_exp=f_exp)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 35,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "P_value 0.015715783395950946\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Index 1 of the chisquare result is the p-value.\n",
+    "print(\" \".join([\"P_value\", str(output[1])]))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 36,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# p-value (0.016) < 0.05: we reject H0\n",
+    "# the data do not follow a binomial distribution with n = 10 and p = 0.05"
    ]
   },
   {
@@ -77,19 +171,145 @@
     "![](table4.png)"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 38,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Yes</th>\n",
+       "      <th>No</th>\n",
+       "      <th>Total</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Physical Activity</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>low</th>\n",
+       "      <td>32</td>\n",
+       "      <td>12</td>\n",
+       "      <td>44</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>medium</th>\n",
+       "      <td>14</td>\n",
+       "      <td>22</td>\n",
+       "      <td>36</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>high</th>\n",
+       "      <td>6</td>\n",
+       "      <td>9</td>\n",
+       "      <td>15</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>total</th>\n",
+       "      <td>51</td>\n",
+       "      <td>43</td>\n",
+       "      <td>95</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                   Yes  No  Total\n",
+       "Physical Activity                \n",
+       "low                 32  12     44\n",
+       "medium              14  22     36\n",
+       "high                 6   9     15\n",
+       "total               51  43     95"
+      ]
+     },
+     "execution_count": 38,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Observed counts per physical-activity level, with marginal totals for display only.\n",
+    "# The \"Yes\" column total is 32 + 14 + 6 = 52, which matches the grand total 52 + 43 = 95.\n",
+    "df = pd.DataFrame(\n",
+    "    {\n",
+    "        \"Yes\": [32, 14, 6, 52],\n",
+    "        \"No\": [12, 22, 9, 43],\n",
+    "        \"Total\": [44, 36, 15, 95],\n",
+    "        \"Physical Activity\": [\"low\", \"medium\", \"high\", \"total\"],\n",
+    "    }\n",
+    ").set_index(\"Physical Activity\")\n",
+    "df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 42,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "# H0: physical activity and the Yes/No response are independent (no association)\n",
+    "# H1: physical activity and the Yes/No response are associated\n",
+    "\n",
+    "# Observed counts only: rows are low / medium / high activity, columns are Yes / No.\n",
+    "# The marginal totals are deliberately left out of the table passed to the test.\n",
+    "f_obs = [\n",
+    "    [32, 12],\n",
+    "    [14, 22],\n",
+    "    [6, 9]\n",
+    "]\n",
+    "\n",
+    "# scipy.stats is already imported as `st` in the imports cell, so no extra import is needed here.\n",
+    "output = st.chi2_contingency(f_obs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 41,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0.004719280137040844\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Index 1 of the chi2_contingency result is the p-value.\n",
+    "print(str(output[1]))"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
-    "#your answer here"
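+    "# p-value (0.0047) < 0.05: we reject the null hypothesis of independence\n",
+    "# there is a significant association between physical activity and the Yes/No response"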
    ]
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -103,7 +323,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.3"
+   "version": "3.11.4"
   }
  },
  "nbformat": 4,