From e45d1946ca3f031e4a4c39fc32eb75a8ac8df561 Mon Sep 17 00:00:00 2001
From: Jasper Tielmann <jaspertielmann@web.de>
Date: Tue, 21 Nov 2023 18:07:34 +0000
Subject: [PATCH] Lab done

---
 your_code/main.ipynb | 230 ++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 219 insertions(+), 11 deletions(-)
diff --git a/your_code/main.ipynb b/your_code/main.ipynb
index 7810ccf..bd74f1b 100644
--- a/your_code/main.ipynb
+++ b/your_code/main.ipynb
@@ -1,5 +1,18 @@
 {
  "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "import scipy.stats as st\n",
+    "from scipy.stats import bernoulli, binom, geom, poisson, uniform, expon, norm,chisquare\n"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -14,17 +27,47 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 24,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[ 39.24379068  95.5586303  116.34263239  94.43143662  57.48513704\n",
+      "  27.99526174  11.36141039   5.58170083]\n",
+      "0.05\n",
+      "0.4836889068537269\n",
+      "Can not Reject H0\n"
+     ]
+    }
+   ],
    "source": [
-    "# your answer here"
+    "# H0: the observed counts follow a Poisson distribution\n",
+    "# H1: the observed counts do not follow a Poisson distribution\n",
+    "alpha = 0.05  # significance level\n",
+    "f_obs = np.array([35, 99, 104, 110, 62, 25, 10, 3])\n",
+    "mu = 2.435\n",
+    "poisson_dist = poisson(mu)\n",
+    "\n",
+    "# P(X = k) for every bin except the last, which takes the remaining tail mass\n",
+    "pmfs = poisson_dist.pmf(np.arange(f_obs.size - 1))\n",
+    "with_tail = np.append(pmfs, 1 - pmfs.sum())\n",
+    "f_exp = with_tail * f_obs.sum()\n",
+    "print(f_exp)\n",
+    "# NOTE(review): mu matches the mean of f_obs (bins 0..7), so it looks estimated from the sample; confirm whether ddof=1 is needed\n",
+    "p_value = st.chisquare(f_obs=f_obs, f_exp=f_exp)[1]\n",
+    "print(alpha)\n",
+    "print(p_value)\n",
+    "print(\"Reject H0\" if p_value < alpha else \"Can not Reject H0\")"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "\n",
+    "\n",
     "## BONUS/OPTIONAL - Question 2\n",
     "Let's analyze a discrete distribution. To analyze the number of defective items in a factory in the city of Medellín, we took a random sample of n = 60 articles and observed the number of defectives in the following table:\n",
     "\n",
@@ -60,11 +103,39 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 25,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[119.74738785  63.02494097  17.22767118]\n",
+      "0.05\n",
+      "0.015715783395950887\n",
+      "Reject H0\n"
+     ]
+    }
+   ],
    "source": [
-    "# your answer here"
+    "# H0: the observed counts follow a Binomial(n, p) distribution\n",
+    "# H1: the observed counts do not follow a Binomial(n, p) distribution\n",
+    "\n",
+    "alpha = 0.05  # significance level; kept separate from the success probability p\n",
+    "f_obs = np.array([138, 53, 9])\n",
+    "n = 10\n",
+    "p = 0.05\n",
+    "\n",
+    "binom_dist = binom(n, p)\n",
+    "# P(X = k) for every bin except the last, which takes the remaining tail mass\n",
+    "pmfs = binom_dist.pmf(np.arange(f_obs.size - 1))\n",
+    "with_tail = np.append(pmfs, 1 - pmfs.sum())\n",
+    "f_exp = with_tail * f_obs.sum()\n",
+    "print(f_exp)\n",
+    "p_value = st.chisquare(f_obs=f_obs, f_exp=f_exp)[1]\n",
+    "print(alpha)\n",
+    "print(p_value)\n",
+    "print(\"Reject H0\" if p_value < alpha else \"Can not Reject H0\")"
    ]
   },
   {
@@ -79,17 +150,154 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 29,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Yes</th>\n",
+       "      <th>No</th>\n",
+       "      <th>Total</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Physical Activity</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>low</th>\n",
+       "      <td>32</td>\n",
+       "      <td>12</td>\n",
+       "      <td>44</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>medium</th>\n",
+       "      <td>14</td>\n",
+       "      <td>22</td>\n",
+       "      <td>36</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>high</th>\n",
+       "      <td>6</td>\n",
+       "      <td>9</td>\n",
+       "      <td>15</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>total</th>\n",
+       "      <td>52</td>\n",
+       "      <td>43</td>\n",
+       "      <td>95</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                   Yes  No  Total\n",
+       "Physical Activity                \n",
+       "low                 32  12     44\n",
+       "medium              14  22     36\n",
+       "high                 6   9     15\n",
+       "total               52  43     95"
+      ]
+     },
+     "execution_count": 29,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Observed counts with hard-coded marginal totals: Yes = 32 + 14 + 6 = 52, No = 12 + 22 + 9 = 43, grand total = 95\n",
+    "df = pd.DataFrame({\"Yes\": [32, 14, 6, 52], \"No\": [12, 22, 9, 43], \"Total\": [44, 36, 15, 95], \"Physical Activity\": [\"low\", \"medium\", \"high\", \"total\"]}).set_index(\"Physical Activity\")\n",
+    "df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0.05\n",
+      "0.004719280137040844\n"
+     ]
+    }
+   ],
+   "source": [
+    "# H0: the Yes/No response is independent of the physical activity level (no association)\n",
+    "# H1: the Yes/No response is associated with the physical activity level\n",
+    "\n",
+    "alpha = 0.05  # significance level\n",
+    "\n",
+    "f_obs = [\n",
+    "    [32, 12],  # low\n",
+    "    [14, 22],  # medium\n",
+    "    [6, 9],  # high\n",
+    "]\n",
+    "\n",
+    "p = st.chi2_contingency(f_obs)[1]\n",
+    "print(alpha)\n",
+    "print(p)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'\\nWe reject the null hypothesis of independence because the p-value (about 0.0047) is below 0.05. Therefore, there is an association between both.\\n'"
+      ]
+     },
+     "execution_count": 33,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "#your answer here"
+    "\"\"\"\n",
+    "We reject the null hypothesis of independence because the p-value (about 0.0047) is below 0.05. Therefore, there is an association between both.\n",
+    "\"\"\""
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -103,7 +311,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.3"
+   "version": "3.11.3"
   }
  },
  "nbformat": 4,