ta-data-lis · ekraj-pok · May 14, 2023
diff --git a/your_code/main.ipynb b/your_code/main.ipynb
@@ -14,11 +14,54 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# your answer here"
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import scipy.stats as st\n",
+    "from scipy.stats import poisson\n",
+    "from scipy.stats import binom"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "p_value is: 0.483688906853727\n",
+      "We can not reject the null hypothesis: there is reason to believe that at a .05 level the number of scores is a Poisson variable\n"
+     ]
+    }
+   ],
+   "source": [
+    "# your answer here\n",
+    "# our observation O:\n",
+    "O = [35,99,104,110,62,25,10,3]\n",
+    "alpha = 0.05\n",
+    "mu = 2.435\n",
+    "poisson_dist = poisson(mu)\n",
+    "\n",
+    "# poisson distribution for 1 to 6th value of probability\n",
+    "poisson_pmfs = np.array([poisson_dist.pmf(i) for i in range(0,7)])  \n",
+    "# probabilityo of scoring more than 7 will be 1 - cumulative probability of getting less than 7 i.e. sum of all value poisson_pmfs\n",
+    "poisson_pmfs_tail = 1-sum(poisson_pmfs) \n",
+    "# Now our final pmfs will be with the tail value\n",
+    "with_tail = np.append(poisson_pmfs, poisson_pmfs_tail)\n",
+    "# Calcualte the expected score according to our calculated pmfs where population will be the sum of our observed score\n",
+    "E = with_tail * sum(O)\n",
+    "\n",
+    "stats, p_value = st.chisquare(f_obs = O, f_exp  = E)\n",
+    "print(f\"p_value is: {p_value}\")\n",
+    "if p_value < alpha:\n",
+    "    print(\"We can reject the null hypothesis :  there is no reason to believe that at a .05 level the number of scores is a Poisson variable\")\n",
+    "else: \n",
+    "    print(\"We can not reject the null hypothesis: there is reason to believe that at a .05 level the number of scores is a Poisson variable\")\n"
    ]
   },
   {
@@ -41,11 +84,50 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 34,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "p_value is: 3.008342324775625e-07\n",
+      "We can reject the null hypothesis :  there is no reason to believe that at a .05 level the number of defective items follow Poisson distribution\n"
+     ]
+    }
+   ],
    "source": [
-    "# your code here"
+    "# your code here\n",
+    "defective_quantity = np.array([0, 1, 2, 3, 4])\n",
+    "O = np.array([32,15, 0, 9, 4])\n",
+    "# mu = ∑(x * f(x)) / n\n",
+    "mu = ((0*32)+(1*15)+(3*15)+(4*4))/60\n",
+    "alpha = 0.05\n",
+    "poisson_dist = poisson(mu)\n",
+    "\n",
+    "# poisson distribution for each observed defective items\n",
+    "pmf_0 = poisson_dist.pmf(0)\n",
+    "pmf_1 = poisson_dist.pmf(1)\n",
+    "pmf_2 = poisson_dist.pmf(2)\n",
+    "pmf_3 = poisson_dist.pmf(3)\n",
+    "pmf_4 = poisson_dist.pmf(4)\n",
+    "\n",
+    "\n",
+    "# calculate pmf for each occurance\n",
+    "pmfs = np.array([pmf_0, pmf_1, pmf_2, pmf_3, pmf_4])\n",
+    "# Calcualte the expected frequency according to our calculated pmfs where population will be the sum of our observed frequency\n",
+    "E = pmfs * sum(O)\n",
+    "# normalize the expected frequency with the observed frequency\n",
+    "E_norm = E / np.sum(E) * np.sum(O)\n",
+    "\n",
+    "stats, p_value = st.chisquare(f_obs = O, f_exp  = E_norm, ddof = 1)\n",
+    "\n",
+    "print(f\"p_value is: {p_value}\")\n",
+    "if p_value < alpha:\n",
+    "    print(\"We can reject the null hypothesis :  there is no reason to believe that at a .05 level the number of defective items follow Poisson distribution\")\n",
+    "else: \n",
+    "    print(\"We can not reject the null hypothesis: there is reason to believe that at a .05 level the number of defective items follow Poisson distribution\")\n",
+    "\n"
    ]
   },
   {
@@ -60,11 +142,46 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 40,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "8.306179519542757\n",
+      "P_value: 0.015715783395951262\n",
+      "We can reject the null hypothesis\n"
+     ]
+    }
+   ],
    "source": [
-    "# your answer here"
+    "# your answer here\n",
+    "n = 10\n",
+    "p = 0.05\n",
+    "O = np.array([138, 53, 9])\n",
+    "Population = O.sum()\n",
+    "\n",
+    "binom_dist = binom(n,p)\n",
+    "binom_pmfs = np.array([binom_dist.pmf(i) for i in range(2)])\n",
+    "pmf_tail = 1 - binom_pmfs.sum()\n",
+    "with_tail = np.append(binom_pmfs, pmf_tail)\n",
+    "\n",
+    "E = with_tail * Population\n",
+    "\n",
+    "stats, p_value = st.chisquare(O, E)\n",
+    "\n",
+    "print(stats)\n",
+    "print(f\"P_value: {p_value}\")\n",
+    "\n",
+    "if p_value < alpha:\n",
+    "    print(\"We can reject the null hypothesis\")\n",
+    "else:\n",
+    "    print(\"We can not reject the null hypothesis\")\n",
+    "\n",
+    "\n",
+    "\n",
+    "\n"
    ]
   },
   {
@@ -79,17 +196,47 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 47,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0.004719280137040844\n",
+      "We can reject the null hypothesis: There is association between physical activity and sugary drinks consumption\n"
+     ]
+    }
+   ],
    "source": [
-    "#your answer here"
+    "#your answer here\n",
+    "\n",
+    "# H0: there is no association between physical activity and the consumption of sugary drinks\n",
+    "# H1: there is an association between physical activity and the consumption of sugary drinks\n",
+    "\n",
+    "alpha = 0.05\n",
+    "category = np.array([[32, 12],\n",
+    "                     [14, 22],\n",
+    "                     [6, 9]])\n",
+    "stats, p_value, df, E = st.chi2_contingency(category)\n",
+    "print(p_value)\n",
+    "if p_value < alpha:\n",
+    "    print(\"We can reject the null hypothesis: There is association between physical activity and sugary drinks consumption\")\n",
+    "else:\n",
+    "    print(\"We can not reject the null hypothesis. We can not say that there is association between phsical activity and sugary drinks consumption\")"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -103,7 +250,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.3"
+   "version": "3.9.13"
   }
  },
  "nbformat": 4,