ta-data-lis · AnaCarvalho84 · Aug 19, 2023
diff --git a/your_code/main.ipynb b/your_code/main.ipynb
@@ -14,11 +14,47 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 15,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Chi-squared statistic: 6.491310681109821\n",
+      "P-value: 0.4836889068537269\n",
+      "Fail to reject the null hypothesis: The data follows a Poisson distribution.\n"
+     ]
+    }
+   ],
    "source": [
-    "# your answer here"
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import scipy.stats as st\n",
+    "\n",
+    "from scipy.stats import poisson\n",
+    "\n",
+    "significance = 0.05\n",
+    "mu = 2.435  # sample mean of f_obs (1091 / 448, counting the last bin as 7): estimated from the data\n",
+    "f_obs = np.array([35, 99, 104, 110, 62, 25, 10, 3])  # counts for 0, 1, ..., 6 and \"7 or more\"\n",
+    "poisson_dist = poisson(mu)\n",
+    "\n",
+    "# P(X = k) for every category but the last; the leftover mass goes to the open-ended last category\n",
+    "poisson_pmfs = poisson_dist.pmf(np.arange(len(f_obs) - 1))\n",
+    "poisson_pmfs_last = np.append(poisson_pmfs, 1 - poisson_pmfs.sum())\n",
+    "f_exp = poisson_pmfs_last * f_obs.sum()\n",
+    "\n",
+    "# Chi-squared goodness-of-fit test; ddof=1 because mu was estimated from this sample (df = k - 1 - 1)\n",
+    "chi_squared, p_value = st.chisquare(f_obs=f_obs, f_exp=f_exp, ddof=1)\n",
+    "\n",
+    "if p_value < significance:\n",
+    "    result = \"Reject the null hypothesis: The data does not follow a Poisson distribution.\"\n",
+    "else:\n",
+    "    result = \"Fail to reject the null hypothesis: The data follows a Poisson distribution.\"\n",
+    "\n",
+    "print(\"Chi-squared statistic:\", chi_squared)\n",
+    "print(\"P-value:\", p_value)\n",
+    "print(result)"
    ]
   },
   {
@@ -41,7 +77,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 16,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -60,11 +96,59 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 17,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "We can reject the null hypothesis\n",
+      "p-value: 0.015715783395950887\n"
+     ]
+    }
+   ],
    "source": [
-    "# your answer here"
+    "from scipy.stats import binom\n",
+    "\n",
+    "# Goodness-of-fit test against a fully specified binomial model.\n",
+    "# H0: The sample comes from a binomial population (with n = 10 and p = 0.05)\n",
+    "# H1: The sample does not come from a binomial population (with n = 10 and p = 0.05)\n",
+    "\n",
+    "n, p = 10, 0.05\n",
+    "alpha = 0.05\n",
+    "binom_dist = binom(n, p)\n",
+    "\n",
+    "# Observed frequencies of defective tires; the last entry is the\n",
+    "# open-ended \"2 or more\" category.\n",
+    "O = np.array([138, 53, 9])\n",
+    "sample_size = O.sum()\n",
+    "\n",
+    "# Probability of exactly 0 and exactly 1 under H0 ...\n",
+    "head_probs = np.array([binom_dist.pmf(k) for k in (0, 1)])\n",
+    "# ... and the remaining probability mass belongs to the last category.\n",
+    "category_probs = np.append(head_probs, 1 - head_probs.sum())\n",
+    "\n",
+    "# Expected frequencies: category probabilities scaled to the sample size.\n",
+    "E = sample_size * category_probs\n",
+    "\n",
+    "chisquare_result = st.chisquare(f_obs=O, f_exp=E)\n",
+    "\n",
+    "# Reject H0 when the p-value falls below the significance level.\n",
+    "verdict = (\n",
+    "    \"We can reject the null hypothesis\"\n",
+    "    if chisquare_result.pvalue < alpha\n",
+    "    else \"We can not reject the null hypothesis\"\n",
+    ")\n",
+    "print(verdict)\n",
+    "print(\"p-value:\", chisquare_result.pvalue)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Since the p-value (about 0.016) is less than the chosen significance level (alpha = 0.05), we can reject the null hypothesis. This suggests that there is evidence to believe that the observed data do not follow a binomial distribution with n = 10 and p = 0.05. The result indicates that the distribution of defective tires in the sample deviates significantly from what would be expected under the assumed binomial distribution."
    ]
   },
   {
@@ -79,11 +163,59 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Chi-squared statistic: 10.712198008709638\n",
+      "P-value: 0.004719280137040844\n",
+      "Degrees of freedom: 2\n",
+      "Expected frequencies:\n",
+      " [[24.08421053 19.91578947]\n",
+      " [19.70526316 16.29473684]\n",
+      " [ 8.21052632  6.78947368]]\n",
+      "We can reject the null hypothesis\n"
+     ]
+    }
+   ],
+   "source": [
+    "from scipy.stats import chi2_contingency\n",
+    "\n",
+    "# Chi-squared test of independence on a 3x2 table of observed counts.\n",
+    "# H0: Physical Activity is independent of sugar drinks consumption\n",
+    "# H1: Physical Activity is not independent of sugar drinks consumption\n",
+    "\n",
+    "alpha = 0.05\n",
+    "children = [\n",
+    "    [32, 12],\n",
+    "    [14, 22],\n",
+    "    [6, 9],\n",
+    "]\n",
+    "\n",
+    "chi2_stat, p_val, dof, expected = chi2_contingency(children)\n",
+    "\n",
+    "report = (\n",
+    "    (\"Chi-squared statistic:\", chi2_stat),\n",
+    "    (\"P-value:\", p_val),\n",
+    "    (\"Degrees of freedom:\", dof),\n",
+    "    (\"Expected frequencies:\\n\", expected),\n",
+    ")\n",
+    "for label, value in report:\n",
+    "    print(label, value)\n",
+    "\n",
+    "print(\"We can reject the null hypothesis\" if p_val < alpha else \"We cannot reject the null hypothesis\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
    "metadata": {},
    "outputs": [],
    "source": [
-    "#your answer here"
+    "# Rejecting the null hypothesis (p-value of about 0.005 < alpha = 0.05) suggests that there is a statistically significant association between physical activity and sugar drinks consumption."
    ]
   }
  ],
@@ -103,7 +235,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.3"
+   "version": "3.10.9"
   }
  },
  "nbformat": 4,