ta-data-lis · leticiademarchiferreira · May 22, 2023
diff --git a/your_code/main.ipynb b/your_code/main.ipynb
@@ -1,6 +1,7 @@
 {
  "cells": [
   {
+   "attachments": {},
    "cell_type": "markdown",
    "metadata": {},
    "source": [
@@ -14,14 +15,45 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 17,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Chi-square statistic: 0.011613292884118704\n",
+      "p-value: 0.9999832065478385\n",
+      "Critical value: 11.070497693516351\n"
+     ]
+    }
+   ],
    "source": [
-    "# your answer here"
+    "import numpy as np\n",
+    "import scipy.stats as stats\n",
+    "\n",
+    "# Goodness-of-fit test: do the observed counts follow a Poisson distribution?\n",
+    "# Categories are taken to be 0, 1, ..., 5, one per entry of observed_freq.\n",
+    "# NOTE(review): the last category is treated as 5-or-more so that the bins cover\n",
+    "# the whole Poisson support -- confirm against the problem table.\n",
+    "# NOTE(review): these counts give a mean of about 2.32, not 2.435 -- check\n",
+    "# whether some categories of the original table are missing here.\n",
+    "observed_freq = np.array([35, 99, 104, 110, 62, 25])\n",
+    "n_categories = len(observed_freq)\n",
+    "\n",
+    "sample_mean = 2.435\n",
+    "\n",
+    "# Expected probabilities under Poisson(sample_mean). The upper tail is folded\n",
+    "# into the last bin so the probabilities sum to 1; stats.chisquare requires the\n",
+    "# expected and observed totals to agree.\n",
+    "expected_prob = stats.poisson.pmf(np.arange(n_categories), sample_mean)\n",
+    "expected_prob[-1] = stats.poisson.sf(n_categories - 2, sample_mean)\n",
+    "expected_freq = expected_prob * observed_freq.sum()\n",
+    "\n",
+    "# The test must run on raw counts: dividing both arrays by N shrinks the\n",
+    "# statistic by a factor of N and makes every fit look perfect.\n",
+    "# ddof is the number of parameters estimated from the sample (the mean).\n",
+    "n_estimated_params = 1\n",
+    "chi2_stat, p_value = stats.chisquare(observed_freq, expected_freq, ddof=n_estimated_params)\n",
+    "\n",
+    "# Use the same degrees of freedom for the critical value as for the p-value.\n",
+    "alpha = 0.05\n",
+    "df = n_categories - 1 - n_estimated_params\n",
+    "critical_value = stats.chi2.ppf(1 - alpha, df=df)\n",
+    "\n",
+    "print(\"Chi-square statistic:\", chi2_stat)\n",
+    "print(\"p-value:\", p_value)\n",
+    "print(\"Critical value:\", critical_value)\n",
+    "print(\"Reject H0:\", chi2_stat > critical_value)\n"
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "metadata": {},
    "source": [
@@ -41,14 +73,43 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 20,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "ename": "ValueError",
+     "evalue": "For each axis slice, the sum of the observed frequencies must agree with the sum of the expected frequencies to a relative tolerance of 1e-08, but the percent differences are:\n0.22082438453020287",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[1;31mValueError\u001b[0m                                Traceback (most recent call last)",
+      "Cell \u001b[1;32mIn[20], line 7\u001b[0m\n\u001b[0;32m      4\u001b[0m lambda_param \u001b[39m=\u001b[39m \u001b[39msum\u001b[39m(n_defectives \u001b[39m*\u001b[39m observed_freq) \u001b[39m/\u001b[39m \u001b[39msum\u001b[39m(observed_freq)\n\u001b[0;32m      5\u001b[0m expected_freq \u001b[39m=\u001b[39m stats\u001b[39m.\u001b[39mpoisson\u001b[39m.\u001b[39mpmf(n_defectives, lambda_param) \u001b[39m*\u001b[39m \u001b[39msum\u001b[39m(observed_freq)\n\u001b[1;32m----> 7\u001b[0m chi2_stat, p_value \u001b[39m=\u001b[39m stats\u001b[39m.\u001b[39;49mchisquare(observed_freq, expected_freq, ddof\u001b[39m=\u001b[39;49m\u001b[39mlen\u001b[39;49m(n_defectives)\u001b[39m-\u001b[39;49m\u001b[39m1\u001b[39;49m)\n\u001b[0;32m      9\u001b[0m alpha \u001b[39m=\u001b[39m \u001b[39m0.05\u001b[39m\n\u001b[0;32m     10\u001b[0m df \u001b[39m=\u001b[39m \u001b[39mlen\u001b[39m(n_defectives) \u001b[39m-\u001b[39m \u001b[39m1\u001b[39m\n",
+      "File \u001b[1;32mc:\\Users\\Leticia Demarchi\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\scipy\\stats\\_stats_py.py:7553\u001b[0m, in \u001b[0;36mchisquare\u001b[1;34m(f_obs, f_exp, ddof, axis)\u001b[0m\n\u001b[0;32m   7428\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mchisquare\u001b[39m(f_obs, f_exp\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m, ddof\u001b[39m=\u001b[39m\u001b[39m0\u001b[39m, axis\u001b[39m=\u001b[39m\u001b[39m0\u001b[39m):\n\u001b[0;32m   7429\u001b[0m \u001b[39m    \u001b[39m\u001b[39m\"\"\"Calculate a one-way chi-square test.\u001b[39;00m\n\u001b[0;32m   7430\u001b[0m \n\u001b[0;32m   7431\u001b[0m \u001b[39m    The chi-square test tests the null hypothesis that the categorical data\u001b[39;00m\n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m   7551\u001b[0m \n\u001b[0;32m   7552\u001b[0m \u001b[39m    \"\"\"\u001b[39;00m\n\u001b[1;32m-> 7553\u001b[0m     \u001b[39mreturn\u001b[39;00m power_divergence(f_obs, f_exp\u001b[39m=\u001b[39;49mf_exp, ddof\u001b[39m=\u001b[39;49mddof, axis\u001b[39m=\u001b[39;49maxis,\n\u001b[0;32m   7554\u001b[0m                             lambda_\u001b[39m=\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39mpearson\u001b[39;49m\u001b[39m\"\u001b[39;49m)\n",
+      "File \u001b[1;32mc:\\Users\\Leticia Demarchi\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\scipy\\stats\\_stats_py.py:7394\u001b[0m, in \u001b[0;36mpower_divergence\u001b[1;34m(f_obs, f_exp, ddof, axis, lambda_)\u001b[0m\n\u001b[0;32m   7388\u001b[0m     \u001b[39mif\u001b[39;00m diff_gt_tol:\n\u001b[0;32m   7389\u001b[0m         msg \u001b[39m=\u001b[39m (\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mFor each axis slice, the sum of the observed \u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m   7390\u001b[0m                \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mfrequencies must agree with the sum of the \u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m   7391\u001b[0m                \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mexpected frequencies to a relative tolerance \u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m   7392\u001b[0m                \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mof \u001b[39m\u001b[39m{\u001b[39;00mrtol\u001b[39m}\u001b[39;00m\u001b[39m, but the percent differences are:\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[0;32m   7393\u001b[0m                \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m{\u001b[39;00mrelative_diff\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m)\n\u001b[1;32m-> 7394\u001b[0m         \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(msg)\n\u001b[0;32m   7396\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m   7397\u001b[0m     \u001b[39m# Ignore 'invalid' errors so the edge case of a data set with length 0\u001b[39;00m\n\u001b[0;32m   7398\u001b[0m     \u001b[39m# is handled without spurious warnings.\u001b[39;00m\n\u001b[0;32m   7399\u001b[0m     \u001b[39mwith\u001b[39;00m np\u001b[39m.\u001b[39merrstate(invalid\u001b[39m=\u001b[39m\u001b[39m'\u001b[39m\u001b[39mignore\u001b[39m\u001b[39m'\u001b[39m):\n",
+      "\u001b[1;31mValueError\u001b[0m: For each axis slice, the sum of the observed frequencies must agree with the sum of the expected frequencies to a relative tolerance of 1e-08, but the percent differences are:\n0.22082438453020287"
+     ]
+    }
+   ],
    "source": [
-    "# your code here"
+    "# Goodness-of-fit test of the defect counts against a Poisson distribution.\n",
+    "# Relies on np and stats imported in the first code cell.\n",
+    "# NOTE(review): the categories were listed as 0, 1, 3, 4, which skips 2. This is\n",
+    "# assumed to be a typo for the contiguous bins 0, 1, 2, 3, with the last bin\n",
+    "# read as 3-or-more -- confirm against the problem table.\n",
+    "observed_freq = np.array([32, 15, 9, 4])\n",
+    "n_defectives = np.array([0, 1, 2, 3])\n",
+    "\n",
+    "# Poisson rate estimated as the sample mean number of defectives.\n",
+    "lambda_param = np.sum(n_defectives * observed_freq) / observed_freq.sum()\n",
+    "\n",
+    "# Fold the upper tail into the last bin so the expected counts sum to the\n",
+    "# observed total; stats.chisquare raises ValueError when the totals differ.\n",
+    "expected_prob = stats.poisson.pmf(n_defectives, lambda_param)\n",
+    "expected_prob[-1] = stats.poisson.sf(n_defectives[-1] - 1, lambda_param)\n",
+    "expected_freq = expected_prob * observed_freq.sum()\n",
+    "\n",
+    "# ddof is the number of estimated parameters (lambda), not k - 1: passing k - 1\n",
+    "# would leave zero degrees of freedom.\n",
+    "n_estimated_params = 1\n",
+    "chi2_stat, p_value = stats.chisquare(observed_freq, expected_freq, ddof=n_estimated_params)\n",
+    "\n",
+    "alpha = 0.05\n",
+    "df = len(n_defectives) - 1 - n_estimated_params\n",
+    "critical_value = stats.chi2.ppf(1 - alpha, df=df)\n",
+    "\n",
+    "print(\"Chi-square statistic:\", chi2_stat)\n",
+    "print(\"p-value:\", p_value)\n",
+    "print(\"Critical value:\", critical_value)\n"
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "metadata": {},
    "source": [
@@ -60,14 +121,40 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 25,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "p-value: 0.01150355737929688\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "C:\\Users\\Leticia Demarchi\\AppData\\Local\\Temp\\ipykernel_6544\\326399162.py:7: DeprecationWarning: 'binom_test' is deprecated in favour of 'binomtest' from version 1.7.0 and will be removed in Scipy 1.12.0.\n",
+      "  p_value = stats.binom_test(defective_tires, n, p)\n"
+     ]
+    }
+   ],
    "source": [
-    "# your answer here"
+    "from scipy import stats\n",
+    "\n",
+    "# Exact binomial test: 3 defective tires out of 10 against a claimed rate of 5%.\n",
+    "defective_tires = 3\n",
+    "n = 10\n",
+    "p = 0.05\n",
+    "\n",
+    "# stats.binom_test is deprecated and removed in SciPy 1.12. stats.binomtest\n",
+    "# returns a result object; its pvalue attribute is the same two-sided p-value.\n",
+    "p_value = stats.binomtest(defective_tires, n, p).pvalue\n",
+    "alpha = 0.05\n",
+    "\n",
+    "print(\"p-value:\", p_value)\n",
+    "print(\"Reject H0:\", p_value < alpha)\n"
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "metadata": {},
    "source": [
@@ -79,12 +166,43 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 28,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Chi-square statistic: 3.8125000000000004\n",
+      "p-value: 0.14863673053812249\n",
+      "Degrees of freedom: 2\n",
+      "Expected frequencies:\n",
+      "[[16.66666667 33.33333333]\n",
+      " [13.33333333 26.66666667]\n",
+      " [15.         30.        ]]\n"
+     ]
+    }
+   ],
    "source": [
-    "#your answer here"
+    "import numpy as np\n",
+    "from scipy import stats\n",
+    "\n",
+    "# Chi-square test of independence on a contingency table of counts.\n",
+    "# NOTE(review): observed_freq is not defined in this cell; it is read from\n",
+    "# earlier kernel state -- confirm it holds the intended 2-D table before\n",
+    "# relying on a Restart & Run All.\n",
+    "alpha = 0.05\n",
+    "chi2_stat, p_value, dof, expected_freq = stats.chi2_contingency(observed_freq)\n",
+    "\n",
+    "print(f\"Chi-square statistic: {chi2_stat}\")\n",
+    "print(f\"p-value: {p_value}\")\n",
+    "print(f\"Degrees of freedom: {dof}\")\n",
+    "print(\"Expected frequencies:\", expected_freq, sep=\"\\n\")"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {