# %% Setup shared by the exercises below
import numpy as np
from scipy import stats

alpha = 0.05  # significance level used by every test in this notebook


def poisson_goodness_of_fit(observed_counts, category_values, rate,
                            n_estimated_params=1, alpha=0.05):
    """Chi-square goodness-of-fit test of observed counts against a Poisson model.

    Parameters
    ----------
    observed_counts : array-like of raw counts (NOT proportions), one per category.
    category_values : array-like of the Poisson outcome each count belongs to.
    rate : Poisson mean (lambda) of the hypothesised distribution.
    n_estimated_params : number of model parameters estimated from the same
        sample (1 when ``rate`` is the sample mean). Each one removes a degree
        of freedom.
    alpha : significance level used for the critical value.

    Returns
    -------
    (chi2_stat, p_value, df, critical_value, expected_counts)

    Raises
    ------
    ValueError
        If fewer than one degree of freedom would remain.
    """
    observed_counts = np.asarray(observed_counts, dtype=float)
    df = observed_counts.size - 1 - n_estimated_params
    if df < 1:
        raise ValueError(
            "Not enough categories: the test needs at least one degree of freedom."
        )

    probabilities = stats.poisson.pmf(category_values, rate)
    # The pmf over a finite list of outcomes does not sum to 1, and
    # stats.chisquare requires sum(expected) == sum(observed); rescale the
    # expected counts to the observed total.
    # NOTE(review): if the last category is really an open-ended "k or more"
    # class, its expected count should come from the survival function
    # instead - confirm against the exercise table.
    expected_counts = probabilities / probabilities.sum() * observed_counts.sum()

    # The statistic must be computed on counts: feeding proportions shrinks it
    # by a factor of the sample size and makes every fit look perfect.
    chi2_stat, p_value = stats.chisquare(
        observed_counts, expected_counts, ddof=n_estimated_params
    )
    # Same degrees of freedom for the critical value as for the p-value.
    critical_value = stats.chi2.ppf(1 - alpha, df=df)
    return chi2_stat, p_value, df, critical_value, expected_counts


# %% Question 1: Poisson goodness of fit with the given sample mean
observed_freq = np.array([35, 99, 104, 110, 62, 25])

# NOTE(review): with categories 0..5 these six counts average
# 1010 / 435 ~= 2.32, not 2.435 - confirm no rows of the table were dropped.
sample_mean = 2.435

chi2_stat, p_value, df, critical_value, expected_freq = poisson_goodness_of_fit(
    observed_freq, np.arange(len(observed_freq)), sample_mean,
    n_estimated_params=1, alpha=alpha,
)

print("Chi-square statistic:", chi2_stat)
print("p-value:", p_value)
print("Critical value:", critical_value)


# %% Question 2: Poisson goodness of fit for the number of defective items
observed_freq = np.array([32, 15, 9, 4])
# NOTE(review): category 2 is absent from this list - confirm against the
# exercise table that the categories really are 0, 1, 3 and 4.
n_defectives = np.array([0, 1, 3, 4])

# lambda is estimated from this sample, which costs one degree of freedom.
lambda_param = sum(n_defectives * observed_freq) / sum(observed_freq)

# NOTE(review): two of the expected counts come out below 5, so the
# chi-square approximation is rough here; merging categories may be advisable.
chi2_stat, p_value, df, critical_value, expected_freq = poisson_goodness_of_fit(
    observed_freq, n_defectives, lambda_param,
    n_estimated_params=1, alpha=alpha,
)

print("Chi-square statistic:", chi2_stat)
print("p-value:", p_value)
print("Critical value:", critical_value)


# %% Question 3: exact binomial test for the defective tires
defective_tires = 3
n = 10
p = 0.05

# stats.binom_test is deprecated (removal announced for SciPy 1.12);
# stats.binomtest is its replacement. The default two-sided alternative is
# kept, so the p-value is the same as before.
p_value = stats.binomtest(defective_tires, n, p).pvalue

print("p-value:", p_value)
# %% Question 4: chi-square test of independence
import numpy as np
from scipy import stats


def independence_test(table):
    """Run a chi-square test of independence on a 2-D contingency table.

    Parameters
    ----------
    table : array-like of observed counts with at least two rows and two columns.

    Returns
    -------
    The result of ``scipy.stats.chi2_contingency``, which unpacks as
    ``(chi2_stat, p_value, dof, expected_freq)``.

    Raises
    ------
    ValueError
        If ``table`` is not two-dimensional with at least 2 rows and 2 columns.
    """
    table = np.asarray(table)
    # A 1-D array would not be a contingency table at all; reject it
    # explicitly rather than reporting a degenerate result.
    if table.ndim != 2 or min(table.shape) < 2:
        raise ValueError(
            "observed_freq must be a 2-D contingency table with at least "
            "2 rows and 2 columns; got shape " + str(table.shape)
        )
    return stats.chi2_contingency(table)


# NOTE(review): this cell does not define `observed_freq` itself. It must be
# bound to the exercise's contingency table before this cell runs; any 1-D
# array of the same name left over from an earlier cell is NOT that table.
# Define the table explicitly here so the notebook survives Restart & Run All.
chi2_stat, p_value, dof, expected_freq = independence_test(observed_freq)
alpha = 0.05

print("Chi-square statistic:", chi2_stat)
print("p-value:", p_value)
print("Degrees of freedom:", dof)
print("Expected frequencies:")
print(expected_freq)