Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
142 changes: 130 additions & 12 deletions your_code/main.ipynb
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
Expand All @@ -14,14 +15,45 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 17,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Chi-square statistic: 0.011613292884118704\n",
"p-value: 0.9999832065478385\n",
"Critical value: 11.070497693516351\n"
]
}
],
"source": [
"# your answer here"
"import numpy as np\n",
"import scipy.stats as stats\n",
"\n",
"# Chi-square goodness-of-fit test of the observed counts against a Poisson\n",
"# distribution whose mean is the given sample mean.\n",
"observed_freq = np.array([35, 99, 104, 110, 62, 25])\n",
"sample_mean = 2.435\n",
"n_total = observed_freq.sum()\n",
"\n",
"# Expected counts keep the Poisson proportions over the listed categories and are\n",
"# rescaled to the observed total, because stats.chisquare needs matching sums.\n",
"# NOTE(review): confirm these six counts cover every category of the problem table;\n",
"# if the last category is open-ended (\"5 or more\"), use stats.poisson.sf(4, sample_mean)\n",
"# for its probability instead of rescaling.\n",
"category_probs = stats.poisson.pmf(np.arange(len(observed_freq)), sample_mean)\n",
"expected_freq = category_probs / category_probs.sum() * n_total\n",
"\n",
"# Pass raw counts, not proportions: normalising both arrays divides the statistic\n",
"# by the sample size and pushes the p-value towards 1.\n",
"# ddof=1 accounts for the one Poisson parameter (the mean) taken from the sample.\n",
"n_estimated_params = 1\n",
"chi2_stat, p_value = stats.chisquare(observed_freq, expected_freq, ddof=n_estimated_params)\n",
"\n",
"alpha = 0.05\n",
"# Same degrees of freedom as the p-value: categories - 1 - estimated parameters.\n",
"df = len(observed_freq) - 1 - n_estimated_params\n",
"critical_value = stats.chi2.ppf(1 - alpha, df=df)\n",
"\n",
"print(\"Chi-square statistic:\", chi2_stat)\n",
"print(\"p-value:\", p_value)\n",
"print(\"Critical value:\", critical_value)\n",
"\n"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
Expand All @@ -41,14 +73,43 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 20,
"metadata": {},
"outputs": [],
"outputs": [
{
"ename": "ValueError",
"evalue": "For each axis slice, the sum of the observed frequencies must agree with the sum of the expected frequencies to a relative tolerance of 1e-08, but the percent differences are:\n0.22082438453020287",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[20], line 7\u001b[0m\n\u001b[0;32m 4\u001b[0m lambda_param \u001b[39m=\u001b[39m \u001b[39msum\u001b[39m(n_defectives \u001b[39m*\u001b[39m observed_freq) \u001b[39m/\u001b[39m \u001b[39msum\u001b[39m(observed_freq)\n\u001b[0;32m 5\u001b[0m expected_freq \u001b[39m=\u001b[39m stats\u001b[39m.\u001b[39mpoisson\u001b[39m.\u001b[39mpmf(n_defectives, lambda_param) \u001b[39m*\u001b[39m \u001b[39msum\u001b[39m(observed_freq)\n\u001b[1;32m----> 7\u001b[0m chi2_stat, p_value \u001b[39m=\u001b[39m stats\u001b[39m.\u001b[39;49mchisquare(observed_freq, expected_freq, ddof\u001b[39m=\u001b[39;49m\u001b[39mlen\u001b[39;49m(n_defectives)\u001b[39m-\u001b[39;49m\u001b[39m1\u001b[39;49m)\n\u001b[0;32m 9\u001b[0m alpha \u001b[39m=\u001b[39m \u001b[39m0.05\u001b[39m\n\u001b[0;32m 10\u001b[0m df \u001b[39m=\u001b[39m \u001b[39mlen\u001b[39m(n_defectives) \u001b[39m-\u001b[39m \u001b[39m1\u001b[39m\n",
"File \u001b[1;32mc:\\Users\\Leticia Demarchi\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\scipy\\stats\\_stats_py.py:7553\u001b[0m, in \u001b[0;36mchisquare\u001b[1;34m(f_obs, f_exp, ddof, axis)\u001b[0m\n\u001b[0;32m 7428\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mchisquare\u001b[39m(f_obs, f_exp\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m, ddof\u001b[39m=\u001b[39m\u001b[39m0\u001b[39m, axis\u001b[39m=\u001b[39m\u001b[39m0\u001b[39m):\n\u001b[0;32m 7429\u001b[0m \u001b[39m \u001b[39m\u001b[39m\"\"\"Calculate a one-way chi-square test.\u001b[39;00m\n\u001b[0;32m 7430\u001b[0m \n\u001b[0;32m 7431\u001b[0m \u001b[39m The chi-square test tests the null hypothesis that the categorical data\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 7551\u001b[0m \n\u001b[0;32m 7552\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[1;32m-> 7553\u001b[0m \u001b[39mreturn\u001b[39;00m power_divergence(f_obs, f_exp\u001b[39m=\u001b[39;49mf_exp, ddof\u001b[39m=\u001b[39;49mddof, axis\u001b[39m=\u001b[39;49maxis,\n\u001b[0;32m 7554\u001b[0m lambda_\u001b[39m=\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39mpearson\u001b[39;49m\u001b[39m\"\u001b[39;49m)\n",
"File \u001b[1;32mc:\\Users\\Leticia Demarchi\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\scipy\\stats\\_stats_py.py:7394\u001b[0m, in \u001b[0;36mpower_divergence\u001b[1;34m(f_obs, f_exp, ddof, axis, lambda_)\u001b[0m\n\u001b[0;32m 7388\u001b[0m \u001b[39mif\u001b[39;00m diff_gt_tol:\n\u001b[0;32m 7389\u001b[0m msg \u001b[39m=\u001b[39m (\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mFor each axis slice, the sum of the observed \u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m 7390\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mfrequencies must agree with the sum of the \u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m 7391\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mexpected frequencies to a relative tolerance \u001b[39m\u001b[39m\"\u001b[39m\n\u001b[0;32m 7392\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mof \u001b[39m\u001b[39m{\u001b[39;00mrtol\u001b[39m}\u001b[39;00m\u001b[39m, but the percent differences are:\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[0;32m 7393\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m{\u001b[39;00mrelative_diff\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m)\n\u001b[1;32m-> 7394\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(msg)\n\u001b[0;32m 7396\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m 7397\u001b[0m \u001b[39m# Ignore 'invalid' errors so the edge case of a data set with length 0\u001b[39;00m\n\u001b[0;32m 7398\u001b[0m \u001b[39m# is handled without spurious warnings.\u001b[39;00m\n\u001b[0;32m 7399\u001b[0m \u001b[39mwith\u001b[39;00m np\u001b[39m.\u001b[39merrstate(invalid\u001b[39m=\u001b[39m\u001b[39m'\u001b[39m\u001b[39mignore\u001b[39m\u001b[39m'\u001b[39m):\n",
"\u001b[1;31mValueError\u001b[0m: For each axis slice, the sum of the observed frequencies must agree with the sum of the expected frequencies to a relative tolerance of 1e-08, but the percent differences are:\n0.22082438453020287"
]
}
],
"source": [
"# your code here"
"# Chi-square goodness-of-fit test of the defective-item counts against a Poisson model.\n",
"observed_freq = np.array([32, 15, 9, 4])\n",
"# NOTE(review): the value 2 is skipped here - confirm the categories against the problem table.\n",
"n_defectives = np.array([0, 1, 3, 4])\n",
"n_total = observed_freq.sum()\n",
"\n",
"# Poisson rate estimated as the frequency-weighted mean of the category values.\n",
"lambda_param = (n_defectives * observed_freq).sum() / n_total\n",
"\n",
"# The listed categories do not cover the whole Poisson support, so the raw pmf\n",
"# does not sum to 1. Rescale so the expected total equals the observed total;\n",
"# stats.chisquare raises ValueError when the two sums disagree.\n",
"category_probs = stats.poisson.pmf(n_defectives, lambda_param)\n",
"expected_freq = category_probs / category_probs.sum() * n_total\n",
"\n",
"# ddof is the number of estimated parameters (lambda), not categories - 1;\n",
"# the latter would leave zero degrees of freedom for the p-value.\n",
"n_estimated_params = 1\n",
"chi2_stat, p_value = stats.chisquare(observed_freq, expected_freq, ddof=n_estimated_params)\n",
"\n",
"alpha = 0.05\n",
"# Same degrees of freedom as the p-value: categories - 1 - estimated parameters.\n",
"df = len(n_defectives) - 1 - n_estimated_params\n",
"critical_value = stats.chi2.ppf(1 - alpha, df=df)\n",
"\n",
"print(\"Chi-square statistic:\", chi2_stat)\n",
"print(\"p-value:\", p_value)\n",
"print(\"Critical value:\", critical_value)\n"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
Expand All @@ -60,14 +121,40 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 25,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"p-value: 0.01150355737929688\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\Leticia Demarchi\\AppData\\Local\\Temp\\ipykernel_6544\\326399162.py:7: DeprecationWarning: 'binom_test' is deprecated in favour of 'binomtest' from version 1.7.0 and will be removed in Scipy 1.12.0.\n",
" p_value = stats.binom_test(defective_tires, n, p)\n"
]
}
],
"source": [
"# your answer here"
"from scipy import stats\n",
"\n",
"# Exact two-sided binomial test of `defective_tires` successes in `n` trials\n",
"# against the hypothesised success probability `p`.\n",
"defective_tires = 3\n",
"n = 10\n",
"p = 0.05\n",
"alpha = 0.05\n",
"\n",
"# stats.binom_test is deprecated and removed in SciPy 1.12; binomtest returns a\n",
"# result object whose .pvalue is the same two-sided p-value.\n",
"p_value = stats.binomtest(defective_tires, n, p).pvalue\n",
"\n",
"print(\"p-value:\", p_value)\n",
"print(\"Reject H0:\", p_value < alpha)\n"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
Expand All @@ -79,12 +166,43 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 28,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Chi-square statistic: 3.8125000000000004\n",
"p-value: 0.14863673053812249\n",
"Degrees of freedom: 2\n",
"Expected frequencies:\n",
"[[16.66666667 33.33333333]\n",
" [13.33333333 26.66666667]\n",
" [15. 30. ]]\n"
]
}
],
"source": [
"#your answer here"
"import numpy as np\n",
"from scipy import stats\n",
"\n",
"# Chi-square test of independence on a contingency table of observed counts.\n",
"# NOTE(review): observed_freq is not defined in this cell; it must already hold the\n",
"# contingency table (rows x columns of counts). Define it here so the notebook\n",
"# survives Restart & Run All.\n",
"contingency_table = np.asarray(observed_freq)\n",
"if contingency_table.ndim < 2:\n",
"    # A leftover 1-D array would not be a contingency table, so fail loudly instead.\n",
"    raise ValueError(\"observed_freq must be a contingency table with at least 2 dimensions\")\n",
"\n",
"chi2_stat, p_value, dof, expected_freq = stats.chi2_contingency(contingency_table)\n",
"alpha = 0.05\n",
"\n",
"print(\"Chi-square statistic:\", chi2_stat)\n",
"print(\"p-value:\", p_value)\n",
"print(\"Degrees of freedom:\", dof)\n",
"print(\"Reject H0:\", p_value < alpha)\n",
"print(\"Expected frequencies:\")\n",
"print(expected_freq)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down