Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
228 changes: 218 additions & 10 deletions your_code/main.ipynb
Original file line number Diff line number Diff line change
@@ -1,5 +1,16 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [],
"source": [
"import scipy.stats as st\n",
"from scipy.stats import poisson\n",
"import numpy as np"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand All @@ -14,11 +25,95 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([0.08759775, 0.21330051, 0.25969338, 0.21078446, 0.12831504,\n",
" 0.06248942, 0.02536029])"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#H0: our observations follow Poisson\n",
"#H1: our observations do not follow Poisson\n",
"observed=np.array([35,99,104,110,62,25,10,3])\n",
"mu=2.435\n",
"poisson_dist=poisson(mu)\n",
"\n",
"#we have observed but not expected\n",
"#we know probability of values 0 to 6 under poisson\n",
"poisson_pmfs=np.array([poisson_dist.pmf(i) for i in range(0,7)])\n",
"poisson_pmfs\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([0.08759775, 0.21330051, 0.25969338, 0.21078446, 0.12831504,\n",
" 0.06248942, 0.02536029, 0.01245915])"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#to account for >7 observations\n",
"with_tail=np.append(poisson_pmfs,1-poisson_pmfs.sum())\n",
"with_tail"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"# your answer here"
"#448 observations total\n",
"expected=with_tail*448"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Power_divergenceResult(statistic=6.491310681109821, pvalue=0.4836889068537269)"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"st.chisquare(f_exp=expected,f_obs=observed)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"#we do not reject that our observations follow a poisson, because p value is greater than significance 0.05"
]
},
{
Expand All @@ -41,11 +136,56 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 12,
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"2.0"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# your code here"
"mu=(0+1+3+4)/4\n",
"mu"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"ename": "ValueError",
"evalue": "For each axis slice, the sum of the observed frequencies must agree with the sum of the expected frequencies to a relative tolerance of 1e-08, but the percent differences are:\n0.4778112197861302",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[37], line 11\u001b[0m\n\u001b[0;32m 7\u001b[0m poisson_pmfs\u001b[38;5;241m=\u001b[39mnp\u001b[38;5;241m.\u001b[39marray([poisson_dist\u001b[38;5;241m.\u001b[39mpmf(i) \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m [\u001b[38;5;241m0\u001b[39m,\u001b[38;5;241m1\u001b[39m,\u001b[38;5;241m3\u001b[39m,\u001b[38;5;241m4\u001b[39m]])\n\u001b[0;32m 9\u001b[0m expected\u001b[38;5;241m=\u001b[39mpoisson_pmfs\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m60\u001b[39m\n\u001b[1;32m---> 11\u001b[0m st\u001b[38;5;241m.\u001b[39mchisquare(f_obs\u001b[38;5;241m=\u001b[39mobserved,f_exp\u001b[38;5;241m=\u001b[39mexpected)\n",
"File \u001b[1;32mc:\\Users\\valer.DESKTOP-8CM37D7\\anaconda3\\Lib\\site-packages\\scipy\\stats\\_stats_py.py:7553\u001b[0m, in \u001b[0;36mchisquare\u001b[1;34m(f_obs, f_exp, ddof, axis)\u001b[0m\n\u001b[0;32m 7428\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mchisquare\u001b[39m(f_obs, f_exp\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, ddof\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0\u001b[39m, axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0\u001b[39m):\n\u001b[0;32m 7429\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Calculate a one-way chi-square test.\u001b[39;00m\n\u001b[0;32m 7430\u001b[0m \n\u001b[0;32m 7431\u001b[0m \u001b[38;5;124;03m The chi-square test tests the null hypothesis that the categorical data\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 7551\u001b[0m \n\u001b[0;32m 7552\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m-> 7553\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m power_divergence(f_obs, f_exp\u001b[38;5;241m=\u001b[39mf_exp, ddof\u001b[38;5;241m=\u001b[39mddof, axis\u001b[38;5;241m=\u001b[39maxis,\n\u001b[0;32m 7554\u001b[0m lambda_\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpearson\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
"File \u001b[1;32mc:\\Users\\valer.DESKTOP-8CM37D7\\anaconda3\\Lib\\site-packages\\scipy\\stats\\_stats_py.py:7394\u001b[0m, in \u001b[0;36mpower_divergence\u001b[1;34m(f_obs, f_exp, ddof, axis, lambda_)\u001b[0m\n\u001b[0;32m 7388\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m diff_gt_tol:\n\u001b[0;32m 7389\u001b[0m msg \u001b[38;5;241m=\u001b[39m (\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFor each axis slice, the sum of the observed \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 7390\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfrequencies must agree with the sum of the \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 7391\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mexpected frequencies to a relative tolerance \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 7392\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mof \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mrtol\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m, but the percent differences are:\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 7393\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mrelative_diff\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m-> 7394\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(msg)\n\u001b[0;32m 7396\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 7397\u001b[0m \u001b[38;5;66;03m# Ignore 'invalid' errors so the edge case of a data set with length 0\u001b[39;00m\n\u001b[0;32m 7398\u001b[0m \u001b[38;5;66;03m# is handled without spurious warnings.\u001b[39;00m\n\u001b[0;32m 7399\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m np\u001b[38;5;241m.\u001b[39merrstate(invalid\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mignore\u001b[39m\u001b[38;5;124m'\u001b[39m):\n",
"\u001b[1;31mValueError\u001b[0m: For each axis slice, the sum of the observed frequencies must agree with the sum of the expected frequencies to a relative tolerance of 1e-08, but the percent differences are:\n0.4778112197861302"
]
}
],
"source": [
"#H0: our observations follow Poisson\n",
"#H1: our observations do not follow Poisson\n",
"observed=np.array([32,15,9,4])\n",
"mu=2\n",
"poisson_dist=poisson(mu)\n",
"\n",
"poisson_pmfs=np.array([poisson_dist.pmf(i) for i in [0,1,3,4]])\n",
"\n",
"expected=poisson_pmfs*60\n",
"\n",
"st.chisquare(f_obs=observed,f_exp=expected)"
]
},
{
Expand All @@ -60,11 +200,44 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 43,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Power_divergenceResult(statistic=8.306179519542805, pvalue=0.015715783395950887)"
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"n=10\n",
"p=0.05\n",
"observed=np.array([138,53,9])\n",
"\n",
"from scipy.stats import binom\n",
"binom_dist=binom(n,p)\n",
"\n",
"binom_pmfs=np.array([binom_dist.pmf(i) for i in range(0,2)])\n",
"\n",
"with_tail=np.append(binom_pmfs,1-binom_pmfs.sum())\n",
"\n",
"expected=with_tail*200\n",
"\n",
"st.chisquare(f_obs=observed,f_exp=expected)"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [],
"source": [
"# your answer here"
"#we can reject that our observations follow a binomial distribution"
]
},
{
Expand All @@ -79,11 +252,46 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 45,
"metadata": {},
"outputs": [],
"source": [
"#contingency table\n",
"\n",
"#H0: physical activity is independent of consumption of sugary drinks\n",
"#H1: physical activity is dependent on consumption of sugary drinks\n"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Chi2ContingencyResult(statistic=10.712198008709638, pvalue=0.004719280137040844, dof=2, expected_freq=array([[24.08421053, 19.91578947],\n",
" [19.70526316, 16.29473684],\n",
" [ 8.21052632, 6.78947368]]))"
]
},
"execution_count": 48,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"table=np.array([[32,12],[14,22],[6,9]])\n",
"st.chi2_contingency(table)"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [],
"source": [
"#your answer here"
"#we can reject the null hypothesis, so there seems to be a correlation between physical activity and consumption of sugary drinks"
]
}
],
Expand All @@ -103,7 +311,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3"
"version": "3.11.4"
}
},
"nbformat": 4,
Expand Down