From e45d1946ca3f031e4a4c39fc32eb75a8ac8df561 Mon Sep 17 00:00:00 2001 From: Jasper Tielmann Date: Tue, 21 Nov 2023 18:07:34 +0000 Subject: [PATCH] Lab done --- your_code/main.ipynb | 230 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 219 insertions(+), 11 deletions(-) diff --git a/your_code/main.ipynb b/your_code/main.ipynb index 7810ccf..bd74f1b 100644 --- a/your_code/main.ipynb +++ b/your_code/main.ipynb @@ -1,5 +1,18 @@ { "cells": [ + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import scipy.stats as st\n", + "from scipy.stats import bernoulli, binom, geom, poisson, uniform, expon, norm,chisquare\n" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -14,17 +27,47 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[ 39.24379068 95.5586303 116.34263239 94.43143662 57.48513704\n", + " 27.99526174 11.36141039 5.58170083]\n", + "0.05\n", + "0.4836889068537269\n", + "Can not Reject H0\n" + ] + } + ], "source": [ - "# your answer here" + "#H0: the observed counts follow a Poisson distribution with mu = 2.435\n", + "#H1: the observed counts do not follow that Poisson distribution\n", + "\n", + "f_obs = np.array([35, 99, 104, 110, 62, 25, 10, 3])\n", + "\n", + "mu = 2.435\n", + "\n", + "poisson_dist = poisson(mu)\n", + "\n", + "pmfs = np.array([poisson_dist.pmf(i) for i in range(7)])\n", + "with_tail = np.append(pmfs, 1 - pmfs.sum())\n", + "f_exp = with_tail*sum(f_obs)\n", + "print(f_exp)\n", + "p_value = st.chisquare(f_obs = f_obs, f_exp = f_exp)[1]\n", + "print(0.05)\n", + "print(p_value)\n", + "print(\"Reject H0\" if p_value < 0.05 else \"Can not Reject H0\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ + "\n", + "\n", "## BONUS/OPTIONAL - Question 2\n", "Let's analyze a discrete distribution. 
To analyze the number of defective items in a factory in the city of Medellín, we took a random sample of n = 60 articles and observed the number of defectives in the following table:\n", "\n", @@ -60,11 +103,39 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[119.74738785 63.02494097 17.22767118]\n", + "0.05\n", + "0.015715783395950887\n", + "Reject H0\n" + ] + } + ], "source": [ - "# your answer here" + "#H0: the observed counts follow a binomial distribution (n = 10, p = 0.05)\n", + "#H1: the observed counts do not follow that binomial distribution\n", + "\n", + "f_obs = np.array([138,53,9])  # NOTE(review): sums to 200, but the question states n = 60 -- confirm the data\n", + "\n", + "n = 10\n", + "p = 0.05\n", + "\n", + "binom_dist = binom(n,p)\n", + "pmfs = np.array([binom_dist.pmf(i) for i in range(2)])\n", + "\n", + "with_tail = np.append(pmfs, 1 - pmfs.sum())\n", + "f_exp = with_tail*sum(f_obs)\n", + "print(f_exp)\n", + "p_value = st.chisquare(f_obs = f_obs, f_exp = f_exp)[1]\n", + "print(0.05)\n", + "print(p_value)\n", + "print(\"Reject H0\" if p_value < 0.05 else \"Can not Reject H0\")" ] }, { @@ -79,17 +150,154 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 29, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
YesNoTotal
Physical Activity
low321244
medium142236
high6915
total524395
\n", + "
" + ], + "text/plain": [ + " Yes No Total\n", + "Physical Activity \n", + "low 32 12 44\n", + "medium 14 22 36\n", + "high 6 9 15\n", + "total 52 43 95" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df =pd.DataFrame({\"Yes\":[32,14,6,52], \"No\":[12,22,9,43], \"Total\":[44,36,15,95], \"Physical Activity\":[\"low\", \"medium\", \"high\", \"total\"]})\n", + "df = df.set_index(\"Physical Activity\")\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.05\n", + "0.004719280137040844\n" + ] + } + ], + "source": [ + "#H0: Physical Activity and the Yes/No response are independent (no association)\n", + "#H1: Physical Activity and the Yes/No response are associated\n", + "\n", + "from scipy import stats\n", + "\n", + "f_obs = [\n", + " [32, 12],\n", + " [14, 22],\n", + " [6, 9]\n", + "]\n", + "\n", + "p = stats.chi2_contingency(f_obs)[1]\n", + "print(0.05)\n", + "print(p)" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'\\nWe reject the null hypothesis (p-value 0.0047 < 0.05). Therefore, there is an association between both.\\n'" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "#your answer here" + "\"\"\"\n", + "We reject the null hypothesis (p-value 0.0047 < 0.05). Therefore, there is an association between both.\n", + "\"\"\"" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -103,7 +311,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.3" + "version": "3.11.3" } }, "nbformat": 4,