ta-data-lis · danielmdepaoli · Aug 11, 2023
diff --git a/your-code/main.ipynb → your-code/ConfidenceIntervalsLab.ipynb b/your-code/main.ipynb → your-code/ConfidenceIntervalsLab.ipynb
@@ -9,11 +9,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Libraries"
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import scipy.stats as st"
    ]
   },
   {
@@ -32,30 +34,207 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 21,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# your code here"
+    "heights = np.array([167, 167, 168, 168, 168, 169, 171, 172, 173, 175, 175, 175, 177, 182, 195])\n",
+    "\n",
+    "# Because we have fewer than 30 observations, the t interval is the appropriate choice over the normal interval unless the population standard deviation is known."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Normal interval: uses a fixed sigma rather than the sample standard deviation\n",
+    "\n",
+    "mean = np.mean(heights)\n",
+    "sigma = 4  # assumed known population standard deviation (TODO confirm against the exercise text)\n",
+    "n = len(heights)  # derive the sample size from the data instead of hard-coding it"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(172.14308590115726, 174.79024743217607)"
+      ]
+     },
+     "execution_count": 20,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "st.norm.interval(0.8, loc=mean, scale=sigma/np.sqrt(n))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(170.9117270472475, 176.02160628608584)"
+      ]
+     },
+     "execution_count": 23,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Using T distribution\n",
+    "\n",
+    "s = heights.std(ddof=1)\n",
+    "mean = heights.mean()\n",
+    "n = len(heights)\n",
+    "\n",
+    "st.t.interval(0.8, n-1, loc=mean, scale=s/np.sqrt(n))"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
     "## Challenge 2 \n",
-    "In a sample of 105 shops selected randomly from an area, we note that 27 of them have had losses in this month. Get an interval for the proportion of businesses in the area with losses to a confidence level of 80% and a confidence level of 90%.\n",
+    "In a sample of 105 shops selected randomly from an area, we noted that 27 of them have had losses in this month. Get an interval for the proportion of businesses in the area with losses to a confidence level of 80% and a confidence level of 90%.\n",
     "\n",
     "**Hint**: function `stats.norm.interval` from `scipy` can help you get through this exercise. "
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 50,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# your code here"
+    "n = 105  # sample size; set before p so the n = 15 left over from Challenge 1 is not used\n",
+    "losses = 27\n",
+    "p = losses/n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 51,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "1.2815515655446004"
+      ]
+     },
+     "execution_count": 51,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "z_value = st.norm.ppf(1-(1-0.8)/2)\n",
+    "z_value"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 58,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "z_value = st.norm.ppf(1 - (1-0.8)/2)\n",
+    "se = np.sqrt((p * (1- p))/n)\n",
+    "margin_of_error = z_value * se"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 53,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "lower_bound = p - margin_of_error\n",
+    "upper_bound = p + margin_of_error"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 54,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Our confidence interval for proportion is [0.20248138545542083, 0.3118043288302934]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Results for 80%\n",
+    "\n",
+    "print(f\"Our confidence interval for proportion is [{lower_bound}, {upper_bound}]\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
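+    "# 80% interval with `st.norm.interval`\n",
+    "\n",
+    "Preferred approach: a single `st.norm.interval` call reproduces the manually computed interval above."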
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 56,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(0.20248138545542083, 0.3118043288302934)"
+      ]
+     },
+     "execution_count": 56,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# 80%\n",
+    "\n",
+    "st.norm.interval(0.8, loc=p, scale=np.sqrt((p * (1- p))/n))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 57,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(0.1869856177645281, 0.3273000965211861)"
+      ]
+     },
+     "execution_count": 57,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# 90%\n",
+    "\n",
+    "st.norm.interval(0.9, loc=p, scale=np.sqrt((p * (1- p))/n))"
    ]
   },
   {
@@ -131,7 +310,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -145,7 +324,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.3"
+   "version": "3.10.9"
   }
  },
  "nbformat": 4,