From c9b1fc3de60662b8e9b35b05e1281f0be4f64d3c Mon Sep 17 00:00:00 2001
From: AnaCarvalho84 <131803922+AnaCarvalho84@users.noreply.github.com>
Date: Tue, 15 Aug 2023 09:17:12 +0100
Subject: [PATCH 1/3] lab done

---
 main.ipynb | 821 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 821 insertions(+)
 create mode 100644 main.ipynb
diff --git a/main.ipynb b/main.ipynb
new file mode 100644
index 0000000..a9460c2
--- /dev/null
+++ b/main.ipynb
@@ -0,0 +1,821 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Before you start:\n",
+    "- Read the README.md file\n",
+    "- Comment as much as you can and use the resources (README.md file)\n",
+    "- Happy learning!"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 45,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# import numpy and pandas\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "from scipy.stats import ttest_1samp\n",
+    "import scipy.stats as st\n",
+    "import matplotlib.pyplot as plt"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Challenge 1 - Exploring the Data\n",
+    "\n",
+    "In this challenge, we will examine all salaries of employees of the City of Chicago. We will start by loading the dataset and examining its contents."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "chicago = pd.read_csv(filepath_or_buffer=\"Current_Employee_Names__Salaries__and_Position_Titles.csv\")\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Examine the `salaries` dataset using the `head` function below."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Name</th>\n",
+       "      <th>Job Titles</th>\n",
+       "      <th>Department</th>\n",
+       "      <th>Full or Part-Time</th>\n",
+       "      <th>Salary or Hourly</th>\n",
+       "      <th>Typical Hours</th>\n",
+       "      <th>Annual Salary</th>\n",
+       "      <th>Hourly Rate</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>AARON,  JEFFERY M</td>\n",
+       "      <td>SERGEANT</td>\n",
+       "      <td>POLICE</td>\n",
+       "      <td>F</td>\n",
+       "      <td>Salary</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>101442.0</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>AARON,  KARINA</td>\n",
+       "      <td>POLICE OFFICER (ASSIGNED AS DETECTIVE)</td>\n",
+       "      <td>POLICE</td>\n",
+       "      <td>F</td>\n",
+       "      <td>Salary</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>94122.0</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>AARON,  KIMBERLEI R</td>\n",
+       "      <td>CHIEF CONTRACT EXPEDITER</td>\n",
+       "      <td>GENERAL SERVICES</td>\n",
+       "      <td>F</td>\n",
+       "      <td>Salary</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>101592.0</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>ABAD JR,  VICENTE M</td>\n",
+       "      <td>CIVIL ENGINEER IV</td>\n",
+       "      <td>WATER MGMNT</td>\n",
+       "      <td>F</td>\n",
+       "      <td>Salary</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>110064.0</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>ABASCAL,  REECE E</td>\n",
+       "      <td>TRAFFIC CONTROL AIDE-HOURLY</td>\n",
+       "      <td>OEMC</td>\n",
+       "      <td>P</td>\n",
+       "      <td>Hourly</td>\n",
+       "      <td>20.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>19.86</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                  Name                              Job Titles  \\\n",
+       "0    AARON,  JEFFERY M                                SERGEANT   \n",
+       "1      AARON,  KARINA   POLICE OFFICER (ASSIGNED AS DETECTIVE)   \n",
+       "2  AARON,  KIMBERLEI R                CHIEF CONTRACT EXPEDITER   \n",
+       "3  ABAD JR,  VICENTE M                       CIVIL ENGINEER IV   \n",
+       "4    ABASCAL,  REECE E             TRAFFIC CONTROL AIDE-HOURLY   \n",
+       "\n",
+       "         Department Full or Part-Time Salary or Hourly  Typical Hours  \\\n",
+       "0            POLICE                 F           Salary            NaN   \n",
+       "1            POLICE                 F           Salary            NaN   \n",
+       "2  GENERAL SERVICES                 F           Salary            NaN   \n",
+       "3       WATER MGMNT                 F           Salary            NaN   \n",
+       "4              OEMC                 P           Hourly           20.0   \n",
+       "\n",
+       "   Annual Salary  Hourly Rate  \n",
+       "0       101442.0          NaN  \n",
+       "1        94122.0          NaN  \n",
+       "2       101592.0          NaN  \n",
+       "3       110064.0          NaN  \n",
+       "4            NaN        19.86  "
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "chicago.head(n=5)\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We see from looking at the `head` function that there is quite a bit of missing data. Let's examine how much missing data is in each column. Produce this output in the cell below"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Name                     0\n",
+       "Job Titles               0\n",
+       "Department               0\n",
+       "Full or Part-Time        0\n",
+       "Salary or Hourly         0\n",
+       "Typical Hours        25161\n",
+       "Annual Salary         8022\n",
+       "Hourly Rate          25161\n",
+       "dtype: int64"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "chicago_null = chicago.isna().sum()  # number of missing values in each column\n",
+    "chicago_null\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Let's also look at the count of hourly vs. salaried employees. Write the code in the cell below"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 57,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Salary    25161\n",
+       "Hourly     8022\n",
+       "Name: Salary or Hourly, dtype: int64"
+      ]
+     },
+     "execution_count": 57,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "chicago.loc[:, \"Salary or Hourly\"].value_counts()\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "What this information indicates is that the table contains information about two types of employees - salaried and hourly. Some columns apply only to one type of employee while other columns only apply to another kind. This is why there are so many missing values. Therefore, we will not do anything to handle the missing values."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "There are different departments in the city. List all departments and the count of employees in each department."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 49,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Name</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Department</th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>ADMIN HEARNG</th>\n",
+       "      <td>39</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>ANIMAL CONTRL</th>\n",
+       "      <td>81</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>AVIATION</th>\n",
+       "      <td>1629</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>BOARD OF ELECTION</th>\n",
+       "      <td>107</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>BOARD OF ETHICS</th>\n",
+       "      <td>8</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>BUDGET &amp; MGMT</th>\n",
+       "      <td>46</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>BUILDINGS</th>\n",
+       "      <td>269</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>BUSINESS AFFAIRS</th>\n",
+       "      <td>171</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>CITY CLERK</th>\n",
+       "      <td>84</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>CITY COUNCIL</th>\n",
+       "      <td>411</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>COMMUNITY DEVELOPMENT</th>\n",
+       "      <td>207</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>COPA</th>\n",
+       "      <td>116</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>CULTURAL AFFAIRS</th>\n",
+       "      <td>65</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>DISABILITIES</th>\n",
+       "      <td>28</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>DoIT</th>\n",
+       "      <td>99</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>FAMILY &amp; SUPPORT</th>\n",
+       "      <td>615</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>FINANCE</th>\n",
+       "      <td>560</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>FIRE</th>\n",
+       "      <td>4641</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>GENERAL SERVICES</th>\n",
+       "      <td>980</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>HEALTH</th>\n",
+       "      <td>488</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>HUMAN RELATIONS</th>\n",
+       "      <td>16</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>HUMAN RESOURCES</th>\n",
+       "      <td>79</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>INSPECTOR GEN</th>\n",
+       "      <td>87</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>LAW</th>\n",
+       "      <td>407</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>LICENSE APPL COMM</th>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>MAYOR'S OFFICE</th>\n",
+       "      <td>85</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>OEMC</th>\n",
+       "      <td>2102</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>POLICE</th>\n",
+       "      <td>13414</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>POLICE BOARD</th>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>PROCUREMENT</th>\n",
+       "      <td>92</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>PUBLIC LIBRARY</th>\n",
+       "      <td>1015</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>STREETS &amp; SAN</th>\n",
+       "      <td>2198</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>TRANSPORTN</th>\n",
+       "      <td>1140</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>TREASURER</th>\n",
+       "      <td>22</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>WATER MGMNT</th>\n",
+       "      <td>1879</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                        Name\n",
+       "Department                  \n",
+       "ADMIN HEARNG              39\n",
+       "ANIMAL CONTRL             81\n",
+       "AVIATION                1629\n",
+       "BOARD OF ELECTION        107\n",
+       "BOARD OF ETHICS            8\n",
+       "BUDGET & MGMT             46\n",
+       "BUILDINGS                269\n",
+       "BUSINESS AFFAIRS         171\n",
+       "CITY CLERK                84\n",
+       "CITY COUNCIL             411\n",
+       "COMMUNITY DEVELOPMENT    207\n",
+       "COPA                     116\n",
+       "CULTURAL AFFAIRS          65\n",
+       "DISABILITIES              28\n",
+       "DoIT                      99\n",
+       "FAMILY & SUPPORT         615\n",
+       "FINANCE                  560\n",
+       "FIRE                    4641\n",
+       "GENERAL SERVICES         980\n",
+       "HEALTH                   488\n",
+       "HUMAN RELATIONS           16\n",
+       "HUMAN RESOURCES           79\n",
+       "INSPECTOR GEN             87\n",
+       "LAW                      407\n",
+       "LICENSE APPL COMM          1\n",
+       "MAYOR'S OFFICE            85\n",
+       "OEMC                    2102\n",
+       "POLICE                 13414\n",
+       "POLICE BOARD               2\n",
+       "PROCUREMENT               92\n",
+       "PUBLIC LIBRARY          1015\n",
+       "STREETS & SAN           2198\n",
+       "TRANSPORTN              1140\n",
+       "TREASURER                 22\n",
+       "WATER MGMNT             1879"
+      ]
+     },
+     "execution_count": 49,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Head-count per department: number of Name entries in each Department group\n",
+    "chicago.groupby(\"Department\")[[\"Name\"]].count()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Challenge 2 - Hypothesis Tests\n",
+    "\n",
+    "In this section of the lab, we will test whether the hourly wage of all hourly workers is significantly different from $30/hr. Import the correct one sample test function from scipy and perform the hypothesis test for a 95% two sided confidence interval."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 65,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "TtestResult(statistic=20.6198057854942, pvalue=4.3230240486229894e-92, df=8021)\n",
+      "Reject the null hypothesis: The average hourly wage is significantly different from $30/hr.\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Two-sided one-sample t-test: mean hourly rate of hourly workers vs. $30/hour.\n",
+    "# H0: the average hourly wage of hourly workers equals $30/hour.\n",
+    "# H1: the average hourly wage of hourly workers differs from $30/hour.\n",
+    "\n",
+    "alpha, mu = 0.05, 30  # significance level, hypothesised mean\n",
+    "\n",
+    "# Hourly rate of every employee paid by the hour\n",
+    "is_hourly = chicago[\"Salary or Hourly\"] == \"Hourly\"\n",
+    "sample = chicago.loc[is_hourly, \"Hourly Rate\"]\n",
+    "\n",
+    "# Run the test once; show the full result, then unpack statistic and p-value\n",
+    "test_result = st.ttest_1samp(sample, mu)\n",
+    "print(test_result)\n",
+    "t_statistic, p_value = test_result\n",
+    "\n",
+    "if p_value < alpha:\n",
+    "    print(\"Reject the null hypothesis: The average hourly wage is significantly different from $30/hr.\")\n",
+    "else:\n",
+    "    print(\"Fail to reject the null hypothesis: The average hourly wage is not significantly different from $30/hr.\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We are also curious about salaries in the police force. The chief of police in Chicago claimed in a press briefing that salaries this year are higher than last year's mean of $86000/year for all salaried employees. Test this one-sided hypothesis using a 95% confidence interval.\n",
+    "\n",
+    "Hint: A one tailed test has a p-value that is half of the two tailed p-value. If our hypothesis is greater than, then to reject, the test statistic must also be positive."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 79,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "T-statistic: 5.932870515690814\n",
+      "P-value: 0.9999999984921207\n",
+      "We can not reject the null hypothesis\n"
+     ]
+    }
+   ],
+   "source": [
+    "# One-sided one-sample t-test of the police chief's claim.\n",
+    "# H0: mean annual salary of salaried police employees <= $86000/year\n",
+    "# H1: mean annual salary of salaried police employees > $86000/year\n",
+    "\n",
+    "# Significance level\n",
+    "alpha = 0.05\n",
+    "\n",
+    "# Sample: salaried POLICE employees only, because the claim concerns the police force\n",
+    "sample = chicago[(chicago[\"Department\"] == \"POLICE\") & (chicago[\"Salary or Hourly\"] == \"Salary\")]\n",
+    "mu = 86000\n",
+    "annual_salary = sample[\"Annual Salary\"]\n",
+    "\n",
+    "# The claim is 'higher than', so the alternative is 'greater'; H0 is rejected only for a large positive t\n",
+    "t_statistic, p_value = st.ttest_1samp(annual_salary, mu, alternative='greater')\n",
+    "\n",
+    "print(f\"T-statistic: {t_statistic}\")\n",
+    "print(f\"P-value: {p_value}\")\n",
+    "\n",
+    "if p_value < alpha:\n",
+    "    print(\"We can reject the null hypothesis\")\n",
+    "else:\n",
+    "    print(\"We can not reject the null hypothesis\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Using the `crosstab` function, find the department that has the most hourly workers. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 92,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Department with the most hourly workers: STREETS & SAN\n",
+      "Number of hourly workers in STREETS & SAN: 1862\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Count employees per department, split by pay type (one column per 'Salary or Hourly' value)\n",
+    "\n",
+    "cross_tab = pd.crosstab(chicago['Department'], chicago['Salary or Hourly'])\n",
+    "\n",
+    "department_with_most_hourly_workers = cross_tab['Hourly'].idxmax()\n",
+    "\n",
+    "# Look the count up by the computed department, not a hard-coded name, so both prints always agree (variable name kept as-is)\n",
+    "hourly_workers_in_streets_san = cross_tab.loc[department_with_most_hourly_workers, 'Hourly']\n",
+    "print(\"Department with the most hourly workers:\", department_with_most_hourly_workers)\n",
+    "print(f\"Number of hourly workers in {department_with_most_hourly_workers}:\", hourly_workers_in_streets_san)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The workers from the department with the most hourly workers have complained that their hourly wage is less than $35/hour. Using a one sample t-test, test this one-sided hypothesis at the 95% confidence level."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 91,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "T-statistic: -5.096278326234201\n",
+      "P-value: 0.9999997626711307\n",
+      "We cannot reject the null hypothesis\n"
+     ]
+    }
+   ],
+   "source": [
+    "# One-sided one-sample t-test of the Streets & San workers' complaint.\n",
+    "# The complaint (hourly wage is less than $35) is the alternative hypothesis:\n",
+    "# H0: mean hourly wage of Streets & San hourly workers >= $35\n",
+    "# H1: mean hourly wage of Streets & San hourly workers < $35\n",
+    "\n",
+    "# Significance level\n",
+    "alpha = 0.05\n",
+    "\n",
+    "# Sample: 560 of the 1862 Streets & San hourly workers (about 30% of them)\n",
+    "# random_state pins the random draw so the cell prints the same result on every run\n",
+    "sample = chicago[(chicago[\"Department\"] == \"STREETS & SAN\") & (chicago[\"Salary or Hourly\"] == \"Hourly\")][\"Hourly Rate\"].sample(560, random_state=42)\n",
+    "mu = 35\n",
+    "\n",
+    "# H1 is 'less than', so the test must use alternative='less' (not 'greater')\n",
+    "t_statistic, p_value = st.ttest_1samp(sample, mu, alternative=\"less\")\n",
+    "\n",
+    "print(f\"T-statistic: {t_statistic}\")\n",
+    "print(f\"P-value: {p_value}\")\n",
+    "\n",
+    "if p_value < alpha:\n",
+    "    print(\"We can reject the null hypothesis\")\n",
+    "else:\n",
+    "    print(\"We cannot reject the null hypothesis\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Challenge 3: To practice - Constructing Confidence Intervals\n",
+    "\n",
+    "While testing our hypothesis is a great way to gather empirical evidence for accepting or rejecting the hypothesis, another way to gather evidence is by creating a confidence interval. A confidence interval gives us information about the true mean of the population. So for a 95% confidence interval, we are 95% sure that the mean of the population is within the confidence interval. \n",
+    "\n",
+    "\n",
+    "To read more about confidence intervals, click [here](https://en.wikipedia.org/wiki/Confidence_interval).\n",
+    "\n",
+    "\n",
+    "In the cell below, we will construct a 95% confidence interval for the mean hourly wage of all hourly workers. \n",
+    "\n",
+    "The confidence interval is computed in SciPy using the `t.interval` function. You can read more about this function [here](https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.stats.t.html).\n",
+    "\n",
+    "To compute the confidence interval of the hourly wage, use the 0.95 for the confidence level, number of rows - 1 for degrees of freedom, the mean of the sample for the location parameter and the standard error for the scale. The standard error can be computed using [this](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.sem.html) function in SciPy."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 93,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "95% Confidence Interval for Mean Hourly Wage: (32.52345834488425, 33.05365708767623)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# 95% t-based confidence interval for the mean hourly rate of hourly workers\n",
+    "\n",
+    "# rows of employees paid by the hour\n",
+    "hourly_workers = chicago.loc[chicago[\"Salary or Hourly\"] == \"Hourly\"]\n",
+    "\n",
+    "# confidence level and degrees of freedom (number of rows - 1)\n",
+    "confidence_level = 0.95\n",
+    "degrees_of_freedom = hourly_workers.shape[0] - 1\n",
+    "\n",
+    "# location parameter: sample mean of the hourly rate\n",
+    "mean_hourly_wage = hourly_workers[\"Hourly Rate\"].mean()\n",
+    "\n",
+    "# scale parameter: standard error of the mean\n",
+    "standard_error = st.sem(hourly_workers[\"Hourly Rate\"])\n",
+    "\n",
+    "# interval from the t distribution\n",
+    "confidence_interval = st.t.interval(confidence_level, df=degrees_of_freedom, loc=mean_hourly_wage, scale=standard_error)\n",
+    "\n",
+    "print(f\"95% Confidence Interval for Mean Hourly Wage: {confidence_interval}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "***My insights***\n",
+    "I'm quite confident that the true average hourly wage of all hourly workers lies between approximately \\$32.52 and \\$33.05 with a 95% confidence level. This gives me an idea of the likely range within which the true average lies based on the available sample."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now construct the 95% confidence interval for the mean annual salary of all salaried employees in the police department in the cell below."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 99,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "95% Confidence Interval for Annual Salary employees in Police: (86476.5176546444, 86496.31135162238)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# sample: annual salary of the salaried employees in the POLICE department\n",
+    "salaried_employeed = chicago[(chicago[\"Department\"]==\"POLICE\") & (chicago[\"Salary or Hourly\"] ==\"Salary\")][\"Annual Salary\"]\n",
+    "\n",
+    "# confidence level, and the matching significance level (alpha is NOT what t.interval expects)\n",
+    "confidance_level = 0.95\n",
+    "alpha = 1 - confidance_level\n",
+    "\n",
+    "# degrees of freedom\n",
+    "ddof = len(salaried_employeed) - 1\n",
+    "\n",
+    "# compute mean and standard error\n",
+    "mean = salaried_employeed.mean()\n",
+    "standard_error = st.sem(salaried_employeed)\n",
+    "\n",
+    "# t.interval takes the confidence level (0.95) as its first argument; passing alpha (0.05) would give a 5% interval\n",
+    "confidence_interval = st.t.interval(confidance_level, df=ddof, loc=mean, scale=standard_error)\n",
+    "\n",
+    "print(f\"95% Confidence Interval for Annual Salary employees in Police: {confidence_interval}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "***My insights***\n",
+    "I'm quite confident that the true average annual salary of employees in the \"POLICE\" department lies between approximately \\$86476.52 and \\$86496.31 with a 95% confidence level."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Bonus Challenge - Hypothesis Tests of Proportions\n",
+    "\n",
+    "Another type of one sample test is a hypothesis test of proportions. In this test, we examine whether the proportion of a group in our sample is significantly different than a fraction. \n",
+    "\n",
+    "You can read more about one sample proportion tests [here](http://sphweb.bumc.bu.edu/otlt/MPH-Modules/BS/SAS/SAS6-CategoricalData/SAS6-CategoricalData2.html).\n",
+    "\n",
+    "In the cell below, use the `proportions_ztest` function from `statsmodels` to perform a hypothesis test that will determine whether the proportion of hourly workers in the City of Chicago is significantly different from 25% at the 95% confidence level."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Your code here:\n",
+    "\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

From d9cd6c9cb6253b9007dc48dd81c7f0f488eab201 Mon Sep 17 00:00:00 2001
From: AnaCarvalho84 <131803922+AnaCarvalho84@users.noreply.github.com>
Date: Tue, 15 Aug 2023 09:22:11 +0100
Subject: [PATCH 2/3] lab done

---
 your-code/main.ipynb | 622 ++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 582 insertions(+), 40 deletions(-)

diff --git a/your-code/main.ipynb b/your-code/main.ipynb
index 59b955a..a9460c2 100755
--- a/your-code/main.ipynb
+++ b/your-code/main.ipynb
@@ -12,12 +12,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 45,
    "metadata": {},
    "outputs": [],
    "source": [
     "# import numpy and pandas\n",
-    "\n"
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "from scipy.stats import ttest_1samp\n",
+    "import scipy.stats as st\n",
+    "import matplotlib.pyplot as plt"
    ]
   },
   {
@@ -31,11 +35,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 13,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here:\n"
+    "chicago = pd.read_csv(filepath_or_buffer=\"Current_Employee_Names__Salaries__and_Position_Titles.csv\")\n"
    ]
   },
   {
@@ -47,11 +51,130 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 14,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Name</th>\n",
+       "      <th>Job Titles</th>\n",
+       "      <th>Department</th>\n",
+       "      <th>Full or Part-Time</th>\n",
+       "      <th>Salary or Hourly</th>\n",
+       "      <th>Typical Hours</th>\n",
+       "      <th>Annual Salary</th>\n",
+       "      <th>Hourly Rate</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>AARON,  JEFFERY M</td>\n",
+       "      <td>SERGEANT</td>\n",
+       "      <td>POLICE</td>\n",
+       "      <td>F</td>\n",
+       "      <td>Salary</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>101442.0</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>AARON,  KARINA</td>\n",
+       "      <td>POLICE OFFICER (ASSIGNED AS DETECTIVE)</td>\n",
+       "      <td>POLICE</td>\n",
+       "      <td>F</td>\n",
+       "      <td>Salary</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>94122.0</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>AARON,  KIMBERLEI R</td>\n",
+       "      <td>CHIEF CONTRACT EXPEDITER</td>\n",
+       "      <td>GENERAL SERVICES</td>\n",
+       "      <td>F</td>\n",
+       "      <td>Salary</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>101592.0</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>ABAD JR,  VICENTE M</td>\n",
+       "      <td>CIVIL ENGINEER IV</td>\n",
+       "      <td>WATER MGMNT</td>\n",
+       "      <td>F</td>\n",
+       "      <td>Salary</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>110064.0</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>ABASCAL,  REECE E</td>\n",
+       "      <td>TRAFFIC CONTROL AIDE-HOURLY</td>\n",
+       "      <td>OEMC</td>\n",
+       "      <td>P</td>\n",
+       "      <td>Hourly</td>\n",
+       "      <td>20.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>19.86</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                  Name                              Job Titles  \\\n",
+       "0    AARON,  JEFFERY M                                SERGEANT   \n",
+       "1      AARON,  KARINA   POLICE OFFICER (ASSIGNED AS DETECTIVE)   \n",
+       "2  AARON,  KIMBERLEI R                CHIEF CONTRACT EXPEDITER   \n",
+       "3  ABAD JR,  VICENTE M                       CIVIL ENGINEER IV   \n",
+       "4    ABASCAL,  REECE E             TRAFFIC CONTROL AIDE-HOURLY   \n",
+       "\n",
+       "         Department Full or Part-Time Salary or Hourly  Typical Hours  \\\n",
+       "0            POLICE                 F           Salary            NaN   \n",
+       "1            POLICE                 F           Salary            NaN   \n",
+       "2  GENERAL SERVICES                 F           Salary            NaN   \n",
+       "3       WATER MGMNT                 F           Salary            NaN   \n",
+       "4              OEMC                 P           Hourly           20.0   \n",
+       "\n",
+       "   Annual Salary  Hourly Rate  \n",
+       "0       101442.0          NaN  \n",
+       "1        94122.0          NaN  \n",
+       "2       101592.0          NaN  \n",
+       "3       110064.0          NaN  \n",
+       "4            NaN        19.86  "
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "# Your code here:\n",
+    "chicago.head(n=5)\n",
     "\n"
    ]
   },
@@ -64,11 +187,31 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 15,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Name                     0\n",
+       "Job Titles               0\n",
+       "Department               0\n",
+       "Full or Part-Time        0\n",
+       "Salary or Hourly         0\n",
+       "Typical Hours        25161\n",
+       "Annual Salary         8022\n",
+       "Hourly Rate          25161\n",
+       "dtype: int64"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "# Your code here:\n",
+    "chicago_null = chicago.isna().sum()  # number of missing values in each column\n",
+    "chicago_null\n",
     "\n"
    ]
   },
@@ -81,12 +224,24 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 57,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Salary    25161\n",
+       "Hourly     8022\n",
+       "Name: Salary or Hourly, dtype: int64"
+      ]
+     },
+     "execution_count": 57,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "# Your code here:\n",
-    "\n"
+    "chicago.loc[:, \"Salary or Hourly\"].value_counts()  # number of employees per pay type\n"
    ]
   },
   {
@@ -105,12 +260,230 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 49,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Name</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Department</th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>ADMIN HEARNG</th>\n",
+       "      <td>39</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>ANIMAL CONTRL</th>\n",
+       "      <td>81</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>AVIATION</th>\n",
+       "      <td>1629</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>BOARD OF ELECTION</th>\n",
+       "      <td>107</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>BOARD OF ETHICS</th>\n",
+       "      <td>8</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>BUDGET &amp; MGMT</th>\n",
+       "      <td>46</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>BUILDINGS</th>\n",
+       "      <td>269</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>BUSINESS AFFAIRS</th>\n",
+       "      <td>171</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>CITY CLERK</th>\n",
+       "      <td>84</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>CITY COUNCIL</th>\n",
+       "      <td>411</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>COMMUNITY DEVELOPMENT</th>\n",
+       "      <td>207</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>COPA</th>\n",
+       "      <td>116</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>CULTURAL AFFAIRS</th>\n",
+       "      <td>65</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>DISABILITIES</th>\n",
+       "      <td>28</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>DoIT</th>\n",
+       "      <td>99</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>FAMILY &amp; SUPPORT</th>\n",
+       "      <td>615</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>FINANCE</th>\n",
+       "      <td>560</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>FIRE</th>\n",
+       "      <td>4641</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>GENERAL SERVICES</th>\n",
+       "      <td>980</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>HEALTH</th>\n",
+       "      <td>488</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>HUMAN RELATIONS</th>\n",
+       "      <td>16</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>HUMAN RESOURCES</th>\n",
+       "      <td>79</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>INSPECTOR GEN</th>\n",
+       "      <td>87</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>LAW</th>\n",
+       "      <td>407</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>LICENSE APPL COMM</th>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>MAYOR'S OFFICE</th>\n",
+       "      <td>85</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>OEMC</th>\n",
+       "      <td>2102</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>POLICE</th>\n",
+       "      <td>13414</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>POLICE BOARD</th>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>PROCUREMENT</th>\n",
+       "      <td>92</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>PUBLIC LIBRARY</th>\n",
+       "      <td>1015</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>STREETS &amp; SAN</th>\n",
+       "      <td>2198</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>TRANSPORTN</th>\n",
+       "      <td>1140</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>TREASURER</th>\n",
+       "      <td>22</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>WATER MGMNT</th>\n",
+       "      <td>1879</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                        Name\n",
+       "Department                  \n",
+       "ADMIN HEARNG              39\n",
+       "ANIMAL CONTRL             81\n",
+       "AVIATION                1629\n",
+       "BOARD OF ELECTION        107\n",
+       "BOARD OF ETHICS            8\n",
+       "BUDGET & MGMT             46\n",
+       "BUILDINGS                269\n",
+       "BUSINESS AFFAIRS         171\n",
+       "CITY CLERK                84\n",
+       "CITY COUNCIL             411\n",
+       "COMMUNITY DEVELOPMENT    207\n",
+       "COPA                     116\n",
+       "CULTURAL AFFAIRS          65\n",
+       "DISABILITIES              28\n",
+       "DoIT                      99\n",
+       "FAMILY & SUPPORT         615\n",
+       "FINANCE                  560\n",
+       "FIRE                    4641\n",
+       "GENERAL SERVICES         980\n",
+       "HEALTH                   488\n",
+       "HUMAN RELATIONS           16\n",
+       "HUMAN RESOURCES           79\n",
+       "INSPECTOR GEN             87\n",
+       "LAW                      407\n",
+       "LICENSE APPL COMM          1\n",
+       "MAYOR'S OFFICE            85\n",
+       "OEMC                    2102\n",
+       "POLICE                 13414\n",
+       "POLICE BOARD               2\n",
+       "PROCUREMENT               92\n",
+       "PUBLIC LIBRARY          1015\n",
+       "STREETS & SAN           2198\n",
+       "TRANSPORTN              1140\n",
+       "TREASURER                 22\n",
+       "WATER MGMNT             1879"
+      ]
+     },
+     "execution_count": 49,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "# Your code here:\n",
-    "\n"
+    "chicago.groupby(\"Department\")[[\"Name\"]].count()  # number of employees per department"
    ]
   },
   {
@@ -124,12 +497,38 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 65,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "TtestResult(statistic=20.6198057854942, pvalue=4.3230240486229894e-92, df=8021)\n",
+      "Reject the null hypothesis: The average hourly wage is significantly different from $30/hr.\n"
+     ]
+    }
+   ],
    "source": [
     "# Your code here:\n",
-    "\n"
+    "# Two-sided one-sample t-test on the hourly rate of hourly workers.\n",
+    "# H0: mean hourly wage == $30/hour; H1: mean hourly wage != $30/hour.\n",
+    "\n",
+    "# Significance level and hypothesised mean\n",
+    "alpha = 0.05\n",
+    "mu = 30\n",
+    "\n",
+    "# Sample: hourly rate of every hourly worker (one .loc lookup, no chained indexing)\n",
+    "sample = chicago.loc[chicago[\"Salary or Hourly\"] == \"Hourly\", \"Hourly Rate\"]\n",
+    "\n",
+    "# Run the test once and reuse the result for both printing and the decision\n",
+    "test_result = st.ttest_1samp(sample, mu)\n",
+    "t_statistic, p_value = test_result\n",
+    "print(test_result)\n",
+    "if p_value < alpha:\n",
+    "    print(\"Reject the null hypothesis: The average hourly wage is significantly different from $30/hr.\")\n",
+    "else:\n",
+    "    print(\"Fail to reject the null hypothesis: The average hourly wage is not significantly different from $30/hr.\")"
    ]
   },
   {
@@ -143,12 +542,42 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 79,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "T-statistic: 5.932870515690814\n",
+      "P-value: 0.9999999984921207\n",
+      "We can not reject the null hypothesis\n"
+     ]
+    }
+   ],
    "source": [
-    "# Your code here:\n",
-    "\n"
+    "# One-sided one-sample t-test on the annual salary of salaried employees.\n",
+    "# H0: mean annual salary >= $86000/year\n",
+    "# H1: mean annual salary <  $86000/year\n",
+    "# NOTE(review): with alternative='less' a positive t-statistic yields a p-value above 0.5;\n",
+    "# confirm this is the intended direction for the claim being tested.\n",
+    "\n",
+    "# Significance level and hypothesised mean\n",
+    "alpha = 0.05\n",
+    "mu = 86000\n",
+    "\n",
+    "# Sample: all salaried employees and their annual salary\n",
+    "is_salaried = chicago[\"Salary or Hourly\"] == \"Salary\"\n",
+    "sample = chicago[is_salaried]\n",
+    "annual_salary = sample[\"Annual Salary\"]\n",
+    "\n",
+    "# Compute the test statistic and the lower-tail p-value\n",
+    "t_statistic, p_value = st.ttest_1samp(annual_salary, mu, alternative='less')\n",
+    "print(f\"T-statistic: {t_statistic}\")\n",
+    "print(f\"P-value: {p_value}\")\n",
+    "\n",
+    "decision = \"We can reject the null hypothesis\" if p_value < alpha else \"We can not reject the null hypothesis\"\n",
+    "print(decision)"
    ]
   },
   {
@@ -160,12 +589,29 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 92,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Department with the most hourly workers: STREETS & SAN\n",
+      "Number of hourly workers in STREETS & SAN: 1862\n"
+     ]
+    }
+   ],
    "source": [
     "# Your code here:\n",
-    "\n"
+    "# Count employees per department, split by pay type (Salary / Hourly)\n",
+    "cross_tab = pd.crosstab(chicago['Department'], chicago['Salary or Hourly'])\n",
+    "\n",
+    "# Department whose 'Hourly' count is largest, and that count.\n",
+    "# The count is looked up with the computed label rather than a hard-coded department name.\n",
+    "department_with_most_hourly_workers = cross_tab['Hourly'].idxmax()\n",
+    "hourly_workers_in_streets_san = cross_tab.loc[department_with_most_hourly_workers, 'Hourly']\n",
+    "print(\"Department with the most hourly workers:\", department_with_most_hourly_workers)\n",
+    "print(f\"Number of hourly workers in {department_with_most_hourly_workers}:\", hourly_workers_in_streets_san)\n"
    ]
   },
   {
@@ -177,12 +623,43 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 91,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "T-statistic: -5.096278326234201\n",
+      "P-value: 0.9999997626711307\n",
+      "We cannot reject the null hypothesis\n"
+     ]
+    }
+   ],
    "source": [
     "# Your code here:\n",
-    "\n"
+    "# One-sided one-sample t-test on the hourly wage of STREETS & SAN hourly workers.\n",
+    "# H0: mean hourly wage <= $35/hour\n",
+    "# H1: mean hourly wage >  $35/hour\n",
+    "\n",
+    "# Significance level and hypothesised mean\n",
+    "alpha = 0.05\n",
+    "mu = 35\n",
+    "\n",
+    "# Sample: 560 of the 1862 STREETS & SAN hourly workers (about 30%).\n",
+    "# random_state pins the draw so the statistic below is the same on every run.\n",
+    "in_streets_san = (chicago[\"Department\"] == \"STREETS & SAN\") & (chicago[\"Salary or Hourly\"] == \"Hourly\")\n",
+    "sample = chicago.loc[in_streets_san, \"Hourly Rate\"].sample(560, random_state=42)\n",
+    "\n",
+    "# Test statistic and upper-tail p-value\n",
+    "t_statistic, p_value = st.ttest_1samp(sample, mu, alternative=\"greater\")\n",
+    "\n",
+    "print(f\"T-statistic: {t_statistic}\")\n",
+    "print(f\"P-value: {p_value}\")\n",
+    "if p_value < alpha:\n",
+    "    print(\"We can reject the null hypothesis\")\n",
+    "else:\n",
+    "    print(\"We cannot reject the null hypothesis\")"
    ]
   },
   {
@@ -206,12 +683,45 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 93,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "95% Confidence Interval for Mean Hourly Wage: (32.52345834488425, 33.05365708767623)\n"
+     ]
+    }
+   ],
    "source": [
     "# Your code here:\n",
-    "\n"
+    "# 95% confidence interval for the mean hourly wage of all hourly workers.\n",
+    "confidence_level = 0.95\n",
+    "\n",
+    "# Rows of hourly workers and their hourly rates\n",
+    "hourly_workers = chicago[chicago['Salary or Hourly'] == 'Hourly']\n",
+    "hourly_rates = hourly_workers['Hourly Rate']\n",
+    "\n",
+    "# Centre (sample mean) and scale (standard error of the mean) of the interval\n",
+    "mean_hourly_wage = hourly_rates.mean()\n",
+    "standard_error = st.sem(hourly_rates)\n",
+    "\n",
+    "# Student's t distribution with n - 1 degrees of freedom\n",
+    "degrees_of_freedom = len(hourly_workers) - 1\n",
+    "confidence_interval = st.t.interval(\n",
+    "    confidence_level, degrees_of_freedom, loc=mean_hourly_wage, scale=standard_error\n",
+    ")\n",
+    "\n",
+    "print(f\"95% Confidence Interval for Mean Hourly Wage: {confidence_interval}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "***My insights***\n",
+    "I am 95% confident that the true average hourly wage of all hourly workers lies between approximately \\$32.52 and \\$33.05. This gives me an idea of the likely range within which the true average lies, based on the available sample."
    ]
   },
   {
@@ -223,12 +733,44 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 99,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "95% Confidence Interval for Annual Salary employees in Police: (86476.5176546444, 86496.31135162238)\n"
+     ]
+    }
+   ],
    "source": [
-    "# Your code here:\n",
-    "\n"
+    "# 95% confidence interval for the mean annual salary of salaried POLICE employees.\n",
+    "salaried_employeed = chicago[(chicago[\"Department\"]==\"POLICE\") & (chicago[\"Salary or Hourly\"] ==\"Salary\")][\"Annual Salary\"]\n",
+    "\n",
+    "# Confidence level of the interval; alpha is its complement (the significance level)\n",
+    "confidance_level = 0.95\n",
+    "alpha = 1 - confidance_level\n",
+    "\n",
+    "# Degrees of freedom of the t distribution: n - 1\n",
+    "ddof = len(salaried_employeed) - 1\n",
+    "\n",
+    "# Sample mean and standard error of the mean\n",
+    "mean = salaried_employeed.mean()\n",
+    "standard_error = st.sem(salaried_employeed)\n",
+    "\n",
+    "# st.t.interval takes the confidence level (0.95) as its first argument, not alpha (0.05);\n",
+    "# passing alpha would return a 5% interval, far narrower than the 95% one reported below.\n",
+    "confidence_interval = st.t.interval(confidance_level, df=ddof, loc=mean, scale=standard_error)\n",
+    "print(f\"95% Confidence Interval for Annual Salary employees in Police: {confidence_interval}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "***My insights***\n",
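+    "I am 95% confident that the true average annual salary of salaried employees in the \"POLICE\" department lies between approximately \\$86,177 and \\$86,796. The much narrower range of \\$86,476.52 to \\$86,496.31 is what `st.t.interval` returns for a confidence level of 0.05, so it is not a 95% interval."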
    ]
   },
   {
@@ -246,7 +788,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 24,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -257,7 +799,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -271,7 +813,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.3"
+   "version": "3.10.9"
   }
  },
  "nbformat": 4,

From 9f0824cbb7db750f34e88de63be126da586f768f Mon Sep 17 00:00:00 2001
From: AnaCarvalho84 <131803922+AnaCarvalho84@users.noreply.github.com>
Date: Sat, 19 Aug 2023 09:20:40 +0100
Subject: [PATCH 3/3] Lab done

---
 main.ipynb | 47 +++++++++++++++++++++++++++++++++--------------
 1 file changed, 33 insertions(+), 14 deletions(-)

diff --git a/main.ipynb b/main.ipynb
index a9460c2..56e93ad 100644
--- a/main.ipynb
+++ b/main.ipynb
@@ -12,7 +12,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 45,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -35,9 +35,28 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 2,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "ename": "FileNotFoundError",
+     "evalue": "[Errno 2] No such file or directory: 'Current_Employee_Names__Salaries__and_Position_Titles.csv'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[1;31mFileNotFoundError\u001b[0m                         Traceback (most recent call last)",
+      "Cell \u001b[1;32mIn[2], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m chicago \u001b[39m=\u001b[39m pd\u001b[39m.\u001b[39;49mread_csv(\u001b[39m\"\u001b[39;49m\u001b[39mCurrent_Employee_Names__Salaries__and_Position_Titles.csv\u001b[39;49m\u001b[39m\"\u001b[39;49m)\n",
+      "File \u001b[1;32mc:\\Users\\USER\\anaconda3\\lib\\site-packages\\pandas\\util\\_decorators.py:211\u001b[0m, in \u001b[0;36mdeprecate_kwarg.<locals>._deprecate_kwarg.<locals>.wrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m    209\u001b[0m     \u001b[39melse\u001b[39;00m:\n\u001b[0;32m    210\u001b[0m         kwargs[new_arg_name] \u001b[39m=\u001b[39m new_arg_value\n\u001b[1;32m--> 211\u001b[0m \u001b[39mreturn\u001b[39;00m func(\u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n",
+      "File \u001b[1;32mc:\\Users\\USER\\anaconda3\\lib\\site-packages\\pandas\\util\\_decorators.py:331\u001b[0m, in \u001b[0;36mdeprecate_nonkeyword_arguments.<locals>.decorate.<locals>.wrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m    325\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mlen\u001b[39m(args) \u001b[39m>\u001b[39m num_allow_args:\n\u001b[0;32m    326\u001b[0m     warnings\u001b[39m.\u001b[39mwarn(\n\u001b[0;32m    327\u001b[0m         msg\u001b[39m.\u001b[39mformat(arguments\u001b[39m=\u001b[39m_format_argument_list(allow_args)),\n\u001b[0;32m    328\u001b[0m         \u001b[39mFutureWarning\u001b[39;00m,\n\u001b[0;32m    329\u001b[0m         stacklevel\u001b[39m=\u001b[39mfind_stack_level(),\n\u001b[0;32m    330\u001b[0m     )\n\u001b[1;32m--> 331\u001b[0m \u001b[39mreturn\u001b[39;00m func(\u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n",
+      "File \u001b[1;32mc:\\Users\\USER\\anaconda3\\lib\\site-packages\\pandas\\io\\parsers\\readers.py:950\u001b[0m, in \u001b[0;36mread_csv\u001b[1;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, error_bad_lines, warn_bad_lines, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options)\u001b[0m\n\u001b[0;32m    935\u001b[0m kwds_defaults \u001b[39m=\u001b[39m _refine_defaults_read(\n\u001b[0;32m    936\u001b[0m     dialect,\n\u001b[0;32m    937\u001b[0m     delimiter,\n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m    946\u001b[0m     defaults\u001b[39m=\u001b[39m{\u001b[39m\"\u001b[39m\u001b[39mdelimiter\u001b[39m\u001b[39m\"\u001b[39m: \u001b[39m\"\u001b[39m\u001b[39m,\u001b[39m\u001b[39m\"\u001b[39m},\n\u001b[0;32m    947\u001b[0m )\n\u001b[0;32m    948\u001b[0m kwds\u001b[39m.\u001b[39mupdate(kwds_defaults)\n\u001b[1;32m--> 950\u001b[0m \u001b[39mreturn\u001b[39;00m _read(filepath_or_buffer, kwds)\n",
+      "File \u001b[1;32mc:\\Users\\USER\\anaconda3\\lib\\site-packages\\pandas\\io\\parsers\\readers.py:605\u001b[0m, in \u001b[0;36m_read\u001b[1;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[0;32m    602\u001b[0m _validate_names(kwds\u001b[39m.\u001b[39mget(\u001b[39m\"\u001b[39m\u001b[39mnames\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39mNone\u001b[39;00m))\n\u001b[0;32m    604\u001b[0m \u001b[39m# Create the parser.\u001b[39;00m\n\u001b[1;32m--> 605\u001b[0m parser \u001b[39m=\u001b[39m TextFileReader(filepath_or_buffer, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwds)\n\u001b[0;32m    607\u001b[0m \u001b[39mif\u001b[39;00m chunksize \u001b[39mor\u001b[39;00m iterator:\n\u001b[0;32m    608\u001b[0m     \u001b[39mreturn\u001b[39;00m parser\n",
+      "File \u001b[1;32mc:\\Users\\USER\\anaconda3\\lib\\site-packages\\pandas\\io\\parsers\\readers.py:1442\u001b[0m, in \u001b[0;36mTextFileReader.__init__\u001b[1;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[0;32m   1439\u001b[0m     \u001b[39mself\u001b[39m\u001b[39m.\u001b[39moptions[\u001b[39m\"\u001b[39m\u001b[39mhas_index_names\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39m=\u001b[39m kwds[\u001b[39m\"\u001b[39m\u001b[39mhas_index_names\u001b[39m\u001b[39m\"\u001b[39m]\n\u001b[0;32m   1441\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mhandles: IOHandles \u001b[39m|\u001b[39m \u001b[39mNone\u001b[39;00m \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n\u001b[1;32m-> 1442\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_engine \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_make_engine(f, \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mengine)\n",
+      "File \u001b[1;32mc:\\Users\\USER\\anaconda3\\lib\\site-packages\\pandas\\io\\parsers\\readers.py:1735\u001b[0m, in \u001b[0;36mTextFileReader._make_engine\u001b[1;34m(self, f, engine)\u001b[0m\n\u001b[0;32m   1733\u001b[0m     \u001b[39mif\u001b[39;00m \u001b[39m\"\u001b[39m\u001b[39mb\u001b[39m\u001b[39m\"\u001b[39m \u001b[39mnot\u001b[39;00m \u001b[39min\u001b[39;00m mode:\n\u001b[0;32m   1734\u001b[0m         mode \u001b[39m+\u001b[39m\u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mb\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m-> 1735\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mhandles \u001b[39m=\u001b[39m get_handle(\n\u001b[0;32m   1736\u001b[0m     f,\n\u001b[0;32m   1737\u001b[0m     mode,\n\u001b[0;32m   1738\u001b[0m     encoding\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49moptions\u001b[39m.\u001b[39;49mget(\u001b[39m\"\u001b[39;49m\u001b[39mencoding\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39mNone\u001b[39;49;00m),\n\u001b[0;32m   1739\u001b[0m     compression\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49moptions\u001b[39m.\u001b[39;49mget(\u001b[39m\"\u001b[39;49m\u001b[39mcompression\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39mNone\u001b[39;49;00m),\n\u001b[0;32m   1740\u001b[0m     memory_map\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49moptions\u001b[39m.\u001b[39;49mget(\u001b[39m\"\u001b[39;49m\u001b[39mmemory_map\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39mFalse\u001b[39;49;00m),\n\u001b[0;32m   1741\u001b[0m     is_text\u001b[39m=\u001b[39;49mis_text,\n\u001b[0;32m   1742\u001b[0m     errors\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49moptions\u001b[39m.\u001b[39;49mget(\u001b[39m\"\u001b[39;49m\u001b[39mencoding_errors\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39m\"\u001b[39;49m\u001b[39mstrict\u001b[39;49m\u001b[39m\"\u001b[39;49m),\n\u001b[0;32m   1743\u001b[0m     
storage_options\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49moptions\u001b[39m.\u001b[39;49mget(\u001b[39m\"\u001b[39;49m\u001b[39mstorage_options\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39mNone\u001b[39;49;00m),\n\u001b[0;32m   1744\u001b[0m )\n\u001b[0;32m   1745\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mhandles \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m\n\u001b[0;32m   1746\u001b[0m f \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mhandles\u001b[39m.\u001b[39mhandle\n",
+      "File \u001b[1;32mc:\\Users\\USER\\anaconda3\\lib\\site-packages\\pandas\\io\\common.py:856\u001b[0m, in \u001b[0;36mget_handle\u001b[1;34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[0m\n\u001b[0;32m    851\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39misinstance\u001b[39m(handle, \u001b[39mstr\u001b[39m):\n\u001b[0;32m    852\u001b[0m     \u001b[39m# Check whether the filename is to be opened in binary mode.\u001b[39;00m\n\u001b[0;32m    853\u001b[0m     \u001b[39m# Binary mode does not support 'encoding' and 'newline'.\u001b[39;00m\n\u001b[0;32m    854\u001b[0m     \u001b[39mif\u001b[39;00m ioargs\u001b[39m.\u001b[39mencoding \u001b[39mand\u001b[39;00m \u001b[39m\"\u001b[39m\u001b[39mb\u001b[39m\u001b[39m\"\u001b[39m \u001b[39mnot\u001b[39;00m \u001b[39min\u001b[39;00m ioargs\u001b[39m.\u001b[39mmode:\n\u001b[0;32m    855\u001b[0m         \u001b[39m# Encoding\u001b[39;00m\n\u001b[1;32m--> 856\u001b[0m         handle \u001b[39m=\u001b[39m \u001b[39mopen\u001b[39;49m(\n\u001b[0;32m    857\u001b[0m             handle,\n\u001b[0;32m    858\u001b[0m             ioargs\u001b[39m.\u001b[39;49mmode,\n\u001b[0;32m    859\u001b[0m             encoding\u001b[39m=\u001b[39;49mioargs\u001b[39m.\u001b[39;49mencoding,\n\u001b[0;32m    860\u001b[0m             errors\u001b[39m=\u001b[39;49merrors,\n\u001b[0;32m    861\u001b[0m             newline\u001b[39m=\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39m\"\u001b[39;49m,\n\u001b[0;32m    862\u001b[0m         )\n\u001b[0;32m    863\u001b[0m     \u001b[39melse\u001b[39;00m:\n\u001b[0;32m    864\u001b[0m         \u001b[39m# Binary mode\u001b[39;00m\n\u001b[0;32m    865\u001b[0m         handle \u001b[39m=\u001b[39m \u001b[39mopen\u001b[39m(handle, ioargs\u001b[39m.\u001b[39mmode)\n",
+      "\u001b[1;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'Current_Employee_Names__Salaries__and_Position_Titles.csv'"
+     ]
+    }
+   ],
    "source": [
     "chicago = pd.read_csv(\"Current_Employee_Names__Salaries__and_Position_Titles.csv\")\n"
    ]
@@ -51,7 +70,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -187,7 +206,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -224,7 +243,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 57,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -260,7 +279,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 49,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -497,7 +516,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 65,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -542,7 +561,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 79,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -589,7 +608,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 92,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -623,7 +642,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 91,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -683,7 +702,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 93,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -733,7 +752,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 99,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -788,7 +807,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [

	Name	Job Titles	Department	Full or Part-Time	Salary or Hourly	Typical Hours	Annual Salary	Hourly Rate
0	AARON, JEFFERY M	SERGEANT	POLICE	F	Salary	NaN	101442.0	NaN
1	AARON, KARINA	POLICE OFFICER (ASSIGNED AS DETECTIVE)	POLICE	F	Salary	NaN	94122.0	NaN
2	AARON, KIMBERLEI R	CHIEF CONTRACT EXPEDITER	GENERAL SERVICES	F	Salary	NaN	101592.0	NaN
3	ABAD JR, VICENTE M	CIVIL ENGINEER IV	WATER MGMNT	F	Salary	NaN	110064.0	NaN
4	ABASCAL, REECE E	TRAFFIC CONTROL AIDE-HOURLY	OEMC	P	Hourly	20.0	NaN	19.86
	Name
Department
ADMIN HEARNG	39
ANIMAL CONTRL	81
AVIATION	1629
BOARD OF ELECTION	107
BOARD OF ETHICS	8
BUDGET & MGMT	46
BUILDINGS	269
BUSINESS AFFAIRS	171
CITY CLERK	84
CITY COUNCIL	411
COMMUNITY DEVELOPMENT	207
COPA	116
CULTURAL AFFAIRS	65
DISABILITIES	28
DoIT	99
FAMILY & SUPPORT	615
FINANCE	560
FIRE	4641
GENERAL SERVICES	980
HEALTH	488
HUMAN RELATIONS	16
HUMAN RESOURCES	79
INSPECTOR GEN	87
LAW	407
LICENSE APPL COMM	1
MAYOR'S OFFICE	85
OEMC	2102
POLICE	13414
POLICE BOARD	2
PROCUREMENT	92
PUBLIC LIBRARY	1015
STREETS & SAN	2198
TRANSPORTN	1140
TREASURER	22
WATER MGMNT	1879