diff --git a/your-code/main.ipynb b/your-code/main.ipynb
old mode 100755
new mode 100644
index 59b955a..197ae9e
--- a/your-code/main.ipynb
+++ b/your-code/main.ipynb
@@ -12,12 +12,13 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# import numpy and pandas\n",
- "\n"
+ "import numpy as np\n",
+ "import pandas as pd"
]
},
{
@@ -31,11 +32,132 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 2,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Name | \n",
+ " Job Titles | \n",
+ " Department | \n",
+ " Full or Part-Time | \n",
+ " Salary or Hourly | \n",
+ " Typical Hours | \n",
+ " Annual Salary | \n",
+ " Hourly Rate | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " AARON, JEFFERY M | \n",
+ " SERGEANT | \n",
+ " POLICE | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 101442.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " AARON, KARINA | \n",
+ " POLICE OFFICER (ASSIGNED AS DETECTIVE) | \n",
+ " POLICE | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 94122.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " AARON, KIMBERLEI R | \n",
+ " CHIEF CONTRACT EXPEDITER | \n",
+ " GENERAL SERVICES | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 101592.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " ABAD JR, VICENTE M | \n",
+ " CIVIL ENGINEER IV | \n",
+ " WATER MGMNT | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 110064.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " ABASCAL, REECE E | \n",
+ " TRAFFIC CONTROL AIDE-HOURLY | \n",
+ " OEMC | \n",
+ " P | \n",
+ " Hourly | \n",
+ " 20.0 | \n",
+ " NaN | \n",
+ " 19.86 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Name Job Titles \\\n",
+ "0 AARON, JEFFERY M SERGEANT \n",
+ "1 AARON, KARINA POLICE OFFICER (ASSIGNED AS DETECTIVE) \n",
+ "2 AARON, KIMBERLEI R CHIEF CONTRACT EXPEDITER \n",
+ "3 ABAD JR, VICENTE M CIVIL ENGINEER IV \n",
+ "4 ABASCAL, REECE E TRAFFIC CONTROL AIDE-HOURLY \n",
+ "\n",
+ " Department Full or Part-Time Salary or Hourly Typical Hours \\\n",
+ "0 POLICE F Salary NaN \n",
+ "1 POLICE F Salary NaN \n",
+ "2 GENERAL SERVICES F Salary NaN \n",
+ "3 WATER MGMNT F Salary NaN \n",
+ "4 OEMC P Hourly 20.0 \n",
+ "\n",
+ " Annual Salary Hourly Rate \n",
+ "0 101442.0 NaN \n",
+ "1 94122.0 NaN \n",
+ "2 101592.0 NaN \n",
+ "3 110064.0 NaN \n",
+ "4 NaN 19.86 "
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n"
+ "# Load the employee salaries CSV into a DataFrame and preview its first five rows\n",
+ "data = pd.read_csv(filepath_or_buffer=\"Current_Employee_Names__Salaries__and_Position_Titles.csv\")\n",
+ "data.head(n=5)"
]
},
{
@@ -47,12 +169,132 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 5,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Name | \n",
+ " Job Titles | \n",
+ " Department | \n",
+ " Full or Part-Time | \n",
+ " Salary or Hourly | \n",
+ " Typical Hours | \n",
+ " Annual Salary | \n",
+ " Hourly Rate | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " AARON, JEFFERY M | \n",
+ " SERGEANT | \n",
+ " POLICE | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 101442.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " AARON, KARINA | \n",
+ " POLICE OFFICER (ASSIGNED AS DETECTIVE) | \n",
+ " POLICE | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 94122.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " AARON, KIMBERLEI R | \n",
+ " CHIEF CONTRACT EXPEDITER | \n",
+ " GENERAL SERVICES | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 101592.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " ABAD JR, VICENTE M | \n",
+ " CIVIL ENGINEER IV | \n",
+ " WATER MGMNT | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 110064.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " ABASCAL, REECE E | \n",
+ " TRAFFIC CONTROL AIDE-HOURLY | \n",
+ " OEMC | \n",
+ " P | \n",
+ " Hourly | \n",
+ " 20.0 | \n",
+ " NaN | \n",
+ " 19.86 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Name Job Titles \\\n",
+ "0 AARON, JEFFERY M SERGEANT \n",
+ "1 AARON, KARINA POLICE OFFICER (ASSIGNED AS DETECTIVE) \n",
+ "2 AARON, KIMBERLEI R CHIEF CONTRACT EXPEDITER \n",
+ "3 ABAD JR, VICENTE M CIVIL ENGINEER IV \n",
+ "4 ABASCAL, REECE E TRAFFIC CONTROL AIDE-HOURLY \n",
+ "\n",
+ " Department Full or Part-Time Salary or Hourly Typical Hours \\\n",
+ "0 POLICE F Salary NaN \n",
+ "1 POLICE F Salary NaN \n",
+ "2 GENERAL SERVICES F Salary NaN \n",
+ "3 WATER MGMNT F Salary NaN \n",
+ "4 OEMC P Hourly 20.0 \n",
+ "\n",
+ " Annual Salary Hourly Rate \n",
+ "0 101442.0 NaN \n",
+ "1 94122.0 NaN \n",
+ "2 101592.0 NaN \n",
+ "3 110064.0 NaN \n",
+ "4 NaN 19.86 "
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Your code here:\n",
- "\n"
+ "# Show the first five rows to inspect the columns and their values\n",
+ "data.head(n=5)"
]
},
{
@@ -64,12 +306,32 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 3,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Name 0\n",
+ "Job Titles 0\n",
+ "Department 0\n",
+ "Full or Part-Time 0\n",
+ "Salary or Hourly 0\n",
+ "Typical Hours 25161\n",
+ "Annual Salary 8022\n",
+ "Hourly Rate 25161\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Your code here:\n",
- "\n"
+ "# Count the missing (NaN) entries in every column\n",
+ "data.isnull().sum()"
]
},
{
@@ -81,12 +343,66 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 5,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Salary or Hourly | \n",
+ "
\n",
+ " \n",
+ " | Salary or Hourly | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | Hourly | \n",
+ " 8022 | \n",
+ "
\n",
+ " \n",
+ " | Salary | \n",
+ " 25161 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Salary or Hourly\n",
+ "Salary or Hourly \n",
+ "Hourly 8022\n",
+ "Salary 25161"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Your code here:\n",
- "\n"
+ "# Number of rows per pay type; the pay type stays as the index of the resulting frame\n",
+ "data.groupby(\"Salary or Hourly\")[[\"Salary or Hourly\"]].count()"
]
},
{
@@ -105,12 +421,262 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 6,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Department | \n",
+ " Name | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " ADMIN HEARNG | \n",
+ " 39 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " ANIMAL CONTRL | \n",
+ " 81 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " AVIATION | \n",
+ " 1629 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " BOARD OF ELECTION | \n",
+ " 107 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " BOARD OF ETHICS | \n",
+ " 8 | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " BUDGET & MGMT | \n",
+ " 46 | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " BUILDINGS | \n",
+ " 269 | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " BUSINESS AFFAIRS | \n",
+ " 171 | \n",
+ "
\n",
+ " \n",
+ " | 8 | \n",
+ " CITY CLERK | \n",
+ " 84 | \n",
+ "
\n",
+ " \n",
+ " | 9 | \n",
+ " CITY COUNCIL | \n",
+ " 411 | \n",
+ "
\n",
+ " \n",
+ " | 10 | \n",
+ " COMMUNITY DEVELOPMENT | \n",
+ " 207 | \n",
+ "
\n",
+ " \n",
+ " | 11 | \n",
+ " COPA | \n",
+ " 116 | \n",
+ "
\n",
+ " \n",
+ " | 12 | \n",
+ " CULTURAL AFFAIRS | \n",
+ " 65 | \n",
+ "
\n",
+ " \n",
+ " | 13 | \n",
+ " DISABILITIES | \n",
+ " 28 | \n",
+ "
\n",
+ " \n",
+ " | 14 | \n",
+ " DoIT | \n",
+ " 99 | \n",
+ "
\n",
+ " \n",
+ " | 15 | \n",
+ " FAMILY & SUPPORT | \n",
+ " 615 | \n",
+ "
\n",
+ " \n",
+ " | 16 | \n",
+ " FINANCE | \n",
+ " 560 | \n",
+ "
\n",
+ " \n",
+ " | 17 | \n",
+ " FIRE | \n",
+ " 4641 | \n",
+ "
\n",
+ " \n",
+ " | 18 | \n",
+ " GENERAL SERVICES | \n",
+ " 980 | \n",
+ "
\n",
+ " \n",
+ " | 19 | \n",
+ " HEALTH | \n",
+ " 488 | \n",
+ "
\n",
+ " \n",
+ " | 20 | \n",
+ " HUMAN RELATIONS | \n",
+ " 16 | \n",
+ "
\n",
+ " \n",
+ " | 21 | \n",
+ " HUMAN RESOURCES | \n",
+ " 79 | \n",
+ "
\n",
+ " \n",
+ " | 22 | \n",
+ " INSPECTOR GEN | \n",
+ " 87 | \n",
+ "
\n",
+ " \n",
+ " | 23 | \n",
+ " LAW | \n",
+ " 407 | \n",
+ "
\n",
+ " \n",
+ " | 24 | \n",
+ " LICENSE APPL COMM | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 25 | \n",
+ " MAYOR'S OFFICE | \n",
+ " 85 | \n",
+ "
\n",
+ " \n",
+ " | 26 | \n",
+ " OEMC | \n",
+ " 2102 | \n",
+ "
\n",
+ " \n",
+ " | 27 | \n",
+ " POLICE | \n",
+ " 13414 | \n",
+ "
\n",
+ " \n",
+ " | 28 | \n",
+ " POLICE BOARD | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | 29 | \n",
+ " PROCUREMENT | \n",
+ " 92 | \n",
+ "
\n",
+ " \n",
+ " | 30 | \n",
+ " PUBLIC LIBRARY | \n",
+ " 1015 | \n",
+ "
\n",
+ " \n",
+ " | 31 | \n",
+ " STREETS & SAN | \n",
+ " 2198 | \n",
+ "
\n",
+ " \n",
+ " | 32 | \n",
+ " TRANSPORTN | \n",
+ " 1140 | \n",
+ "
\n",
+ " \n",
+ " | 33 | \n",
+ " TREASURER | \n",
+ " 22 | \n",
+ "
\n",
+ " \n",
+ " | 34 | \n",
+ " WATER MGMNT | \n",
+ " 1879 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Department Name\n",
+ "0 ADMIN HEARNG 39\n",
+ "1 ANIMAL CONTRL 81\n",
+ "2 AVIATION 1629\n",
+ "3 BOARD OF ELECTION 107\n",
+ "4 BOARD OF ETHICS 8\n",
+ "5 BUDGET & MGMT 46\n",
+ "6 BUILDINGS 269\n",
+ "7 BUSINESS AFFAIRS 171\n",
+ "8 CITY CLERK 84\n",
+ "9 CITY COUNCIL 411\n",
+ "10 COMMUNITY DEVELOPMENT 207\n",
+ "11 COPA 116\n",
+ "12 CULTURAL AFFAIRS 65\n",
+ "13 DISABILITIES 28\n",
+ "14 DoIT 99\n",
+ "15 FAMILY & SUPPORT 615\n",
+ "16 FINANCE 560\n",
+ "17 FIRE 4641\n",
+ "18 GENERAL SERVICES 980\n",
+ "19 HEALTH 488\n",
+ "20 HUMAN RELATIONS 16\n",
+ "21 HUMAN RESOURCES 79\n",
+ "22 INSPECTOR GEN 87\n",
+ "23 LAW 407\n",
+ "24 LICENSE APPL COMM 1\n",
+ "25 MAYOR'S OFFICE 85\n",
+ "26 OEMC 2102\n",
+ "27 POLICE 13414\n",
+ "28 POLICE BOARD 2\n",
+ "29 PROCUREMENT 92\n",
+ "30 PUBLIC LIBRARY 1015\n",
+ "31 STREETS & SAN 2198\n",
+ "32 TRANSPORTN 1140\n",
+ "33 TREASURER 22\n",
+ "34 WATER MGMNT 1879"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Your code here:\n",
- "\n"
+ "# Non-null names per department, returned as a flat two-column frame (Department, Name)\n",
+ "data.groupby('Department', as_index=False)['Name'].count()"
]
},
{
@@ -124,12 +690,38 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 9,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "p-value: 3.601493021499262e-05\n",
+ "We can reject the null hypothesis\n"
+ ]
+ }
+ ],
"source": [
"# Your code here:\n",
- "\n"
+ "\n",
+ "import scipy.stats as st\n",
+ "\n",
+ "# H0: Hourly wage = 30\n",
+ "# H1: Hourly wage != 30\n",
+ "alpha = 0.05\n",
+ "\n",
+ "# The test runs on a random sample of 500 hourly workers. The fixed random_state makes\n",
+ "# the sample (and so the p-value and the later confidence interval) repeatable on re-run.\n",
+ "hourly_salaries = data.loc[data[\"Salary or Hourly\"] == \"Hourly\", \"Hourly Rate\"].sample(500, random_state=42)\n",
+ "\n",
+ "stat, p_value = st.ttest_1samp(hourly_salaries, 30)  # two-sided by default\n",
+ "print(\"p-value: \",p_value)\n",
+ "\n",
+ "if p_value < alpha:\n",
+ "    print(\"We can reject the null hypothesis\")\n",
+ "else:\n",
+ "    print(\"We cannot reject the null hypothesis\")\n"
]
},
{
@@ -143,12 +735,36 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 12,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "p-value: 0.7750192405829639\n",
+ "We cannot reject the null hypothesis\n"
+ ]
+ }
+ ],
"source": [
"# Your code here:\n",
- "\n"
+ "\n",
+ "# H0: Annual salary >= 86000\n",
+ "# H1: Annual salary < 86000\n",
+ "\n",
+ "alpha = 0.05\n",
+ "\n",
+ "# Random sample of 500 salaried police employees; the fixed random_state keeps it repeatable on re-run\n",
+ "salaried_police = (data[\"Department\"] == \"POLICE\") & (data[\"Salary or Hourly\"] == \"Salary\")\n",
+ "police_salaries = data.loc[salaried_police, \"Annual Salary\"].sample(500, random_state=42)\n",
+ "\n",
+ "stat, p_value = st.ttest_1samp(police_salaries, 86000, alternative=\"less\")\n",
+ "print(\"p-value: \",p_value)\n",
+ "if p_value < alpha:\n",
+ "    print(\"We can reject the null hypothesis\")\n",
+ "else:\n",
+ "    print(\"We cannot reject the null hypothesis\")"
]
},
{
@@ -160,11 +776,26 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 18,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Department with the most hourly workers: STREETS & SAN\n"
+ ]
+ }
+ ],
"source": [
"# Your code here:\n",
+ "\n",
+ "# Cross-tabulate departments (rows) against pay type (columns)\n",
+ "cross_tab = pd.crosstab(index=data['Department'], columns=data['Salary or Hourly'])\n",
+ "\n",
+ "# Row label holding the largest count in the 'Hourly' column\n",
+ "department_with_most_hourly_workers = cross_tab.loc[:, 'Hourly'].idxmax()\n",
+ "print(\"Department with the most hourly workers:\", department_with_most_hourly_workers)\n",
"\n"
]
},
@@ -177,12 +808,37 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 15,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "p-value: 0.9999996387725908\n",
+ "We cannot reject the null hypothesis\n"
+ ]
+ }
+ ],
"source": [
"# Your code here:\n",
- "\n"
+ "\n",
+ "# H0: Hourly wage <= 35\n",
+ "# H1: Hourly wage > 35\n",
+ "\n",
+ "alpha = 0.05\n",
+ "\n",
+ "# Random sample of 500 hourly STREETS & SAN workers; the fixed random_state keeps it repeatable on re-run\n",
+ "hourly_streets = (data[\"Department\"] == \"STREETS & SAN\") & (data[\"Salary or Hourly\"] == \"Hourly\")\n",
+ "streets_salaries = data.loc[hourly_streets, \"Hourly Rate\"].sample(500, random_state=42)\n",
+ "\n",
+ "stat, p_value = st.ttest_1samp(streets_salaries, 35, alternative=\"greater\")\n",
+ "\n",
+ "print(\"p-value: \",p_value)\n",
+ "if p_value < alpha:\n",
+ "    print(\"We can reject the null hypothesis\")\n",
+ "else:\n",
+ "    print(\"We cannot reject the null hypothesis\")"
]
},
{
@@ -206,11 +862,29 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 17,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(32.24671619247653, 32.315363807523475)"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Your code here:\n",
+ "# 95% t confidence interval for the mean of the hourly-wage sample drawn earlier\n",
+ "alpha = 0.05\n",
+ "confidence = 1 - alpha  # t.interval expects the confidence level (0.95), not the significance level\n",
+ "mean = hourly_salaries.mean()\n",
+ "df = len(hourly_salaries) - 1  # degrees of freedom\n",
+ "sem = st.sem(hourly_salaries)  # standard error of the mean\n",
+ "st.t.interval(confidence, df, loc=mean, scale=sem)  # level passed positionally, whatever the keyword is named\n",
"\n"
]
},
@@ -223,12 +897,29 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 18,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(86565.63318462267, 86667.99881537734)"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Your code here:\n",
- "\n"
+ "# 95% t confidence interval for the mean of the police-salary sample drawn earlier\n",
+ "alpha = 0.05\n",
+ "confidence = 1 - alpha  # t.interval expects the confidence level (0.95), not the significance level\n",
+ "mean = police_salaries.mean()\n",
+ "df = len(police_salaries) - 1  # degrees of freedom\n",
+ "sem = st.sem(police_salaries)  # standard error of the mean\n",
+ "st.t.interval(confidence, df, loc=mean, scale=sem)"
]
},
{
@@ -257,7 +948,7 @@
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3",
+ "display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -271,7 +962,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.7.3"
+ "version": "3.10.9"
}
},
"nbformat": 4,