diff --git a/your-code/main.ipynb b/your-code/main.ipynb old mode 100755 new mode 100644 index 59b955a..197ae9e --- a/your-code/main.ipynb +++ b/your-code/main.ipynb @@ -12,12 +12,13 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# import numpy and pandas\n", - "\n" + "import numpy as np\n", + "import pandas as pd" ] }, { @@ -31,11 +32,132 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NameJob TitlesDepartmentFull or Part-TimeSalary or HourlyTypical HoursAnnual SalaryHourly Rate
0AARON, JEFFERY MSERGEANTPOLICEFSalaryNaN101442.0NaN
1AARON, KARINAPOLICE OFFICER (ASSIGNED AS DETECTIVE)POLICEFSalaryNaN94122.0NaN
2AARON, KIMBERLEI RCHIEF CONTRACT EXPEDITERGENERAL SERVICESFSalaryNaN101592.0NaN
3ABAD JR, VICENTE MCIVIL ENGINEER IVWATER MGMNTFSalaryNaN110064.0NaN
4ABASCAL, REECE ETRAFFIC CONTROL AIDE-HOURLYOEMCPHourly20.0NaN19.86
\n", + "
" + ], + "text/plain": [ + " Name Job Titles \\\n", + "0 AARON, JEFFERY M SERGEANT \n", + "1 AARON, KARINA POLICE OFFICER (ASSIGNED AS DETECTIVE) \n", + "2 AARON, KIMBERLEI R CHIEF CONTRACT EXPEDITER \n", + "3 ABAD JR, VICENTE M CIVIL ENGINEER IV \n", + "4 ABASCAL, REECE E TRAFFIC CONTROL AIDE-HOURLY \n", + "\n", + " Department Full or Part-Time Salary or Hourly Typical Hours \\\n", + "0 POLICE F Salary NaN \n", + "1 POLICE F Salary NaN \n", + "2 GENERAL SERVICES F Salary NaN \n", + "3 WATER MGMNT F Salary NaN \n", + "4 OEMC P Hourly 20.0 \n", + "\n", + " Annual Salary Hourly Rate \n", + "0 101442.0 NaN \n", + "1 94122.0 NaN \n", + "2 101592.0 NaN \n", + "3 110064.0 NaN \n", + "4 NaN 19.86 " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "data = pd.read_csv(\"Current_Employee_Names__Salaries__and_Position_Titles.csv\")\n", + "data.head()" ] }, { @@ -47,12 +169,132 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NameJob TitlesDepartmentFull or Part-TimeSalary or HourlyTypical HoursAnnual SalaryHourly Rate
0AARON, JEFFERY MSERGEANTPOLICEFSalaryNaN101442.0NaN
1AARON, KARINAPOLICE OFFICER (ASSIGNED AS DETECTIVE)POLICEFSalaryNaN94122.0NaN
2AARON, KIMBERLEI RCHIEF CONTRACT EXPEDITERGENERAL SERVICESFSalaryNaN101592.0NaN
3ABAD JR, VICENTE MCIVIL ENGINEER IVWATER MGMNTFSalaryNaN110064.0NaN
4ABASCAL, REECE ETRAFFIC CONTROL AIDE-HOURLYOEMCPHourly20.0NaN19.86
\n", + "
" + ], + "text/plain": [ + " Name Job Titles \\\n", + "0 AARON, JEFFERY M SERGEANT \n", + "1 AARON, KARINA POLICE OFFICER (ASSIGNED AS DETECTIVE) \n", + "2 AARON, KIMBERLEI R CHIEF CONTRACT EXPEDITER \n", + "3 ABAD JR, VICENTE M CIVIL ENGINEER IV \n", + "4 ABASCAL, REECE E TRAFFIC CONTROL AIDE-HOURLY \n", + "\n", + " Department Full or Part-Time Salary or Hourly Typical Hours \\\n", + "0 POLICE F Salary NaN \n", + "1 POLICE F Salary NaN \n", + "2 GENERAL SERVICES F Salary NaN \n", + "3 WATER MGMNT F Salary NaN \n", + "4 OEMC P Hourly 20.0 \n", + "\n", + " Annual Salary Hourly Rate \n", + "0 101442.0 NaN \n", + "1 94122.0 NaN \n", + "2 101592.0 NaN \n", + "3 110064.0 NaN \n", + "4 NaN 19.86 " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", - "\n" + "\n", + "data.head()" ] }, { @@ -64,12 +306,32 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "Name 0\n", + "Job Titles 0\n", + "Department 0\n", + "Full or Part-Time 0\n", + "Salary or Hourly 0\n", + "Typical Hours 25161\n", + "Annual Salary 8022\n", + "Hourly Rate 25161\n", + "dtype: int64" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", - "\n" + "\n", + "data.isna().sum()" ] }, { @@ -81,12 +343,66 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Salary or Hourly
Salary or Hourly
Hourly8022
Salary25161
\n", + "
" + ], + "text/plain": [ + " Salary or Hourly\n", + "Salary or Hourly \n", + "Hourly 8022\n", + "Salary 25161" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", - "\n" + "\n", + "data.groupby(\"Salary or Hourly\").agg({\"Salary or Hourly\":\"count\"})" ] }, { @@ -105,12 +421,262 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DepartmentName
0ADMIN HEARNG39
1ANIMAL CONTRL81
2AVIATION1629
3BOARD OF ELECTION107
4BOARD OF ETHICS8
5BUDGET & MGMT46
6BUILDINGS269
7BUSINESS AFFAIRS171
8CITY CLERK84
9CITY COUNCIL411
10COMMUNITY DEVELOPMENT207
11COPA116
12CULTURAL AFFAIRS65
13DISABILITIES28
14DoIT99
15FAMILY & SUPPORT615
16FINANCE560
17FIRE4641
18GENERAL SERVICES980
19HEALTH488
20HUMAN RELATIONS16
21HUMAN RESOURCES79
22INSPECTOR GEN87
23LAW407
24LICENSE APPL COMM1
25MAYOR'S OFFICE85
26OEMC2102
27POLICE13414
28POLICE BOARD2
29PROCUREMENT92
30PUBLIC LIBRARY1015
31STREETS & SAN2198
32TRANSPORTN1140
33TREASURER22
34WATER MGMNT1879
\n", + "
" + ], + "text/plain": [ + " Department Name\n", + "0 ADMIN HEARNG 39\n", + "1 ANIMAL CONTRL 81\n", + "2 AVIATION 1629\n", + "3 BOARD OF ELECTION 107\n", + "4 BOARD OF ETHICS 8\n", + "5 BUDGET & MGMT 46\n", + "6 BUILDINGS 269\n", + "7 BUSINESS AFFAIRS 171\n", + "8 CITY CLERK 84\n", + "9 CITY COUNCIL 411\n", + "10 COMMUNITY DEVELOPMENT 207\n", + "11 COPA 116\n", + "12 CULTURAL AFFAIRS 65\n", + "13 DISABILITIES 28\n", + "14 DoIT 99\n", + "15 FAMILY & SUPPORT 615\n", + "16 FINANCE 560\n", + "17 FIRE 4641\n", + "18 GENERAL SERVICES 980\n", + "19 HEALTH 488\n", + "20 HUMAN RELATIONS 16\n", + "21 HUMAN RESOURCES 79\n", + "22 INSPECTOR GEN 87\n", + "23 LAW 407\n", + "24 LICENSE APPL COMM 1\n", + "25 MAYOR'S OFFICE 85\n", + "26 OEMC 2102\n", + "27 POLICE 13414\n", + "28 POLICE BOARD 2\n", + "29 PROCUREMENT 92\n", + "30 PUBLIC LIBRARY 1015\n", + "31 STREETS & SAN 2198\n", + "32 TRANSPORTN 1140\n", + "33 TREASURER 22\n", + "34 WATER MGMNT 1879" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", - "\n" + "\n", + "data.groupby('Department')['Name'].count().reset_index()" ] }, { @@ -124,12 +690,38 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "p-value: 3.601493021499262e-05\n", + "We can reject the null hypothesis\n" + ] + } + ], "source": [ "# Your code here:\n", - "\n" + "\n", + "import scipy.stats as st\n", + "\n", + "# H0: mean hourly wage = 30\n", + "# H1: mean hourly wage != 30 (two-sided, the ttest_1samp default)\n", + "\n", + "alpha = 0.05\n", + "\n", + "hourly_salaries = data[data[\"Salary or Hourly\"] == \"Hourly\"][\"Hourly Rate\"].sample(500, random_state=42) # Fixed-seed random sample of 500 hourly workers, so the test result is reproducible on re-run\n", + "\n", + "stat, p_value = st.ttest_1samp(hourly_salaries,30)\n", + "\n", + "print(\"p-value: \",p_value)\n", + "\n", + "if p_value < alpha:\n", + " print(\"We can reject the null hypothesis\")\n", + 
"else:\n", + " print(\"We cannot reject the null hypothesis\")\n" ] }, { @@ -143,12 +735,36 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "p-value: 0.7750192405829639\n", + "We cannot reject the null hypothesis\n" + ] + } + ], "source": [ "# Your code here:\n", - "\n" + "\n", + "# H0: Annual salary >= 86000\n", + "# H1: Annual salary < 86000 (NOTE(review): confirm this one-sided direction matches the claim being tested)\n", + "\n", + "alpha = 0.05\n", + "\n", + "police_salaries = data[(data[\"Department\"]==\"POLICE\") & (data[\"Salary or Hourly\"] == \"Salary\")][\"Annual Salary\"].sample(500, random_state=42)\n", + "\n", + "stat, p_value = st.ttest_1samp(police_salaries,86000,alternative=\"less\")\n", + "\n", + "print(\"p-value: \",p_value)\n", + "\n", + "if p_value < alpha:\n", + " print(\"We can reject the null hypothesis\")\n", + "else:\n", + " print(\"We cannot reject the null hypothesis\")" ] }, { @@ -160,11 +776,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Department with the most hourly workers: STREETS & SAN\n" + ] + } + ], "source": [ "# Your code here:\n", + "\n", + "cross_tab = pd.crosstab(data['Department'], data['Salary or Hourly'])\n", + "\n", + "department_with_most_hourly_workers = cross_tab['Hourly'].idxmax()\n", + "\n", + "print(\"Department with the most hourly workers:\", department_with_most_hourly_workers)\n", + "\n", "\n" ] }, @@ -177,12 +808,37 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "p-value: 0.9999996387725908\n", + "We cannot reject the null hypothesis\n" + ] + } + ], "source": [ "# Your code here:\n", - "\n" + "\n", + "# H0: Hourly wage <= 35\n", + "# H1: Hourly wage > 35 (NOTE(review): confirm this one-sided direction matches the claim being tested)\n", + "\n", 
+ "alpha = 0.05\n", + "\n", + "streets_salaries = data[(data[\"Department\"]==\"STREETS & SAN\") & (data[\"Salary or Hourly\"] ==\"Hourly\")][\"Hourly Rate\"].sample(500, random_state=42)\n", + "\n", + "stat, p_value = st.ttest_1samp(streets_salaries,35,alternative=\"greater\")\n", + "\n", + "print(\"p-value: \",p_value)\n", + "\n", + "if p_value < alpha:\n", + " print(\"We can reject the null hypothesis\")\n", + "else:\n", + " print(\"We cannot reject the null hypothesis\")\n", + " " ] }, { @@ -206,11 +862,29 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "(32.24671619247653, 32.315363807523475)" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", + "\n", + "alpha = 0.05 # significance level; the confidence level is 1 - alpha\n", + "mean = hourly_salaries.mean()\n", + "df = len(hourly_salaries)-1 # degrees of freedom\n", + "sem = st.sem(hourly_salaries) # standard error of the sample mean\n", + "\n", + "st.t.interval(1 - alpha, df, loc=mean, scale=sem) # first argument is the confidence level (0.95), not alpha; NOTE(review): saved output predates this fix, re-run to refresh\n", "\n" ] }, @@ -223,12 +897,29 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "(86565.63318462267, 86667.99881537734)" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", - "\n" + "\n", + "alpha = 0.05 # significance level; the confidence level is 1 - alpha\n", + "mean = police_salaries.mean()\n", + "df = len(police_salaries)-1 # degrees of freedom\n", + "sem = st.sem(police_salaries) # standard error of the sample mean\n", + "\n", + "st.t.interval(1 - alpha, df, loc=mean, scale=sem) # first argument is the confidence level (0.95), not alpha; NOTE(review): saved output predates this fix, re-run to refresh" ] }, { @@ -257,7 +948,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -271,7 +962,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.10.9" } }, "nbformat": 4,