diff --git a/your-code/main.ipynb b/your-code/main.ipynb index 59b955a..6af3906 100755 --- a/your-code/main.ipynb +++ b/your-code/main.ipynb @@ -12,12 +12,13 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ - "# import numpy and pandas\n", - "\n" + "import numpy as np\n", + "import pandas as pd\n", + "import scipy.stats as st" ] }, { @@ -31,11 +32,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "salaries = pd.read_csv(\"Current_Employee_Names__Salaries__and_Position_Titles.csv\")" ] }, { @@ -47,12 +48,130 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NameJob TitlesDepartmentFull or Part-TimeSalary or HourlyTypical HoursAnnual SalaryHourly Rate
0AARON, JEFFERY MSERGEANTPOLICEFSalaryNaN101442.0NaN
1AARON, KARINAPOLICE OFFICER (ASSIGNED AS DETECTIVE)POLICEFSalaryNaN94122.0NaN
2AARON, KIMBERLEI RCHIEF CONTRACT EXPEDITERGENERAL SERVICESFSalaryNaN101592.0NaN
3ABAD JR, VICENTE MCIVIL ENGINEER IVWATER MGMNTFSalaryNaN110064.0NaN
4ABASCAL, REECE ETRAFFIC CONTROL AIDE-HOURLYOEMCPHourly20.0NaN19.86
\n", + "
" + ], + "text/plain": [ + " Name Job Titles \\\n", + "0 AARON, JEFFERY M SERGEANT \n", + "1 AARON, KARINA POLICE OFFICER (ASSIGNED AS DETECTIVE) \n", + "2 AARON, KIMBERLEI R CHIEF CONTRACT EXPEDITER \n", + "3 ABAD JR, VICENTE M CIVIL ENGINEER IV \n", + "4 ABASCAL, REECE E TRAFFIC CONTROL AIDE-HOURLY \n", + "\n", + " Department Full or Part-Time Salary or Hourly Typical Hours \\\n", + "0 POLICE F Salary NaN \n", + "1 POLICE F Salary NaN \n", + "2 GENERAL SERVICES F Salary NaN \n", + "3 WATER MGMNT F Salary NaN \n", + "4 OEMC P Hourly 20.0 \n", + "\n", + " Annual Salary Hourly Rate \n", + "0 101442.0 NaN \n", + "1 94122.0 NaN \n", + "2 101592.0 NaN \n", + "3 110064.0 NaN \n", + "4 NaN 19.86 " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n", - "\n" + "salaries.head()" ] }, { @@ -64,12 +183,30 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "Name 0\n", + "Job Titles 0\n", + "Department 0\n", + "Full or Part-Time 0\n", + "Salary or Hourly 0\n", + "Typical Hours 25161\n", + "Annual Salary 8022\n", + "Hourly Rate 25161\n", + "dtype: int64" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n", - "\n" + "salaries.isnull().sum()" ] }, { @@ -81,12 +218,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "Salary 25161\n", + "Hourly 8022\n", + "Name: Salary or Hourly, dtype: int64" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n", - "\n" + "salaries[\"Salary or Hourly\"].value_counts()" ] }, { @@ -105,12 +254,57 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, - 
"outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "POLICE 13414\n", + "FIRE 4641\n", + "STREETS & SAN 2198\n", + "OEMC 2102\n", + "WATER MGMNT 1879\n", + "AVIATION 1629\n", + "TRANSPORTN 1140\n", + "PUBLIC LIBRARY 1015\n", + "GENERAL SERVICES 980\n", + "FAMILY & SUPPORT 615\n", + "FINANCE 560\n", + "HEALTH 488\n", + "CITY COUNCIL 411\n", + "LAW 407\n", + "BUILDINGS 269\n", + "COMMUNITY DEVELOPMENT 207\n", + "BUSINESS AFFAIRS 171\n", + "COPA 116\n", + "BOARD OF ELECTION 107\n", + "DoIT 99\n", + "PROCUREMENT 92\n", + "INSPECTOR GEN 87\n", + "MAYOR'S OFFICE 85\n", + "CITY CLERK 84\n", + "ANIMAL CONTRL 81\n", + "HUMAN RESOURCES 79\n", + "CULTURAL AFFAIRS 65\n", + "BUDGET & MGMT 46\n", + "ADMIN HEARNG 39\n", + "DISABILITIES 28\n", + "TREASURER 22\n", + "HUMAN RELATIONS 16\n", + "BOARD OF ETHICS 8\n", + "POLICE BOARD 2\n", + "LICENSE APPL COMM 1\n", + "Name: Department, dtype: int64" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n", - "\n" + "salaries[\"Department\"].value_counts()" ] }, { @@ -124,12 +318,39 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "4.3230240486229894e-92\n", + "Reject Null Hypothesis\n" + ] + } + ], "source": [ - "# Your code here:\n", - "\n" + "# Hypothesis\n", + "# H0: average hourly wage != ~30\n", + "# H1: average hours wage = ~30\n", + "\n", + "# Significance Level\n", + "alpha = 0.05\n", + "\n", + "#3 Sample\n", + "sample = salaries[\"Hourly Rate\"]\n", + "sample.dropna(inplace=True)\n", + "\n", + "#4 Compute Statistics\n", + "\n", + "mean_sample = sample.mean()\n", + "std_sample = sample.std(ddof=1)\n", + "n = len(sample)\n", + "\n", + "#5 Get p-value\n", + "print(st.ttest_1samp(sample, 30, alternative = \"two-sided\")[1])\n", + "print(\"Reject Null Hypothesis\")" ] }, { @@ -143,12 +364,56 @@ }, { 
"cell_type": "code", - "execution_count": null, + "execution_count": 27, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.0010301701775482569\n", + "P-value: 0.0010301701775482569\n", + "Reject Null Hypothesis\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Jacob\\AppData\\Local\\Temp\\ipykernel_10136\\2356867737.py:10: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " sample.dropna(inplace=True)\n" + ] + } + ], "source": [ - "# Your code here:\n", - "\n" + "# Hypothesis\n", + "# H0: average police salary <= 86.00\n", + "# H1: average police salary > 86.00\n", + "\n", + "# Significance Level\n", + "alpha = 0.05\n", + "\n", + "#3 Sample\n", + "sample = salaries[salaries[\"Department\"]==\"POLICE\"][\"Annual Salary\"]\n", + "sample.dropna(inplace=True)\n", + "\n", + "#4 Compute Statistics\n", + "\n", + "mean_sample = sample.mean()\n", + "std_sample = sample.std(ddof=1)\n", + "n = len(sample)\n", + "\n", + "#5 Get p-value\n", + "print(st.ttest_1samp(sample,86000 , alternative = \"greater\")[1])\n", + "p_value = st.ttest_1samp(sample, 86000, alternative=\"greater\")[1]\n", + "print(\"P-value:\", p_value)\n", + "if p_value < alpha:\n", + " print(\"Reject Null Hypothesis\")\n", + "else:\n", + " print(\"Fail to Reject Null Hypothesis\")" ] }, { @@ -160,12 +425,267 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 28, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Salary or HourlyHourlySalary
Department
ADMIN HEARNG039
ANIMAL CONTRL1962
AVIATION1082547
BOARD OF ELECTION0107
BOARD OF ETHICS08
BUDGET & MGMT244
BUILDINGS0269
BUSINESS AFFAIRS7164
CITY CLERK084
CITY COUNCIL64347
COMMUNITY DEVELOPMENT4203
COPA0116
CULTURAL AFFAIRS758
DISABILITIES028
DoIT099
FAMILY & SUPPORT287328
FINANCE44516
FIRE24639
GENERAL SERVICES765215
HEALTH3485
HUMAN RELATIONS016
HUMAN RESOURCES475
INSPECTOR GEN087
LAW40367
LICENSE APPL COMM01
MAYOR'S OFFICE877
OEMC1273829
POLICE1013404
POLICE BOARD02
PROCUREMENT290
PUBLIC LIBRARY299716
STREETS & SAN1862336
TRANSPORTN725415
TREASURER022
WATER MGMNT1513366
\n", + "
" + ], + "text/plain": [ + "Salary or Hourly Hourly Salary\n", + "Department \n", + "ADMIN HEARNG 0 39\n", + "ANIMAL CONTRL 19 62\n", + "AVIATION 1082 547\n", + "BOARD OF ELECTION 0 107\n", + "BOARD OF ETHICS 0 8\n", + "BUDGET & MGMT 2 44\n", + "BUILDINGS 0 269\n", + "BUSINESS AFFAIRS 7 164\n", + "CITY CLERK 0 84\n", + "CITY COUNCIL 64 347\n", + "COMMUNITY DEVELOPMENT 4 203\n", + "COPA 0 116\n", + "CULTURAL AFFAIRS 7 58\n", + "DISABILITIES 0 28\n", + "DoIT 0 99\n", + "FAMILY & SUPPORT 287 328\n", + "FINANCE 44 516\n", + "FIRE 2 4639\n", + "GENERAL SERVICES 765 215\n", + "HEALTH 3 485\n", + "HUMAN RELATIONS 0 16\n", + "HUMAN RESOURCES 4 75\n", + "INSPECTOR GEN 0 87\n", + "LAW 40 367\n", + "LICENSE APPL COMM 0 1\n", + "MAYOR'S OFFICE 8 77\n", + "OEMC 1273 829\n", + "POLICE 10 13404\n", + "POLICE BOARD 0 2\n", + "PROCUREMENT 2 90\n", + "PUBLIC LIBRARY 299 716\n", + "STREETS & SAN 1862 336\n", + "TRANSPORTN 725 415\n", + "TREASURER 0 22\n", + "WATER MGMNT 1513 366" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n", - "\n" + "df = pd.crosstab(salaries[\"Department\"],salaries[\"Salary or Hourly\"])\n", + "df" ] }, { @@ -177,12 +697,54 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 30, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "P-value: 1.6689265282353859e-21\n", + "Reject Null Hypothesis\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Jacob\\AppData\\Local\\Temp\\ipykernel_10136\\2920897562.py:10: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " sample.dropna(inplace=True)\n" + ] + } + ], "source": [ - "# Your code here:\n", - "\n" + "# 
Hypothesis\n", + "# H0: mu >= 35\n", + "# H1: mu < 35\n", + "\n", + "# Significance Level\n", + "alpha = 0.05\n", + "\n", + "#3 Sample\n", + "sample = salaries[salaries[\"Department\"]==\"STREETS & SAN\"][\"Hourly Rate\"]\n", + "sample.dropna(inplace=True)\n", + "\n", + "#4 Compute Statistics\n", + "\n", + "mean_sample = sample.mean()\n", + "std_sample = sample.std(ddof=1)\n", + "n = len(sample)\n", + "\n", + "#5 Get p-value\n", + "p_value = st.ttest_1samp(sample, 35, alternative=\"less\")[1]\n", + "print(\"P-value:\", p_value)\n", + "if p_value < alpha:\n", + " print(\"Reject Null Hypothesis\")\n", + "else:\n", + " print(\"Fail to Reject Null Hypothesis\")" ] }, { @@ -206,12 +768,28 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 35, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "(32.52345834488425, 33.05365708767623)" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n", - "\n" + "sample = salaries[\"Hourly Rate\"]\n", + "sample.dropna(inplace=True)\n", + "mean_sample = sample.mean()\n", + "std_sample = sample.std(ddof=1)\n", + "n = len(sample)\n", + "\n", + "st.t.interval(0.95,n-1, loc=mean_sample, scale = std_sample/np.sqrt(n))" ] }, { @@ -223,12 +801,40 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 36, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Jacob\\AppData\\Local\\Temp\\ipykernel_10136\\604985524.py:2: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " sample.dropna(inplace=True)\n" + ] + }, + { + "data": { + "text/plain": [ + "(86177.05631531785, 86795.77269094893)" + ] + }, + 
"execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n", - "\n" + "sample = salaries[salaries[\"Department\"]==\"POLICE\"][\"Annual Salary\"]\n", + "sample.dropna(inplace=True)\n", + "\n", + "mean_sample = sample.mean()\n", + "std_sample = sample.std(ddof=1)\n", + "n = len(sample)\n", + "\n", + "st.t.interval(0.95,n-1, loc=mean_sample, scale = std_sample/np.sqrt(n))" ] }, { @@ -257,7 +863,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -271,7 +877,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.11.3" } }, "nbformat": 4,