From 82dead61932f2aa194bf41f0ee734404b2837b4c Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 20 Nov 2023 00:40:32 +0000 Subject: [PATCH] lab done --- your-code/main.ipynb | 613 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 576 insertions(+), 37 deletions(-) diff --git a/your-code/main.ipynb b/your-code/main.ipynb index 59b955a..4b2716f 100755 --- a/your-code/main.ipynb +++ b/your-code/main.ipynb @@ -12,12 +12,13 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# import numpy and pandas\n", - "\n" + "import numpy as np\n", + "import pandas as pd" ] }, { @@ -31,11 +32,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "salaries = pd.read_csv(\"Current_Employee_Names__Salaries__and_Position_Titles.csv\")" ] }, { @@ -47,12 +49,162 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NameJob TitlesDepartmentFull or Part-TimeSalary or HourlyTypical HoursAnnual SalaryHourly Rate
0AARON, JEFFERY MSERGEANTPOLICEFSalaryNaN101442.0NaN
1AARON, KARINAPOLICE OFFICER (ASSIGNED AS DETECTIVE)POLICEFSalaryNaN94122.0NaN
2AARON, KIMBERLEI RCHIEF CONTRACT EXPEDITERGENERAL SERVICESFSalaryNaN101592.0NaN
3ABAD JR, VICENTE MCIVIL ENGINEER IVWATER MGMNTFSalaryNaN110064.0NaN
4ABASCAL, REECE ETRAFFIC CONTROL AIDE-HOURLYOEMCPHourly20.0NaN19.86
\n", + "
" + ], + "text/plain": [ + " Name Job Titles \\\n", + "0 AARON, JEFFERY M SERGEANT \n", + "1 AARON, KARINA POLICE OFFICER (ASSIGNED AS DETECTIVE) \n", + "2 AARON, KIMBERLEI R CHIEF CONTRACT EXPEDITER \n", + "3 ABAD JR, VICENTE M CIVIL ENGINEER IV \n", + "4 ABASCAL, REECE E TRAFFIC CONTROL AIDE-HOURLY \n", + "\n", + " Department Full or Part-Time Salary or Hourly Typical Hours \\\n", + "0 POLICE F Salary NaN \n", + "1 POLICE F Salary NaN \n", + "2 GENERAL SERVICES F Salary NaN \n", + "3 WATER MGMNT F Salary NaN \n", + "4 OEMC P Hourly 20.0 \n", + "\n", + " Annual Salary Hourly Rate \n", + "0 101442.0 NaN \n", + "1 94122.0 NaN \n", + "2 101592.0 NaN \n", + "3 110064.0 NaN \n", + "4 NaN 19.86 " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", - "\n" + "salaries.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 33183 entries, 0 to 33182\n", + "Data columns (total 8 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 Name 33183 non-null object \n", + " 1 Job Titles 33183 non-null object \n", + " 2 Department 33183 non-null object \n", + " 3 Full or Part-Time 33183 non-null object \n", + " 4 Salary or Hourly 33183 non-null object \n", + " 5 Typical Hours 8022 non-null float64\n", + " 6 Annual Salary 25161 non-null float64\n", + " 7 Hourly Rate 8022 non-null float64\n", + "dtypes: float64(3), object(5)\n", + "memory usage: 2.0+ MB\n" + ] + } + ], + "source": [ + "salaries.info()" ] }, { @@ -64,12 +216,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The amount nulls in Name is 0\n", + "The amount nulls in Job Titles is 0\n", + "The amount nulls in Department is 0\n", 
+ "The amount nulls in Full or Part-Time is 0\n", + "The amount nulls in Salary or Hourly is 0\n", + "The amount nulls in Typical Hours is 25161\n", + "The amount nulls in Annual Salary is 8022\n", + "The amount nulls in Hourly Rate is 25161\n" + ] + } + ], "source": [ "# Your code here:\n", - "\n" + "columns = list(salaries.columns)\n", + "\n", + "columns = list(salaries.columns)\n", + "for x in columns:\n", + " print(f\"The amount nulls in {x} is {salaries[x].isnull().sum()}\")" ] }, { @@ -81,12 +252,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "Salary 25161\n", + "Hourly 8022\n", + "Name: Salary or Hourly, dtype: int64" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", - "\n" + "salaries[\"Salary or Hourly\"].value_counts()" ] }, { @@ -105,12 +289,230 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Name
Department
ADMIN HEARNG39
ANIMAL CONTRL81
AVIATION1629
BOARD OF ELECTION107
BOARD OF ETHICS8
BUDGET & MGMT46
BUILDINGS269
BUSINESS AFFAIRS171
CITY CLERK84
CITY COUNCIL411
COMMUNITY DEVELOPMENT207
COPA116
CULTURAL AFFAIRS65
DISABILITIES28
DoIT99
FAMILY & SUPPORT615
FINANCE560
FIRE4641
GENERAL SERVICES980
HEALTH488
HUMAN RELATIONS16
HUMAN RESOURCES79
INSPECTOR GEN87
LAW407
LICENSE APPL COMM1
MAYOR'S OFFICE85
OEMC2102
POLICE13414
POLICE BOARD2
PROCUREMENT92
PUBLIC LIBRARY1015
STREETS & SAN2198
TRANSPORTN1140
TREASURER22
WATER MGMNT1879
\n", + "
" + ], + "text/plain": [ + " Name\n", + "Department \n", + "ADMIN HEARNG 39\n", + "ANIMAL CONTRL 81\n", + "AVIATION 1629\n", + "BOARD OF ELECTION 107\n", + "BOARD OF ETHICS 8\n", + "BUDGET & MGMT 46\n", + "BUILDINGS 269\n", + "BUSINESS AFFAIRS 171\n", + "CITY CLERK 84\n", + "CITY COUNCIL 411\n", + "COMMUNITY DEVELOPMENT 207\n", + "COPA 116\n", + "CULTURAL AFFAIRS 65\n", + "DISABILITIES 28\n", + "DoIT 99\n", + "FAMILY & SUPPORT 615\n", + "FINANCE 560\n", + "FIRE 4641\n", + "GENERAL SERVICES 980\n", + "HEALTH 488\n", + "HUMAN RELATIONS 16\n", + "HUMAN RESOURCES 79\n", + "INSPECTOR GEN 87\n", + "LAW 407\n", + "LICENSE APPL COMM 1\n", + "MAYOR'S OFFICE 85\n", + "OEMC 2102\n", + "POLICE 13414\n", + "POLICE BOARD 2\n", + "PROCUREMENT 92\n", + "PUBLIC LIBRARY 1015\n", + "STREETS & SAN 2198\n", + "TRANSPORTN 1140\n", + "TREASURER 22\n", + "WATER MGMNT 1879" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", - "\n" + "salaries.groupby(\"Department\").agg({\"Name\": \"count\"})" ] }, { @@ -124,12 +526,37 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "TtestResult(statistic=20.6198057854942, pvalue=4.3230240486229894e-92, df=8021)" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n", - "\n" + "# Your code\n", + "import scipy.stats as st\n", + "# 1. Set the hipothesis\n", + "# H0: mu hourly wage = 30$/hr\n", + "# H1: mu hourly wage != 30$/hr\n", + "\n", + "# 2. Significance level\n", + "alpha = 0.05\n", + "\n", + "# 3. Sample\n", + "sample = salaries[salaries[\"Salary or Hourly\"]== \"Hourly\"][\"Hourly Rate\"]\n", + "\n", + "# 4. Compute statistics / 5. Get p-value\n", + "st.ttest_1samp(sample, 30)\n", + "\n", + "# 6. 
Decide: p value is smaller than the significance level --> we reject the null" ] }, { @@ -143,12 +570,36 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "TtestResult(statistic=3.081997005712994, pvalue=0.0010301701775482569, df=13403)" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", - "\n" + "# 1. Set the hypothesis\n", + "# H0: mu annual salary <= 86000$\n", + "# H1: mu annual salary > 86000$\n", + "\n", + "# 2. Significance level\n", + "alpha = 0.05\n", + "\n", + "# 3. Sample\n", + "sample_salary = salaries[(salaries[\"Department\"] == \"POLICE\") & (salaries[\"Salary or Hourly\"] == \"Salary\")][\"Annual Salary\"]\n", + "\n", + "# 4. Compute statistics / 5. Get p-value\n", + "st.ttest_1samp(sample_salary, 86000, alternative = \"greater\")\n", + "\n", + "# 6. Decide: p value is smaller than the significance level --> we reject the null" ] }, { @@ -160,12 +611,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'POLICE'" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", - "\n" + "table = pd.crosstab(salaries[\"Department\"],[\"Salary or Hourly\"])  # NOTE(review): the second argument is a literal one-element list, not the salaries column, so this likely counts all employees per department rather than hourly workers - confirm\n", + "\n", + "department = table[\"Salary or Hourly\"].idxmax()\n", + "department" ] }, { @@ -177,12 +642,36 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "TtestResult(statistic=-825.6069638307035, pvalue=1.4284243665683163e-23, df=9)" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", - "\n" + "# 1. 
Set the hypothesis\n", + "# H0: mu hourly wage >= 35$\n", + "# H1: mu hourly wage < 35$\n", + "\n", + "# 2. Significance level\n", + "alpha = 0.05\n", + "\n", + "# 3. Sample\n", + "hourly_wage = salaries[(salaries[\"Department\"] == \"POLICE\") & (salaries[\"Salary or Hourly\"] == \"Hourly\")][\"Hourly Rate\"]\n", + "\n", + "# 4. Compute statistics / 5. Get p-value\n", + "st.ttest_1samp(hourly_wage, 35, alternative = \"less\")\n", + "\n", + "# 6. Decide: p value is smaller than the significance level --> we reject the null hypothesis." ] }, { @@ -206,12 +695,39 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Confidence interval of the hourly wage: (32.52345834488425, 33.05365708767623)\n" + ] + } + ], "source": [ "# Your code here:\n", - "\n" + "# Sample\n", + "hourly_wage = salaries[salaries[\"Salary or Hourly\"] == \"Hourly\"][\"Hourly Rate\"]\n", + "\n", + "# Confidence level\n", + "confidence_level = 0.95\n", + "\n", + "# Degrees of freedom\n", + "ddof = len(hourly_wage) - 1\n", + "\n", + "# Compute the mean and standard error\n", + "mean = hourly_wage.mean()\n", + "standard_error = st.sem(hourly_wage)\n", + "\n", + "# Calculate the confidence interval using t.interval\n", + "confidence_interval = st.t.interval(confidence_level, df=ddof, loc=mean, scale=standard_error)\n", + "\n", + "print(f\"Confidence interval of the hourly wage: {confidence_interval}\")\n", + "\n", + "# This means that we are 95% confident that the true population mean hourly wage for all hourly workers \n", + "# lies within the range of $32.52 to $33.05 per hour." 
] }, { @@ -223,12 +739,35 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "95% Confidence Interval for Annual Salary employees in Police: (86177.05631531784, 86795.77269094894)\n" + ] + } + ], "source": [ "# Your code here:\n", - "\n" + "police_employee = salaries[(salaries[\"Department\"] == \"POLICE\") & (salaries[\"Salary or Hourly\"] == \"Salary\")][\"Annual Salary\"]\n", + "\n", + "# Confidence level\n", + "confidence_level = 0.95\n", + "\n", + "# Degrees of freedom\n", + "ddof = len(police_employee) - 1\n", + "\n", + "# Compute the mean and standard error\n", + "mean = police_employee.mean()\n", + "standard_error = st.sem(police_employee)\n", + "\n", + "# Calculate the confidence interval using t.interval\n", + "confidence_interval = st.t.interval(confidence_level, df=ddof, loc=mean, scale=standard_error)\n", + "\n", + "print(f\"95% Confidence Interval for Annual Salary employees in Police: {confidence_interval}\")" ] }, { @@ -257,7 +796,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -271,7 +810,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.11.4" } }, "nbformat": 4,