From 03cb439811e7ef9fc92de775be56e374475ef690 Mon Sep 17 00:00:00 2001 From: Alexandre Garanhao Date: Thu, 18 May 2023 15:21:38 +0100 Subject: [PATCH] lab done --- your-code/main.ipynb | 840 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 806 insertions(+), 34 deletions(-) diff --git a/your-code/main.ipynb b/your-code/main.ipynb index 59b955a..2384f0c 100755 --- a/your-code/main.ipynb +++ b/your-code/main.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -12,15 +13,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# import numpy and pandas\n", - "\n" + "import numpy as np\n", + "import pandas as pd\n", + "import scipy.stats as st\n" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -31,14 +35,16 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 71, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "# Your code here:\n", + "salaries = pd.read_csv(\"Current_Employee_Names__Salaries__and_Position_Titles.csv\")" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -47,15 +53,135 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NameJob TitlesDepartmentFull or Part-TimeSalary or HourlyTypical HoursAnnual SalaryHourly Rate
0AARON, JEFFERY MSERGEANTPOLICEFSalaryNaN101442.0NaN
1AARON, KARINAPOLICE OFFICER (ASSIGNED AS DETECTIVE)POLICEFSalaryNaN94122.0NaN
2AARON, KIMBERLEI RCHIEF CONTRACT EXPEDITERGENERAL SERVICESFSalaryNaN101592.0NaN
3ABAD JR, VICENTE MCIVIL ENGINEER IVWATER MGMNTFSalaryNaN110064.0NaN
4ABASCAL, REECE ETRAFFIC CONTROL AIDE-HOURLYOEMCPHourly20.0NaN19.86
\n", + "
" + ], + "text/plain": [ + " Name Job Titles \n", + "0 AARON, JEFFERY M SERGEANT \\\n", + "1 AARON, KARINA POLICE OFFICER (ASSIGNED AS DETECTIVE) \n", + "2 AARON, KIMBERLEI R CHIEF CONTRACT EXPEDITER \n", + "3 ABAD JR, VICENTE M CIVIL ENGINEER IV \n", + "4 ABASCAL, REECE E TRAFFIC CONTROL AIDE-HOURLY \n", + "\n", + " Department Full or Part-Time Salary or Hourly Typical Hours \n", + "0 POLICE F Salary NaN \\\n", + "1 POLICE F Salary NaN \n", + "2 GENERAL SERVICES F Salary NaN \n", + "3 WATER MGMNT F Salary NaN \n", + "4 OEMC P Hourly 20.0 \n", + "\n", + " Annual Salary Hourly Rate \n", + "0 101442.0 NaN \n", + "1 94122.0 NaN \n", + "2 101592.0 NaN \n", + "3 110064.0 NaN \n", + "4 NaN 19.86 " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", - "\n" + "salaries.head()\n" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -64,15 +190,35 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "Name 0\n", + "Job Titles 0\n", + "Department 0\n", + "Full or Part-Time 0\n", + "Salary or Hourly 0\n", + "Typical Hours 25161\n", + "Annual Salary 8022\n", + "Hourly Rate 25161\n", + "dtype: int64" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", - "\n" + "salaries.isnull().sum()\n" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -81,15 +227,30 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "Salary or Hourly\n", + "Salary 25161\n", + "Hourly 8022\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", - "\n" + 
"salaries[\"Salary or Hourly\"].value_counts()\n" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -97,6 +258,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -105,15 +267,234 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Name
Department
ADMIN HEARNG39
ANIMAL CONTRL81
AVIATION1629
BOARD OF ELECTION107
BOARD OF ETHICS8
BUDGET & MGMT46
BUILDINGS269
BUSINESS AFFAIRS171
CITY CLERK84
CITY COUNCIL411
COMMUNITY DEVELOPMENT207
COPA116
CULTURAL AFFAIRS65
DISABILITIES28
DoIT99
FAMILY & SUPPORT615
FINANCE560
FIRE4641
GENERAL SERVICES980
HEALTH488
HUMAN RELATIONS16
HUMAN RESOURCES79
INSPECTOR GEN87
LAW407
LICENSE APPL COMM1
MAYOR'S OFFICE85
OEMC2102
POLICE13414
POLICE BOARD2
PROCUREMENT92
PUBLIC LIBRARY1015
STREETS & SAN2198
TRANSPORTN1140
TREASURER22
WATER MGMNT1879
\n", + "
" + ], + "text/plain": [ + " Name\n", + "Department \n", + "ADMIN HEARNG 39\n", + "ANIMAL CONTRL 81\n", + "AVIATION 1629\n", + "BOARD OF ELECTION 107\n", + "BOARD OF ETHICS 8\n", + "BUDGET & MGMT 46\n", + "BUILDINGS 269\n", + "BUSINESS AFFAIRS 171\n", + "CITY CLERK 84\n", + "CITY COUNCIL 411\n", + "COMMUNITY DEVELOPMENT 207\n", + "COPA 116\n", + "CULTURAL AFFAIRS 65\n", + "DISABILITIES 28\n", + "DoIT 99\n", + "FAMILY & SUPPORT 615\n", + "FINANCE 560\n", + "FIRE 4641\n", + "GENERAL SERVICES 980\n", + "HEALTH 488\n", + "HUMAN RELATIONS 16\n", + "HUMAN RESOURCES 79\n", + "INSPECTOR GEN 87\n", + "LAW 407\n", + "LICENSE APPL COMM 1\n", + "MAYOR'S OFFICE 85\n", + "OEMC 2102\n", + "POLICE 13414\n", + "POLICE BOARD 2\n", + "PROCUREMENT 92\n", + "PUBLIC LIBRARY 1015\n", + "STREETS & SAN 2198\n", + "TRANSPORTN 1140\n", + "TREASURER 22\n", + "WATER MGMNT 1879" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", - "\n" + "salaries.groupby(\"Department\").agg({\"Name\":\"count\"})" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -124,15 +505,35 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 26, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", - "\n" + "alpha = 0.05\n", + "sample=salaries[salaries[\"Salary or Hourly\"]==\"Hourly\"][\"Hourly Rate\"]\n", + "\n", + "# H0 : mu = 30\n", + "#H1 : mu != 30\n", + "\n", + "p_value = st.ttest_1samp(sample, 30)[1]\n", + "\n", + "p_value < alpha" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -143,15 +544,51 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 58, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": 
[ + "3.081997005712994\n", + "0.0010301701775482569\n" + ] + }, + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", - "\n" + "\n", + "# H0: mu salaries department police <= 86000€\n", + "# H1: mu salaries department police > 86000€\n", + "\n", + "#2\n", + "alpha=0.05\n", + "\n", + "#3 sample\n", + "sample=salaries.loc[(salaries[\"Department\"]==\"POLICE\")&(salaries[\"Salary or Hourly\"]==\"Salary\")]\n", + "\n", + "#4. & 5. Compute stats one tailed t-test and p_value\n", + "\n", + "stat, p_value=st.ttest_1samp(sample['Annual Salary'], 86000)\n", + "print(stat)\n", + "print(p_value/2 )\n", + "\n", + "p_value\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", 
+ " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Salary or HourlyFalseTrue
Department
ADMIN HEARNG390
ANIMAL CONTRL6219
AVIATION5471082
BOARD OF ELECTION1070
BOARD OF ETHICS80
BUDGET & MGMT442
BUILDINGS2690
BUSINESS AFFAIRS1647
CITY CLERK840
CITY COUNCIL34764
COMMUNITY DEVELOPMENT2034
COPA1160
CULTURAL AFFAIRS587
DISABILITIES280
DoIT990
FAMILY & SUPPORT328287
FINANCE51644
FIRE46392
GENERAL SERVICES215765
HEALTH4853
HUMAN RELATIONS160
HUMAN RESOURCES754
INSPECTOR GEN870
LAW36740
LICENSE APPL COMM10
MAYOR'S OFFICE778
OEMC8291273
POLICE1340410
POLICE BOARD20
PROCUREMENT902
PUBLIC LIBRARY716299
STREETS & SAN3361862
TRANSPORTN415725
TREASURER220
WATER MGMNT3661513
\n", + "" + ], + "text/plain": [ + "Salary or Hourly False True \n", + "Department \n", + "ADMIN HEARNG 39 0\n", + "ANIMAL CONTRL 62 19\n", + "AVIATION 547 1082\n", + "BOARD OF ELECTION 107 0\n", + "BOARD OF ETHICS 8 0\n", + "BUDGET & MGMT 44 2\n", + "BUILDINGS 269 0\n", + "BUSINESS AFFAIRS 164 7\n", + "CITY CLERK 84 0\n", + "CITY COUNCIL 347 64\n", + "COMMUNITY DEVELOPMENT 203 4\n", + "COPA 116 0\n", + "CULTURAL AFFAIRS 58 7\n", + "DISABILITIES 28 0\n", + "DoIT 99 0\n", + "FAMILY & SUPPORT 328 287\n", + "FINANCE 516 44\n", + "FIRE 4639 2\n", + "GENERAL SERVICES 215 765\n", + "HEALTH 485 3\n", + "HUMAN RELATIONS 16 0\n", + "HUMAN RESOURCES 75 4\n", + "INSPECTOR GEN 87 0\n", + "LAW 367 40\n", + "LICENSE APPL COMM 1 0\n", + "MAYOR'S OFFICE 77 8\n", + "OEMC 829 1273\n", + "POLICE 13404 10\n", + "POLICE BOARD 2 0\n", + "PROCUREMENT 90 2\n", + "PUBLIC LIBRARY 716 299\n", + "STREETS & SAN 336 1862\n", + "TRANSPORTN 415 725\n", + "TREASURER 22 0\n", + "WATER MGMNT 366 1513" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", - "\n" + "pd.crosstab(salaries[\"Department\"], salaries[\"Salary or Hourly\"]==\"Hourly\")" ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -177,15 +870,47 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 40, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-9.567447887848152\n", + "8.344632641176929e-22\n" + ] + }, + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", - "\n" + "\n", + "# H0: mu hourly wage of STREETS & SAN hourly workers >= 35€/hour\n", + "# H1: mu hourly wage of STREETS & SAN hourly workers < 35€/hour\n", + "\n", + "alpha=0.05\n", + "\n", + "sample= salaries[(salaries[\"Department\"]==\"STREETS & 
SAN\")&(salaries[\"Salary or Hourly\"]==\"Hourly\")]\n", + "\n", + "stat, p_value=st.ttest_1samp(sample['Hourly Rate'], 35, alternative=\"less\")\n", + "print(stat)\n", + "print(p_value)\n", + "\n", + "p_value