diff --git a/your-code/main.ipynb b/your-code/main.ipynb
index 59b955a..82e57d1 100755
--- a/your-code/main.ipynb
+++ b/your-code/main.ipynb
@@ -12,12 +12,14 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 74,
"metadata": {},
"outputs": [],
"source": [
- "# import numpy and pandas\n",
- "\n"
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "\n",
+ "import scipy.stats as st\n"
]
},
{
@@ -31,11 +33,219 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 75,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Name | \n",
+ " Job Titles | \n",
+ " Department | \n",
+ " Full or Part-Time | \n",
+ " Salary or Hourly | \n",
+ " Typical Hours | \n",
+ " Annual Salary | \n",
+ " Hourly Rate | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " AARON, JEFFERY M | \n",
+ " SERGEANT | \n",
+ " POLICE | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 101442.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " AARON, KARINA | \n",
+ " POLICE OFFICER (ASSIGNED AS DETECTIVE) | \n",
+ " POLICE | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 94122.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " AARON, KIMBERLEI R | \n",
+ " CHIEF CONTRACT EXPEDITER | \n",
+ " GENERAL SERVICES | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 101592.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " ABAD JR, VICENTE M | \n",
+ " CIVIL ENGINEER IV | \n",
+ " WATER MGMNT | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 110064.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " ABASCAL, REECE E | \n",
+ " TRAFFIC CONTROL AIDE-HOURLY | \n",
+ " OEMC | \n",
+ " P | \n",
+ " Hourly | \n",
+ " 20.0 | \n",
+ " NaN | \n",
+ " 19.86 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 33178 | \n",
+ " ZYLINSKA, KATARZYNA | \n",
+ " POLICE OFFICER | \n",
+ " POLICE | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 72510.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 33179 | \n",
+ " ZYMANTAS, LAURA C | \n",
+ " POLICE OFFICER | \n",
+ " POLICE | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 48078.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 33180 | \n",
+ " ZYMANTAS, MARK E | \n",
+ " POLICE OFFICER | \n",
+ " POLICE | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 90024.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 33181 | \n",
+ " ZYRKOWSKI, CARLO E | \n",
+ " POLICE OFFICER | \n",
+ " POLICE | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 93354.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 33182 | \n",
+ " ZYSKOWSKI, DARIUSZ | \n",
+ " CHIEF DATA BASE ANALYST | \n",
+ " DoIT | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 115932.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
33183 rows × 8 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Name Job Titles \\\n",
+ "0 AARON, JEFFERY M SERGEANT \n",
+ "1 AARON, KARINA POLICE OFFICER (ASSIGNED AS DETECTIVE) \n",
+ "2 AARON, KIMBERLEI R CHIEF CONTRACT EXPEDITER \n",
+ "3 ABAD JR, VICENTE M CIVIL ENGINEER IV \n",
+ "4 ABASCAL, REECE E TRAFFIC CONTROL AIDE-HOURLY \n",
+ "... ... ... \n",
+ "33178 ZYLINSKA, KATARZYNA POLICE OFFICER \n",
+ "33179 ZYMANTAS, LAURA C POLICE OFFICER \n",
+ "33180 ZYMANTAS, MARK E POLICE OFFICER \n",
+ "33181 ZYRKOWSKI, CARLO E POLICE OFFICER \n",
+ "33182 ZYSKOWSKI, DARIUSZ CHIEF DATA BASE ANALYST \n",
+ "\n",
+ " Department Full or Part-Time Salary or Hourly Typical Hours \\\n",
+ "0 POLICE F Salary NaN \n",
+ "1 POLICE F Salary NaN \n",
+ "2 GENERAL SERVICES F Salary NaN \n",
+ "3 WATER MGMNT F Salary NaN \n",
+ "4 OEMC P Hourly 20.0 \n",
+ "... ... ... ... ... \n",
+ "33178 POLICE F Salary NaN \n",
+ "33179 POLICE F Salary NaN \n",
+ "33180 POLICE F Salary NaN \n",
+ "33181 POLICE F Salary NaN \n",
+ "33182 DoIT F Salary NaN \n",
+ "\n",
+ " Annual Salary Hourly Rate \n",
+ "0 101442.0 NaN \n",
+ "1 94122.0 NaN \n",
+ "2 101592.0 NaN \n",
+ "3 110064.0 NaN \n",
+ "4 NaN 19.86 \n",
+ "... ... ... \n",
+ "33178 72510.0 NaN \n",
+ "33179 48078.0 NaN \n",
+ "33180 90024.0 NaN \n",
+ "33181 93354.0 NaN \n",
+ "33182 115932.0 NaN \n",
+ "\n",
+ "[33183 rows x 8 columns]"
+ ]
+ },
+ "execution_count": 75,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n"
+ "# Read the employee CSV (expected alongside this notebook) into a DataFrame.\n",
+ "salaries = pd.read_csv(filepath_or_buffer=\"Current_Employee_Names__Salaries__and_Position_Titles.csv\")\n",
+ "salaries"
]
},
{
@@ -47,12 +257,130 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 76,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Name | \n",
+ " Job Titles | \n",
+ " Department | \n",
+ " Full or Part-Time | \n",
+ " Salary or Hourly | \n",
+ " Typical Hours | \n",
+ " Annual Salary | \n",
+ " Hourly Rate | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " AARON, JEFFERY M | \n",
+ " SERGEANT | \n",
+ " POLICE | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 101442.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " AARON, KARINA | \n",
+ " POLICE OFFICER (ASSIGNED AS DETECTIVE) | \n",
+ " POLICE | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 94122.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " AARON, KIMBERLEI R | \n",
+ " CHIEF CONTRACT EXPEDITER | \n",
+ " GENERAL SERVICES | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 101592.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " ABAD JR, VICENTE M | \n",
+ " CIVIL ENGINEER IV | \n",
+ " WATER MGMNT | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 110064.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " ABASCAL, REECE E | \n",
+ " TRAFFIC CONTROL AIDE-HOURLY | \n",
+ " OEMC | \n",
+ " P | \n",
+ " Hourly | \n",
+ " 20.0 | \n",
+ " NaN | \n",
+ " 19.86 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Name Job Titles \\\n",
+ "0 AARON, JEFFERY M SERGEANT \n",
+ "1 AARON, KARINA POLICE OFFICER (ASSIGNED AS DETECTIVE) \n",
+ "2 AARON, KIMBERLEI R CHIEF CONTRACT EXPEDITER \n",
+ "3 ABAD JR, VICENTE M CIVIL ENGINEER IV \n",
+ "4 ABASCAL, REECE E TRAFFIC CONTROL AIDE-HOURLY \n",
+ "\n",
+ " Department Full or Part-Time Salary or Hourly Typical Hours \\\n",
+ "0 POLICE F Salary NaN \n",
+ "1 POLICE F Salary NaN \n",
+ "2 GENERAL SERVICES F Salary NaN \n",
+ "3 WATER MGMNT F Salary NaN \n",
+ "4 OEMC P Hourly 20.0 \n",
+ "\n",
+ " Annual Salary Hourly Rate \n",
+ "0 101442.0 NaN \n",
+ "1 94122.0 NaN \n",
+ "2 101592.0 NaN \n",
+ "3 110064.0 NaN \n",
+ "4 NaN 19.86 "
+ ]
+ },
+ "execution_count": 76,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n",
- "\n"
+ "salaries.head(n=5)"
]
},
{
@@ -64,11 +392,27 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 77,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Typical Hours 25161\n",
+ "Annual Salary 8022\n",
+ "Hourly Rate 25161\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 77,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n",
+ "# Count missing values per column and keep only the columns that have any.\n",
+ "null_cols = salaries.isna().sum()\n",
+ "null_cols.loc[null_cols.gt(0)]\n",
"\n"
]
},
@@ -81,12 +425,96 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 78,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Name | \n",
+ " Job Titles | \n",
+ " Department | \n",
+ " Full or Part-Time | \n",
+ " Typical Hours | \n",
+ " Annual Salary | \n",
+ " Hourly Rate | \n",
+ "
\n",
+ " \n",
+ " | Salary or Hourly | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | Hourly | \n",
+ " 8022 | \n",
+ " 8022 | \n",
+ " 8022 | \n",
+ " 8022 | \n",
+ " 8022 | \n",
+ " 0 | \n",
+ " 8022 | \n",
+ "
\n",
+ " \n",
+ " | Salary | \n",
+ " 25161 | \n",
+ " 25161 | \n",
+ " 25161 | \n",
+ " 25161 | \n",
+ " 0 | \n",
+ " 25161 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Name Job Titles Department Full or Part-Time \\\n",
+ "Salary or Hourly \n",
+ "Hourly 8022 8022 8022 8022 \n",
+ "Salary 25161 25161 25161 25161 \n",
+ "\n",
+ " Typical Hours Annual Salary Hourly Rate \n",
+ "Salary or Hourly \n",
+ "Hourly 8022 0 8022 \n",
+ "Salary 0 25161 0 "
+ ]
+ },
+ "execution_count": 78,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n",
- "\n"
+ "# Non-null entries per column, split by pay type (salaried vs. hourly).\n",
+ "count = salaries.groupby(by=[\"Salary or Hourly\"]).count()\n",
+ "\n",
+ "count"
]
},
{
@@ -105,12 +533,60 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 79,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Department\n",
+ "ADMIN HEARNG 39\n",
+ "ANIMAL CONTRL 81\n",
+ "AVIATION 1629\n",
+ "BOARD OF ELECTION 107\n",
+ "BOARD OF ETHICS 8\n",
+ "BUDGET & MGMT 46\n",
+ "BUILDINGS 269\n",
+ "BUSINESS AFFAIRS 171\n",
+ "CITY CLERK 84\n",
+ "CITY COUNCIL 411\n",
+ "COMMUNITY DEVELOPMENT 207\n",
+ "COPA 116\n",
+ "CULTURAL AFFAIRS 65\n",
+ "DISABILITIES 28\n",
+ "DoIT 99\n",
+ "FAMILY & SUPPORT 615\n",
+ "FINANCE 560\n",
+ "FIRE 4641\n",
+ "GENERAL SERVICES 980\n",
+ "HEALTH 488\n",
+ "HUMAN RELATIONS 16\n",
+ "HUMAN RESOURCES 79\n",
+ "INSPECTOR GEN 87\n",
+ "LAW 407\n",
+ "LICENSE APPL COMM 1\n",
+ "MAYOR'S OFFICE 85\n",
+ "OEMC 2102\n",
+ "POLICE 13414\n",
+ "POLICE BOARD 2\n",
+ "PROCUREMENT 92\n",
+ "PUBLIC LIBRARY 1015\n",
+ "STREETS & SAN 2198\n",
+ "TRANSPORTN 1140\n",
+ "TREASURER 22\n",
+ "WATER MGMNT 1879\n",
+ "Name: Name, dtype: int64"
+ ]
+ },
+ "execution_count": 79,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n",
- "\n"
+ "# Head-count per department: Name has no missing values, so its non-null count is the head-count.\n",
+ "count_dep = salaries.groupby(by=[\"Department\"]).count()\n",
+ "count_dep.loc[:, \"Name\"]"
]
},
{
@@ -124,12 +600,82 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 103,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "4 19.86\n",
+ "6 46.10\n",
+ "7 35.60\n",
+ "10 2.65\n",
+ "18 17.68\n",
+ " ... \n",
+ "33164 46.10\n",
+ "33168 17.68\n",
+ "33169 35.60\n",
+ "33174 46.35\n",
+ "33175 48.85\n",
+ "Name: Hourly Rate, Length: 8022, dtype: float64"
+ ]
+ },
+ "execution_count": 103,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Step 1 - hypotheses (two-sided test on the mean hourly wage)\n",
+ "#   H0: mean hourly wage == 30\n",
+ "#   H1: mean hourly wage != 30\n",
+ "\n",
+ "# Step 2 - significance level\n",
+ "alpha = 0.05\n",
+ "\n",
+ "# Step 3 - sample: the hourly rate of every worker paid by the hour.\n",
+ "# A single .loc call selects rows and column together, which is equivalent\n",
+ "# to filtering the frame first and then picking the column.\n",
+ "sample = salaries.loc[salaries[\"Salary or Hourly\"] == \"Hourly\", \"Hourly Rate\"]\n",
+ "\n",
+ "# Show the selected rates.\n",
+ "sample\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 104,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "TtestResult(statistic=20.6198057854942, pvalue=4.3230240486229894e-92, df=8021)"
+ ]
+ },
+ "execution_count": 104,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Step 4 - compute the test statistic and p-value\n",
+ "\n",
+ "# Two-sided one-sample t-test of the hourly rates against 30 $/hour;\n",
+ "# the decision rule is: reject H0 when pvalue < alpha.\n",
+ "st.ttest_1samp(a=sample, popmean=30, alternative=\"two-sided\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 82,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n",
- "\n"
+ "# The p-value is less than alpha, so I have to reject H0: the mean hourly rate of the workers is\n",
+ "# different from $30."
]
},
{
@@ -143,12 +689,86 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 105,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "TtestResult(statistic=653.6034847441706, pvalue=1.0, df=25160)"
+ ]
+ },
+ "execution_count": 105,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# H0: mean annual salary >= 86000\n",
+ "# H1: mean annual salary <  86000\n",
+ "# NOTE(review): if the claim under test is \"salaries are higher than 86000\", H1 should be\n",
+ "# \"mean > 86000\" with alternative=\"greater\" - confirm against the exercise prompt.\n",
+ "\n",
+ "alpha = 0.05\n",
+ "\n",
+ "# Annual Salary is recorded in dollars (e.g. 101442.0), so the reference mean is 86000,\n",
+ "# not 86. Every salaried employee is included, so this is the full salaried group.\n",
+ "sample_salaries = salaries.loc[salaries[\"Salary or Hourly\"] == \"Salary\", \"Annual Salary\"]\n",
+ "\n",
+ "# One-sided one-sample t-test matching H1 above; reject H0 only when pvalue < alpha.\n",
+ "st.ttest_1samp(sample_salaries, 86000, alternative=\"less\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 135,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/var/folders/ms/vrwr3_md2xl156mcvfmyskx80000gn/T/ipykernel_77248/27391849.py:1: UserWarning: Boolean Series key will be reindexed to match DataFrame index.\n",
+ " sample_salaries = salaries[salaries[\"Salary or Hourly\"] == \"Salary\"][salaries[\"Department\"]== \"POLICE\"][\"Annual Salary\"]\n"
+ ]
+ }
+ ],
+ "source": [
+ "sample_salaries = salaries.loc[(salaries[\"Salary or Hourly\"] == \"Salary\") & (salaries[\"Department\"] == \"POLICE\"), \"Annual Salary\"]  # one combined mask avoids the reindex UserWarning\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 84,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n",
- "\n"
+ "## The p-value is greater than the significance level, so I can NOT reject H0: salaries do not\n",
+ "# appear to be lower than $86000/year."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 106,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "TtestResult(statistic=653.6034847441706, pvalue=1.0, df=25160)"
+ ]
+ },
+ "execution_count": 106,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Same one-sided test on every salaried worker; Annual Salary is in dollars, so compare with 86000 (not 86).\n",
+ "\n",
+ "salaries1 = salaries.loc[salaries[\"Salary or Hourly\"] == \"Salary\", \"Annual Salary\"]\n",
+ "\n",
+ "st.ttest_1samp(salaries1, 86000, alternative=\"less\")\n"
]
},
{
@@ -160,12 +780,253 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 99,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Salary or Hourly\n",
+ "Hourly STREETS & SAN\n",
+ "Salary POLICE\n",
+ "dtype: object"
+ ]
+ },
+ "execution_count": 99,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Cross-tabulate head-counts: one row per department, one column per pay type.\n",
+ "dep_hourly = pd.crosstab(index=salaries[\"Department\"], columns=salaries[\"Salary or Hourly\"])\n",
+ "# Department label holding the largest count in each pay-type column.\n",
+ "dep_max = dep_hourly.idxmax(axis=0)\n",
+ "dep_max"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 100,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n",
- "\n"
+ "# The department with the most hourly workers is Streets & San."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 101,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Name | \n",
+ " Job Titles | \n",
+ " Department | \n",
+ " Full or Part-Time | \n",
+ " Salary or Hourly | \n",
+ " Typical Hours | \n",
+ " Annual Salary | \n",
+ " Hourly Rate | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " AARON, JEFFERY M | \n",
+ " SERGEANT | \n",
+ " POLICE | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 101442.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " AARON, KARINA | \n",
+ " POLICE OFFICER (ASSIGNED AS DETECTIVE) | \n",
+ " POLICE | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 94122.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " AARON, KIMBERLEI R | \n",
+ " CHIEF CONTRACT EXPEDITER | \n",
+ " GENERAL SERVICES | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 101592.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " ABAD JR, VICENTE M | \n",
+ " CIVIL ENGINEER IV | \n",
+ " WATER MGMNT | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 110064.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " ABASCAL, REECE E | \n",
+ " TRAFFIC CONTROL AIDE-HOURLY | \n",
+ " OEMC | \n",
+ " P | \n",
+ " Hourly | \n",
+ " 20.0 | \n",
+ " NaN | \n",
+ " 19.86 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 33178 | \n",
+ " ZYLINSKA, KATARZYNA | \n",
+ " POLICE OFFICER | \n",
+ " POLICE | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 72510.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 33179 | \n",
+ " ZYMANTAS, LAURA C | \n",
+ " POLICE OFFICER | \n",
+ " POLICE | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 48078.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 33180 | \n",
+ " ZYMANTAS, MARK E | \n",
+ " POLICE OFFICER | \n",
+ " POLICE | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 90024.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 33181 | \n",
+ " ZYRKOWSKI, CARLO E | \n",
+ " POLICE OFFICER | \n",
+ " POLICE | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 93354.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 33182 | \n",
+ " ZYSKOWSKI, DARIUSZ | \n",
+ " CHIEF DATA BASE ANALYST | \n",
+ " DoIT | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 115932.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
33183 rows × 8 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Name Job Titles \\\n",
+ "0 AARON, JEFFERY M SERGEANT \n",
+ "1 AARON, KARINA POLICE OFFICER (ASSIGNED AS DETECTIVE) \n",
+ "2 AARON, KIMBERLEI R CHIEF CONTRACT EXPEDITER \n",
+ "3 ABAD JR, VICENTE M CIVIL ENGINEER IV \n",
+ "4 ABASCAL, REECE E TRAFFIC CONTROL AIDE-HOURLY \n",
+ "... ... ... \n",
+ "33178 ZYLINSKA, KATARZYNA POLICE OFFICER \n",
+ "33179 ZYMANTAS, LAURA C POLICE OFFICER \n",
+ "33180 ZYMANTAS, MARK E POLICE OFFICER \n",
+ "33181 ZYRKOWSKI, CARLO E POLICE OFFICER \n",
+ "33182 ZYSKOWSKI, DARIUSZ CHIEF DATA BASE ANALYST \n",
+ "\n",
+ " Department Full or Part-Time Salary or Hourly Typical Hours \\\n",
+ "0 POLICE F Salary NaN \n",
+ "1 POLICE F Salary NaN \n",
+ "2 GENERAL SERVICES F Salary NaN \n",
+ "3 WATER MGMNT F Salary NaN \n",
+ "4 OEMC P Hourly 20.0 \n",
+ "... ... ... ... ... \n",
+ "33178 POLICE F Salary NaN \n",
+ "33179 POLICE F Salary NaN \n",
+ "33180 POLICE F Salary NaN \n",
+ "33181 POLICE F Salary NaN \n",
+ "33182 DoIT F Salary NaN \n",
+ "\n",
+ " Annual Salary Hourly Rate \n",
+ "0 101442.0 NaN \n",
+ "1 94122.0 NaN \n",
+ "2 101592.0 NaN \n",
+ "3 110064.0 NaN \n",
+ "4 NaN 19.86 \n",
+ "... ... ... \n",
+ "33178 72510.0 NaN \n",
+ "33179 48078.0 NaN \n",
+ "33180 90024.0 NaN \n",
+ "33181 93354.0 NaN \n",
+ "33182 115932.0 NaN \n",
+ "\n",
+ "[33183 rows x 8 columns]"
+ ]
+ },
+ "execution_count": 101,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "salaries  # inspect the full frame again (pandas truncates the rendered rows)"
]
},
{
@@ -175,14 +1036,43 @@
"The workers from the department with the most hourly workers have complained that their hourly wage is less than $35/hour. Using a one sample t-test, test this one-sided hypothesis at the 95% confidence level."
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 108,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "TtestResult(statistic=-59075.69614118866, pvalue=1.0, df=8021)"
+ ]
+ },
+ "execution_count": 108,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Claim under test: the mean hourly wage in the department with the most hourly workers is below $35.\n",
+ "# H0: mean hourly rate >= 35   |   H1: mean hourly rate < 35\n",
+ "\n",
+ "alpha = 0.05\n",
+ "\n",
+ "hourly_workers = salaries[salaries[\"Salary or Hourly\"] == \"Hourly\"]\n",
+ "# Department with the most hourly workers, derived from the data rather than hard-coded.\n",
+ "top_hourly_dep = hourly_workers[\"Department\"].value_counts().idxmax()\n",
+ "workers_hourly = hourly_workers.loc[hourly_workers[\"Department\"] == top_hourly_dep, \"Hourly Rate\"]\n",
+ "# popmean is the hypothesised wage (35 $/hour); \"less\" matches H1. Reject H0 only when pvalue < alpha.\n",
+ "st.ttest_1samp(workers_hourly, 35, alternative=\"less\")\n"
+ ]
+ },
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n",
- "\n"
+ "## The p-value is greater than alpha, so I can NOT reject H0."
]
},
{
@@ -206,11 +1096,30 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 114,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(32.52349834813791, 33.05361708442256)"
+ ]
+ },
+ "execution_count": 114,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n",
+ "# 95% confidence interval for the mean hourly wage (normal approximation).\n",
+ "hourly_rates = salaries.loc[salaries[\"Salary or Hourly\"] == \"Hourly\", \"Hourly Rate\"]\n",
+ "\n",
+ "hourly_wage_mean = hourly_rates.mean()\n",
+ "hourly_wage_std = hourly_rates.std()\n",
+ "n = len(hourly_rates)\n",
+ "\n",
+ "# Standard error of the mean = sample standard deviation / sqrt(n).\n",
+ "st.norm.interval(0.95, loc=hourly_wage_mean, scale=hourly_wage_std / np.sqrt(n))\n",
"\n"
]
},
@@ -223,12 +1132,74 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 130,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/var/folders/ms/vrwr3_md2xl156mcvfmyskx80000gn/T/ipykernel_77248/4047882935.py:1: UserWarning: Boolean Series key will be reindexed to match DataFrame index.\n",
+ " salary_police_mean = salaries[salaries[\"Salary or Hourly\"] == \"Salary\"][salaries[\"Department\"] == \"POLICE\"][\"Annual Salary\"].mean()\n",
+ "/var/folders/ms/vrwr3_md2xl156mcvfmyskx80000gn/T/ipykernel_77248/4047882935.py:3: UserWarning: Boolean Series key will be reindexed to match DataFrame index.\n",
+ " salary_police_std = salaries[salaries[\"Salary or Hourly\"] == \"Salary\"][salaries[\"Department\"] == \"POLICE\"][\"Annual Salary\"].std()\n",
+ "/var/folders/ms/vrwr3_md2xl156mcvfmyskx80000gn/T/ipykernel_77248/4047882935.py:5: UserWarning: Boolean Series key will be reindexed to match DataFrame index.\n",
+ " n = len(salaries[salaries[\"Salary or Hourly\"] == \"Salary\"][salaries[\"Department\"] == \"POLICE\"][\"Annual Salary\"])\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "(86177.08425202279, 86795.74475424399)"
+ ]
+ },
+ "execution_count": 130,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n",
- "\n"
+ "# 95% CI for the mean annual salary of salaried police staff. The two conditions are combined\n",
+ "# with & in one mask; chained df[a][b] indexing makes pandas reindex b and emit a UserWarning.\n",
+ "is_salaried_police = (salaries[\"Salary or Hourly\"] == \"Salary\") & (salaries[\"Department\"] == \"POLICE\")\n",
+ "police_salaries = salaries.loc[is_salaried_police, \"Annual Salary\"]\n",
+ "salary_police_mean = police_salaries.mean()\n",
+ "salary_police_std = police_salaries.std()\n",
+ "n = len(police_salaries)\n",
+ "st.norm.interval(0.95, loc=salary_police_mean, scale=salary_police_std / np.sqrt(n))\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 126,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/var/folders/ms/vrwr3_md2xl156mcvfmyskx80000gn/T/ipykernel_77248/1646062075.py:1: UserWarning: Boolean Series key will be reindexed to match DataFrame index.\n",
+ " salary_police_mean = salaries[salaries[\"Department\"] == \"POLICE\"][salaries[\"Salary or Hourly\"] == \"Salary\"][\"Annual Salary\"].mean()\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "86486.41450313339"
+ ]
+ },
+ "execution_count": 126,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Two conditions in one boolean mask: wrap each comparison in parentheses and join them with &.\n",
+ "# Chaining df[mask_a][mask_b] instead triggers \"Boolean Series key will be reindexed\".\n",
+ "is_salaried_police = (salaries[\"Department\"] == \"POLICE\") & (salaries[\"Salary or Hourly\"] == \"Salary\")\n",
+ "salary_police_mean = salaries.loc[is_salaried_police, \"Annual Salary\"].mean()\n",
+ "\n",
+ "salary_police_mean"
]
},
{
@@ -246,7 +1217,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 92,
"metadata": {},
"outputs": [],
"source": [
@@ -257,7 +1228,7 @@
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3",
+ "display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -271,7 +1242,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.7.3"
+ "version": "3.10.9"
}
},
"nbformat": 4,