diff --git a/your-code/main.ipynb b/your-code/main.ipynb
index 59b955a..628defd 100755
--- a/your-code/main.ipynb
+++ b/your-code/main.ipynb
@@ -12,12 +12,13 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 61,
"metadata": {},
"outputs": [],
"source": [
"# import numpy and pandas\n",
- "\n"
+ "import numpy as np\n",
+ "import pandas as pd"
]
},
{
@@ -31,11 +32,12 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 62,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n"
+ "# Your code here:\n",
+ "df = pd.read_csv(filepath_or_buffer='Current_Employee_Names__Salaries__and_Position_Titles.csv')"
]
},
{
@@ -47,12 +49,220 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 63,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Name | \n",
+ " Job Titles | \n",
+ " Department | \n",
+ " Full or Part-Time | \n",
+ " Salary or Hourly | \n",
+ " Typical Hours | \n",
+ " Annual Salary | \n",
+ " Hourly Rate | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " AARON, JEFFERY M | \n",
+ " SERGEANT | \n",
+ " POLICE | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 101442.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " AARON, KARINA | \n",
+ " POLICE OFFICER (ASSIGNED AS DETECTIVE) | \n",
+ " POLICE | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 94122.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " AARON, KIMBERLEI R | \n",
+ " CHIEF CONTRACT EXPEDITER | \n",
+ " GENERAL SERVICES | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 101592.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " ABAD JR, VICENTE M | \n",
+ " CIVIL ENGINEER IV | \n",
+ " WATER MGMNT | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 110064.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " ABASCAL, REECE E | \n",
+ " TRAFFIC CONTROL AIDE-HOURLY | \n",
+ " OEMC | \n",
+ " P | \n",
+ " Hourly | \n",
+ " 20.0 | \n",
+ " NaN | \n",
+ " 19.86 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 33178 | \n",
+ " ZYLINSKA, KATARZYNA | \n",
+ " POLICE OFFICER | \n",
+ " POLICE | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 72510.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 33179 | \n",
+ " ZYMANTAS, LAURA C | \n",
+ " POLICE OFFICER | \n",
+ " POLICE | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 48078.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 33180 | \n",
+ " ZYMANTAS, MARK E | \n",
+ " POLICE OFFICER | \n",
+ " POLICE | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 90024.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 33181 | \n",
+ " ZYRKOWSKI, CARLO E | \n",
+ " POLICE OFFICER | \n",
+ " POLICE | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 93354.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 33182 | \n",
+ " ZYSKOWSKI, DARIUSZ | \n",
+ " CHIEF DATA BASE ANALYST | \n",
+ " DoIT | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 115932.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
33183 rows × 8 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Name Job Titles \\\n",
+ "0 AARON, JEFFERY M SERGEANT \n",
+ "1 AARON, KARINA POLICE OFFICER (ASSIGNED AS DETECTIVE) \n",
+ "2 AARON, KIMBERLEI R CHIEF CONTRACT EXPEDITER \n",
+ "3 ABAD JR, VICENTE M CIVIL ENGINEER IV \n",
+ "4 ABASCAL, REECE E TRAFFIC CONTROL AIDE-HOURLY \n",
+ "... ... ... \n",
+ "33178 ZYLINSKA, KATARZYNA POLICE OFFICER \n",
+ "33179 ZYMANTAS, LAURA C POLICE OFFICER \n",
+ "33180 ZYMANTAS, MARK E POLICE OFFICER \n",
+ "33181 ZYRKOWSKI, CARLO E POLICE OFFICER \n",
+ "33182 ZYSKOWSKI, DARIUSZ CHIEF DATA BASE ANALYST \n",
+ "\n",
+ " Department Full or Part-Time Salary or Hourly Typical Hours \\\n",
+ "0 POLICE F Salary NaN \n",
+ "1 POLICE F Salary NaN \n",
+ "2 GENERAL SERVICES F Salary NaN \n",
+ "3 WATER MGMNT F Salary NaN \n",
+ "4 OEMC P Hourly 20.0 \n",
+ "... ... ... ... ... \n",
+ "33178 POLICE F Salary NaN \n",
+ "33179 POLICE F Salary NaN \n",
+ "33180 POLICE F Salary NaN \n",
+ "33181 POLICE F Salary NaN \n",
+ "33182 DoIT F Salary NaN \n",
+ "\n",
+ " Annual Salary Hourly Rate \n",
+ "0 101442.0 NaN \n",
+ "1 94122.0 NaN \n",
+ "2 101592.0 NaN \n",
+ "3 110064.0 NaN \n",
+ "4 NaN 19.86 \n",
+ "... ... ... \n",
+ "33178 72510.0 NaN \n",
+ "33179 48078.0 NaN \n",
+ "33180 90024.0 NaN \n",
+ "33181 93354.0 NaN \n",
+ "33182 115932.0 NaN \n",
+ "\n",
+ "[33183 rows x 8 columns]"
+ ]
+ },
+ "execution_count": 63,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n",
- "\n"
+ "## Your code here:\n",
+ "df"
]
},
{
@@ -64,12 +274,31 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 64,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Name False\n",
+ "Job Titles False\n",
+ "Department False\n",
+ "Full or Part-Time False\n",
+ "Salary or Hourly False\n",
+ "Typical Hours True\n",
+ "Annual Salary True\n",
+ "Hourly Rate True\n",
+ "dtype: bool"
+ ]
+ },
+ "execution_count": 64,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Your code here:\n",
- "\n"
+ "df.isna().any()"
]
},
{
@@ -81,12 +310,26 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 65,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Salary or Hourly\n",
+ "Salary 25161\n",
+ "Hourly 8022\n",
+ "Name: count, dtype: int64"
+ ]
+ },
+ "execution_count": 65,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Your code here:\n",
- "\n"
+ "df['Salary or Hourly'].value_counts(sort=True, ascending=False)"
]
},
{
@@ -105,12 +348,61 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 66,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Department\n",
+ "POLICE 13414\n",
+ "FIRE 4641\n",
+ "STREETS & SAN 2198\n",
+ "OEMC 2102\n",
+ "WATER MGMNT 1879\n",
+ "AVIATION 1629\n",
+ "TRANSPORTN 1140\n",
+ "PUBLIC LIBRARY 1015\n",
+ "GENERAL SERVICES 980\n",
+ "FAMILY & SUPPORT 615\n",
+ "FINANCE 560\n",
+ "HEALTH 488\n",
+ "CITY COUNCIL 411\n",
+ "LAW 407\n",
+ "BUILDINGS 269\n",
+ "COMMUNITY DEVELOPMENT 207\n",
+ "BUSINESS AFFAIRS 171\n",
+ "COPA 116\n",
+ "BOARD OF ELECTION 107\n",
+ "DoIT 99\n",
+ "PROCUREMENT 92\n",
+ "INSPECTOR GEN 87\n",
+ "MAYOR'S OFFICE 85\n",
+ "CITY CLERK 84\n",
+ "ANIMAL CONTRL 81\n",
+ "HUMAN RESOURCES 79\n",
+ "CULTURAL AFFAIRS 65\n",
+ "BUDGET & MGMT 46\n",
+ "ADMIN HEARNG 39\n",
+ "DISABILITIES 28\n",
+ "TREASURER 22\n",
+ "HUMAN RELATIONS 16\n",
+ "BOARD OF ETHICS 8\n",
+ "POLICE BOARD 2\n",
+ "LICENSE APPL COMM 1\n",
+ "Name: count, dtype: int64"
+ ]
+ },
+ "execution_count": 66,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Your code here:\n",
- "\n"
+ "df['Department'].value_counts(sort=True, ascending=False)"
]
},
{
@@ -124,12 +416,128 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 67,
"metadata": {},
"outputs": [],
"source": [
"# Your code here:\n",
- "\n"
+ "import scipy.stats as st\n",
+ "import numpy as np "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 68,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# H0: the mean hourly rate equals $30/h (mu = 30)\n",
+ "# H1: the mean hourly rate differs from $30/h (mu != 30)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 69,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Choose the significance level for the test\n",
+ "# alpha = 5%, i.e. a 95% confidence level\n",
+ "\n",
+ "alpha = 0.05"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 70,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "2275 35.60\n",
+ "7836 36.21\n",
+ "10038 NaN\n",
+ "19748 NaN\n",
+ "22819 NaN\n",
+ "Name: Hourly Rate, dtype: float64"
+ ]
+ },
+ "execution_count": 70,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "c_sample = df['Hourly Rate'].dropna().sample(1000)  # drop NaN first (salaried rows have no hourly rate) so all 1000 draws are real observations\n",
+ "c_sample.head()\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 71,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "5.791876996946556"
+ ]
+ },
+ "execution_count": 71,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "mean = c_sample.mean()\n",
+ "std = c_sample.std(ddof=1)\n",
+ "# standard error of the mean, taking n = 1000 (the size requested from .sample)\n",
+ "std_err = std / np.sqrt(1000)\n",
+ "\n",
+ "stat = (mean - 30) / std_err  # t statistic against the H0 mean of 30\n",
+ "stat"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 72,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "9.31861114265164e-09"
+ ]
+ },
+ "execution_count": 72,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "p_value = 2 * st.t.sf(abs(stat), 1000 - 1)\n",
+ "p_value  # two-sided p-value: probability, under H0, of a |t| at least this large"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 73,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "True"
+ ]
+ },
+ "execution_count": 73,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "p_value < alpha  # below alpha -> reject H0"
]
},
{
@@ -143,46 +551,238 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 74,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Name | \n",
+ " Job Titles | \n",
+ " Department | \n",
+ " Full or Part-Time | \n",
+ " Salary or Hourly | \n",
+ " Typical Hours | \n",
+ " Annual Salary | \n",
+ " Hourly Rate | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 5371 | \n",
+ " COLLIER, DWAYNE A | \n",
+ " POLICE OFFICER | \n",
+ " POLICE | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 90024.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 255 | \n",
+ " AHEARN, DORY E | \n",
+ " POLICE OFFICER | \n",
+ " POLICE | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 93354.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 31888 | \n",
+ " WILLIAMS, ALEXIS | \n",
+ " POLICE ADMINISTRATIVE CLERK | \n",
+ " POLICE | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 40392.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 4896 | \n",
+ " CHRISTIAN, JOHNNY R | \n",
+ " SERGEANT | \n",
+ " POLICE | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 107988.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 23497 | \n",
+ " POPELKA, ROBIN L | \n",
+ " POLICE OFFICER (ASGND AS MOUNTED PATROL OFFICER) | \n",
+ " POLICE | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 94524.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Name Job Titles \\\n",
+ "5371 COLLIER, DWAYNE A POLICE OFFICER \n",
+ "255 AHEARN, DORY E POLICE OFFICER \n",
+ "31888 WILLIAMS, ALEXIS POLICE ADMINISTRATIVE CLERK \n",
+ "4896 CHRISTIAN, JOHNNY R SERGEANT \n",
+ "23497 POPELKA, ROBIN L POLICE OFFICER (ASGND AS MOUNTED PATROL OFFICER) \n",
+ "\n",
+ " Department Full or Part-Time Salary or Hourly Typical Hours \\\n",
+ "5371 POLICE F Salary NaN \n",
+ "255 POLICE F Salary NaN \n",
+ "31888 POLICE F Salary NaN \n",
+ "4896 POLICE F Salary NaN \n",
+ "23497 POLICE F Salary NaN \n",
+ "\n",
+ " Annual Salary Hourly Rate \n",
+ "5371 90024.0 NaN \n",
+ "255 93354.0 NaN \n",
+ "31888 40392.0 NaN \n",
+ "4896 107988.0 NaN \n",
+ "23497 94524.0 NaN "
+ ]
+ },
+ "execution_count": 74,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Your code here:\n",
- "\n"
+ "c2_sample = df[df['Department'] == 'POLICE'].dropna(subset=['Annual Salary']).sample(30)  # keep only rows with an annual salary so all 30 draws enter the test\n",
+ "c2_sample.head()"
]
},
{
- "cell_type": "markdown",
+ "cell_type": "code",
+ "execution_count": 75,
"metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "5371 90024.0\n",
+ "255 93354.0\n",
+ "31888 40392.0\n",
+ "4896 107988.0\n",
+ "23497 94524.0\n",
+ "Name: Annual Salary, dtype: float64"
+ ]
+ },
+ "execution_count": 75,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "Using the `crosstab` function, find the department that has the most hourly workers. "
+ "c2_sample_f = c2_sample.loc[:, 'Annual Salary']\n",
+ "c2_sample_f.head(5)"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 76,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "1.7895134031130342"
+ ]
+ },
+ "execution_count": 76,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n",
- "\n"
+ "mu = 86000  # reference mean the sample is tested against\n",
+ "mean = c2_sample_f.mean()\n",
+ "std = c2_sample_f.std(ddof=1)\n",
+ "\n",
+ "stat = (mean - mu) / (std / np.sqrt(30))  # t statistic for the sample of 30\n",
+ "stat"
]
},
{
- "cell_type": "markdown",
+ "cell_type": "code",
+ "execution_count": 77,
"metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.08398139008129338"
+ ]
+ },
+ "execution_count": 77,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "The workers from the department with the most hourly workers have complained that their hourly wage is less than $35/hour. Using a one sample t-test, test this one-sided hypothesis at the 95% confidence level."
+ "p_value = 2 * st.t.sf(abs(stat), 30 - 1)\n",
+ "p_value  # two-sided p-value for the t statistic, 29 degrees of freedom"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 78,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "False"
+ ]
+ },
+ "execution_count": 78,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n",
- "\n"
+ "p_value < alpha  # not below alpha -> do not reject H0"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Using the `crosstab` function, find the department that has the most hourly workers. "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The workers from the department with the most hourly workers have complained that their hourly wage is less than $35/hour. Using a one sample t-test, test this one-sided hypothesis at the 95% confidence level."
]
},
{
@@ -206,12 +806,31 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 90,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(32.52345834488425, 33.05365708767623)"
+ ]
+ },
+ "execution_count": 90,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Your code here:\n",
- "\n"
+ "\n",
+ "import numpy as np\n",
+ "from scipy.stats import sem, t\n",
+ "hourly_rate = df['Hourly Rate'].dropna()\n",
+ "\n",
+ "# 95% t-interval for the mean hourly rate, centred on the sample mean\n",
+ "dof = len(hourly_rate) - 1\n",
+ "confidence_interval = t.interval(0.95, dof, loc=hourly_rate.mean(), scale=sem(hourly_rate))\n",
+ "confidence_interval"
]
},
{
@@ -223,12 +842,26 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 97,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(9.31381234362183, 9.45418765637817)"
+ ]
+ },
+ "execution_count": 97,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Your code here:\n",
- "\n"
+ "is_police = df['Department'] == 'POLICE'\n",
+ "hourly_rate_police = df.loc[is_police, 'Hourly Rate'].dropna()\n",
+ "confidence_interval = st.t.interval(0.95, len(hourly_rate_police) - 1, loc=hourly_rate_police.mean(), scale=st.sem(hourly_rate_police))\n",
+ "confidence_interval"
]
},
{
@@ -257,7 +890,7 @@
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3",
+ "display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -271,7 +904,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.7.3"
+ "version": "3.11.5"
}
},
"nbformat": 4,