diff --git a/your-code/main.ipynb b/your-code/main.ipynb
old mode 100755
new mode 100644
index 59b955a..027f30a
--- a/your-code/main.ipynb
+++ b/your-code/main.ipynb
@@ -12,12 +12,15 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
- "# import numpy and pandas\n",
- "\n"
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "from scipy.stats import ttest_1samp\n",
+ "from scipy.stats import t\n",
+ "import statsmodels.api as sm"
]
},
{
@@ -31,11 +34,218 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 4,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Name | \n",
+ " Job Titles | \n",
+ " Department | \n",
+ " Full or Part-Time | \n",
+ " Salary or Hourly | \n",
+ " Typical Hours | \n",
+ " Annual Salary | \n",
+ " Hourly Rate | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " AARON, JEFFERY M | \n",
+ " SERGEANT | \n",
+ " POLICE | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 101442.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " AARON, KARINA | \n",
+ " POLICE OFFICER (ASSIGNED AS DETECTIVE) | \n",
+ " POLICE | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 94122.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " AARON, KIMBERLEI R | \n",
+ " CHIEF CONTRACT EXPEDITER | \n",
+ " GENERAL SERVICES | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 101592.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " ABAD JR, VICENTE M | \n",
+ " CIVIL ENGINEER IV | \n",
+ " WATER MGMNT | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 110064.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " ABASCAL, REECE E | \n",
+ " TRAFFIC CONTROL AIDE-HOURLY | \n",
+ " OEMC | \n",
+ " P | \n",
+ " Hourly | \n",
+ " 20.0 | \n",
+ " NaN | \n",
+ " 19.86 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 33178 | \n",
+ " ZYLINSKA, KATARZYNA | \n",
+ " POLICE OFFICER | \n",
+ " POLICE | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 72510.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 33179 | \n",
+ " ZYMANTAS, LAURA C | \n",
+ " POLICE OFFICER | \n",
+ " POLICE | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 48078.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 33180 | \n",
+ " ZYMANTAS, MARK E | \n",
+ " POLICE OFFICER | \n",
+ " POLICE | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 90024.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 33181 | \n",
+ " ZYRKOWSKI, CARLO E | \n",
+ " POLICE OFFICER | \n",
+ " POLICE | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 93354.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 33182 | \n",
+ " ZYSKOWSKI, DARIUSZ | \n",
+ " CHIEF DATA BASE ANALYST | \n",
+ " DoIT | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 115932.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
33183 rows × 8 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Name Job Titles \\\n",
+ "0 AARON, JEFFERY M SERGEANT \n",
+ "1 AARON, KARINA POLICE OFFICER (ASSIGNED AS DETECTIVE) \n",
+ "2 AARON, KIMBERLEI R CHIEF CONTRACT EXPEDITER \n",
+ "3 ABAD JR, VICENTE M CIVIL ENGINEER IV \n",
+ "4 ABASCAL, REECE E TRAFFIC CONTROL AIDE-HOURLY \n",
+ "... ... ... \n",
+ "33178 ZYLINSKA, KATARZYNA POLICE OFFICER \n",
+ "33179 ZYMANTAS, LAURA C POLICE OFFICER \n",
+ "33180 ZYMANTAS, MARK E POLICE OFFICER \n",
+ "33181 ZYRKOWSKI, CARLO E POLICE OFFICER \n",
+ "33182 ZYSKOWSKI, DARIUSZ CHIEF DATA BASE ANALYST \n",
+ "\n",
+ " Department Full or Part-Time Salary or Hourly Typical Hours \\\n",
+ "0 POLICE F Salary NaN \n",
+ "1 POLICE F Salary NaN \n",
+ "2 GENERAL SERVICES F Salary NaN \n",
+ "3 WATER MGMNT F Salary NaN \n",
+ "4 OEMC P Hourly 20.0 \n",
+ "... ... ... ... ... \n",
+ "33178 POLICE F Salary NaN \n",
+ "33179 POLICE F Salary NaN \n",
+ "33180 POLICE F Salary NaN \n",
+ "33181 POLICE F Salary NaN \n",
+ "33182 DoIT F Salary NaN \n",
+ "\n",
+ " Annual Salary Hourly Rate \n",
+ "0 101442.0 NaN \n",
+ "1 94122.0 NaN \n",
+ "2 101592.0 NaN \n",
+ "3 110064.0 NaN \n",
+ "4 NaN 19.86 \n",
+ "... ... ... \n",
+ "33178 72510.0 NaN \n",
+ "33179 48078.0 NaN \n",
+ "33180 90024.0 NaN \n",
+ "33181 93354.0 NaN \n",
+ "33182 115932.0 NaN \n",
+ "\n",
+ "[33183 rows x 8 columns]"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n"
+ "data = pd.read_csv(\"Current_Employee_Names__Salaries__and_Position_Titles.csv\")\n",
+ "data\n"
]
},
{
@@ -47,12 +257,38 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 6,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " Name Job Titles \\\n",
+ "0 AARON, JEFFERY M SERGEANT \n",
+ "1 AARON, KARINA POLICE OFFICER (ASSIGNED AS DETECTIVE) \n",
+ "2 AARON, KIMBERLEI R CHIEF CONTRACT EXPEDITER \n",
+ "3 ABAD JR, VICENTE M CIVIL ENGINEER IV \n",
+ "4 ABASCAL, REECE E TRAFFIC CONTROL AIDE-HOURLY \n",
+ "\n",
+ " Department Full or Part-Time Salary or Hourly Typical Hours \\\n",
+ "0 POLICE F Salary NaN \n",
+ "1 POLICE F Salary NaN \n",
+ "2 GENERAL SERVICES F Salary NaN \n",
+ "3 WATER MGMNT F Salary NaN \n",
+ "4 OEMC P Hourly 20.0 \n",
+ "\n",
+ " Annual Salary Hourly Rate \n",
+ "0 101442.0 NaN \n",
+ "1 94122.0 NaN \n",
+ "2 101592.0 NaN \n",
+ "3 110064.0 NaN \n",
+ "4 NaN 19.86 \n"
+ ]
+ }
+ ],
"source": [
- "# Your code here:\n",
- "\n"
+ "print(data.head())"
]
},
{
@@ -64,12 +300,28 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 7,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Name 0\n",
+ "Job Titles 0\n",
+ "Department 0\n",
+ "Full or Part-Time 0\n",
+ "Salary or Hourly 0\n",
+ "Typical Hours 25161\n",
+ "Annual Salary 8022\n",
+ "Hourly Rate 25161\n",
+ "dtype: int64\n"
+ ]
+ }
+ ],
"source": [
- "# Your code here:\n",
- "\n"
+ "missing_data = data.isnull().sum()\n",
+ "print(missing_data)"
]
},
{
@@ -81,12 +333,24 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 11,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "40.0 5833\n",
+ "20.0 1901\n",
+ "10.0 184\n",
+ "35.0 104\n",
+ "Name: Typical Hours, dtype: int64\n"
+ ]
+ }
+ ],
"source": [
- "# Your code here:\n",
- "\n"
+ "employment_counts = data['Typical Hours'].value_counts()\n",
+ "print(employment_counts)"
]
},
{
@@ -105,12 +369,55 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 12,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "POLICE 13414\n",
+ "FIRE 4641\n",
+ "STREETS & SAN 2198\n",
+ "OEMC 2102\n",
+ "WATER MGMNT 1879\n",
+ "AVIATION 1629\n",
+ "TRANSPORTN 1140\n",
+ "PUBLIC LIBRARY 1015\n",
+ "GENERAL SERVICES 980\n",
+ "FAMILY & SUPPORT 615\n",
+ "FINANCE 560\n",
+ "HEALTH 488\n",
+ "CITY COUNCIL 411\n",
+ "LAW 407\n",
+ "BUILDINGS 269\n",
+ "COMMUNITY DEVELOPMENT 207\n",
+ "BUSINESS AFFAIRS 171\n",
+ "COPA 116\n",
+ "BOARD OF ELECTION 107\n",
+ "DoIT 99\n",
+ "PROCUREMENT 92\n",
+ "INSPECTOR GEN 87\n",
+ "MAYOR'S OFFICE 85\n",
+ "CITY CLERK 84\n",
+ "ANIMAL CONTRL 81\n",
+ "HUMAN RESOURCES 79\n",
+ "CULTURAL AFFAIRS 65\n",
+ "BUDGET & MGMT 46\n",
+ "ADMIN HEARNG 39\n",
+ "DISABILITIES 28\n",
+ "TREASURER 22\n",
+ "HUMAN RELATIONS 16\n",
+ "BOARD OF ETHICS 8\n",
+ "POLICE BOARD 2\n",
+ "LICENSE APPL COMM 1\n",
+ "Name: Department, dtype: int64\n"
+ ]
+ }
+ ],
"source": [
- "# Your code here:\n",
- "\n"
+ "department_counts = data['Department'].value_counts()\n",
+ "print(department_counts)"
]
},
{
@@ -124,12 +431,33 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 15,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Null hypothesis rejected: Hourly wage is significantly different from $30/hr.\n"
+ ]
+ }
+ ],
"source": [
- "# Your code here:\n",
- "\n"
+ "hourly_wage_data = data[data['Salary or Hourly'] == 'Hourly']\n",
+ "\n",
+ "hourly_wage_data = hourly_wage_data.dropna(subset=['Hourly Rate'])\n",
+ "\n",
+ "hourly_wage_mean = hourly_wage_data['Hourly Rate'].mean()\n",
+ "population_mean = 30 \n",
+ "alpha = 0.05 \n",
+ "\n",
+ "t_statistic, p_value = ttest_1samp(hourly_wage_data['Hourly Rate'], population_mean)\n",
+ "\n",
+ "\n",
+ "if p_value < alpha:\n",
+ " print(\"Null hypothesis rejected: Hourly wage is significantly different from $30/hr.\")\n",
+ "else:\n",
+ " print(\"Null hypothesis cannot be rejected: Hourly wage is not significantly different from $30/hr.\")"
]
},
{
@@ -143,12 +471,36 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 17,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Null hypothesis rejected: Police salaries are higher than last year's mean.\n"
+ ]
+ }
+ ],
"source": [
- "# Your code here:\n",
- "\n"
+ "police_salary_data = data[(data['Salary or Hourly'] == 'Salary') & (data['Department'] == 'POLICE')]\n",
+ "\n",
+ "police_salary_data = police_salary_data.dropna(subset=['Annual Salary'])\n",
+ "\n",
+ "police_salary_mean = police_salary_data['Annual Salary'].mean()\n",
+ "population_mean = 86000 \n",
+ "alpha = 0.05 \n",
+ "\n",
+ "t_statistic, p_value = ttest_1samp(police_salary_data['Annual Salary'], population_mean)\n",
+ "\n",
+ "\n",
+ "p_value /= 2\n",
+ "\n",
+ "\n",
+ "if p_value < alpha and t_statistic > 0:\n",
+ " print(\"Null hypothesis rejected: Police salaries are higher than last year's mean.\")\n",
+ "else:\n",
+ " print(\"Null hypothesis cannot be rejected: Police salaries are not higher than last year's mean.\")"
]
},
{
@@ -160,29 +512,21 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 19,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Department with the most hourly workers: STREETS & SAN\n"
+ ]
+ }
+ ],
"source": [
- "# Your code here:\n",
- "\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "The workers from the department with the most hourly workers have complained that their hourly wage is less than $35/hour. Using a one sample t-test, test this one-sided hypothesis at the 95% confidence level."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Your code here:\n",
- "\n"
+ "department_hourly_cross = pd.crosstab(data['Department'], data['Salary or Hourly'])\n",
+ "department_with_most_hourly = department_hourly_cross['Hourly'].idxmax()\n",
+ "print(\"Department with the most hourly workers:\", department_with_most_hourly)"
]
},
{
@@ -206,12 +550,24 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 21,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "95% Confidence Interval for Mean Hourly Wage: (32.52345834488425, 33.05365708767623)\n"
+ ]
+ }
+ ],
"source": [
- "# Your code here:\n",
- "\n"
+ "mean_hourly_wage = hourly_wage_data['Hourly Rate'].mean()\n",
+ "std_error = np.std(hourly_wage_data['Hourly Rate'], ddof=1) / np.sqrt(len(hourly_wage_data))\n",
+ "confidence_level = 0.95\n",
+ "degrees_of_freedom = len(hourly_wage_data) - 1\n",
+ "conf_interval = t.interval(confidence_level, degrees_of_freedom, mean_hourly_wage, std_error)\n",
+ "print(\"95% Confidence Interval for Mean Hourly Wage:\", conf_interval)"
]
},
{
@@ -223,12 +579,26 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 22,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "95% Confidence Interval for Mean Annual Salary of Police Salaried Employees: (86177.05631531785, 86795.77269094893)\n"
+ ]
+ }
+ ],
"source": [
- "# Your code here:\n",
- "\n"
+ "police_salary_data = data[(data['Salary or Hourly'] == 'Salary') & (data['Department'] == 'POLICE')]\n",
+ "police_salary_data = police_salary_data.dropna(subset=['Annual Salary'])\n",
+ "mean_annual_salary = police_salary_data['Annual Salary'].mean()\n",
+ "std_error = np.std(police_salary_data['Annual Salary'], ddof=1) / np.sqrt(len(police_salary_data))\n",
+ "confidence_level = 0.95\n",
+ "degrees_of_freedom = len(police_salary_data) - 1\n",
+ "conf_interval = t.interval(confidence_level, degrees_of_freedom, mean_annual_salary, std_error)\n",
+ "print(\"95% Confidence Interval for Mean Annual Salary of Police Salaried Employees:\", conf_interval)"
]
},
{
@@ -246,18 +616,37 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 24,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Null hypothesis rejected: The proportion of hourly workers is significantly different from 25%.\n"
+ ]
+ }
+ ],
"source": [
- "# Your code here:\n",
- "\n"
+ "num_hourly_workers = len(data[data['Salary or Hourly'] == 'Hourly'])\n",
+ "total_employees = len(data)\n",
+ "proportion_hourly = num_hourly_workers / total_employees\n",
+ "null_proportion = 0.25 # 25%\n",
+ "confidence_level = 0.95\n",
+ "\n",
+ "\n",
+ "z_statistic, p_value = sm.stats.proportions_ztest(num_hourly_workers, total_employees, null_proportion, alternative='two-sided')\n",
+ "alpha = 1 - confidence_level\n",
+ "if p_value < alpha:\n",
+ " print(\"Null hypothesis rejected: The proportion of hourly workers is significantly different from 25%.\")\n",
+ "else:\n",
+ " print(\"Null hypothesis cannot be rejected: The proportion of hourly workers is not significantly different from 25%.\")"
]
}
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3",
+ "display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -271,7 +660,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.7.3"
+ "version": "3.10.9"
}
},
"nbformat": 4,