diff --git a/your-code/main.ipynb b/your-code/main.ipynb
index 59b955a..4db305b 100755
--- a/your-code/main.ipynb
+++ b/your-code/main.ipynb
@@ -12,11 +12,12 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
- "# import numpy and pandas\n",
+ "import pandas as pd\n",
+ "import numpy as np\n",
"\n"
]
},
@@ -31,11 +32,11 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n"
+ "salaries = pd.read_csv(\"Current_Employee_Names__Salaries__and_Position_Titles.csv\")"
]
},
{
@@ -47,12 +48,130 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 7,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Name | \n",
+ " Job Titles | \n",
+ " Department | \n",
+ " Full or Part-Time | \n",
+ " Salary or Hourly | \n",
+ " Typical Hours | \n",
+ " Annual Salary | \n",
+ " Hourly Rate | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " AARON, JEFFERY M | \n",
+ " SERGEANT | \n",
+ " POLICE | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 101442.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " AARON, KARINA | \n",
+ " POLICE OFFICER (ASSIGNED AS DETECTIVE) | \n",
+ " POLICE | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 94122.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " AARON, KIMBERLEI R | \n",
+ " CHIEF CONTRACT EXPEDITER | \n",
+ " GENERAL SERVICES | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 101592.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " ABAD JR, VICENTE M | \n",
+ " CIVIL ENGINEER IV | \n",
+ " WATER MGMNT | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 110064.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " ABASCAL, REECE E | \n",
+ " TRAFFIC CONTROL AIDE-HOURLY | \n",
+ " OEMC | \n",
+ " P | \n",
+ " Hourly | \n",
+ " 20.0 | \n",
+ " NaN | \n",
+ " 19.86 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Name Job Titles \\\n",
+ "0 AARON, JEFFERY M SERGEANT \n",
+ "1 AARON, KARINA POLICE OFFICER (ASSIGNED AS DETECTIVE) \n",
+ "2 AARON, KIMBERLEI R CHIEF CONTRACT EXPEDITER \n",
+ "3 ABAD JR, VICENTE M CIVIL ENGINEER IV \n",
+ "4 ABASCAL, REECE E TRAFFIC CONTROL AIDE-HOURLY \n",
+ "\n",
+ " Department Full or Part-Time Salary or Hourly Typical Hours \\\n",
+ "0 POLICE F Salary NaN \n",
+ "1 POLICE F Salary NaN \n",
+ "2 GENERAL SERVICES F Salary NaN \n",
+ "3 WATER MGMNT F Salary NaN \n",
+ "4 OEMC P Hourly 20.0 \n",
+ "\n",
+ " Annual Salary Hourly Rate \n",
+ "0 101442.0 NaN \n",
+ "1 94122.0 NaN \n",
+ "2 101592.0 NaN \n",
+ "3 110064.0 NaN \n",
+ "4 NaN 19.86 "
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n",
- "\n"
+ "salaries.head()"
]
},
{
@@ -64,12 +183,30 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 8,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Name 0\n",
+ "Job Titles 0\n",
+ "Department 0\n",
+ "Full or Part-Time 0\n",
+ "Salary or Hourly 0\n",
+ "Typical Hours 25161\n",
+ "Annual Salary 8022\n",
+ "Hourly Rate 25161\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n",
- "\n"
+ "salaries.isna().sum()"
]
},
{
@@ -81,12 +218,24 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 11,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Salary 25161\n",
+ "Hourly 8022\n",
+ "Name: Salary or Hourly, dtype: int64"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n",
- "\n"
+ "salaries[\"Salary or Hourly\"].value_counts()"
]
},
{
@@ -105,12 +254,238 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 18,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Name | \n",
+ "
\n",
+ " \n",
+ " | \n",
+ " count | \n",
+ "
\n",
+ " \n",
+ " | Department | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | ADMIN HEARNG | \n",
+ " 39 | \n",
+ "
\n",
+ " \n",
+ " | ANIMAL CONTRL | \n",
+ " 81 | \n",
+ "
\n",
+ " \n",
+ " | AVIATION | \n",
+ " 1629 | \n",
+ "
\n",
+ " \n",
+ " | BOARD OF ELECTION | \n",
+ " 107 | \n",
+ "
\n",
+ " \n",
+ " | BOARD OF ETHICS | \n",
+ " 8 | \n",
+ "
\n",
+ " \n",
+ " | BUDGET & MGMT | \n",
+ " 46 | \n",
+ "
\n",
+ " \n",
+ " | BUILDINGS | \n",
+ " 269 | \n",
+ "
\n",
+ " \n",
+ " | BUSINESS AFFAIRS | \n",
+ " 171 | \n",
+ "
\n",
+ " \n",
+ " | CITY CLERK | \n",
+ " 84 | \n",
+ "
\n",
+ " \n",
+ " | CITY COUNCIL | \n",
+ " 411 | \n",
+ "
\n",
+ " \n",
+ " | COMMUNITY DEVELOPMENT | \n",
+ " 207 | \n",
+ "
\n",
+ " \n",
+ " | COPA | \n",
+ " 116 | \n",
+ "
\n",
+ " \n",
+ " | CULTURAL AFFAIRS | \n",
+ " 65 | \n",
+ "
\n",
+ " \n",
+ " | DISABILITIES | \n",
+ " 28 | \n",
+ "
\n",
+ " \n",
+ " | DoIT | \n",
+ " 99 | \n",
+ "
\n",
+ " \n",
+ " | FAMILY & SUPPORT | \n",
+ " 615 | \n",
+ "
\n",
+ " \n",
+ " | FINANCE | \n",
+ " 560 | \n",
+ "
\n",
+ " \n",
+ " | FIRE | \n",
+ " 4641 | \n",
+ "
\n",
+ " \n",
+ " | GENERAL SERVICES | \n",
+ " 980 | \n",
+ "
\n",
+ " \n",
+ " | HEALTH | \n",
+ " 488 | \n",
+ "
\n",
+ " \n",
+ " | HUMAN RELATIONS | \n",
+ " 16 | \n",
+ "
\n",
+ " \n",
+ " | HUMAN RESOURCES | \n",
+ " 79 | \n",
+ "
\n",
+ " \n",
+ " | INSPECTOR GEN | \n",
+ " 87 | \n",
+ "
\n",
+ " \n",
+ " | LAW | \n",
+ " 407 | \n",
+ "
\n",
+ " \n",
+ " | LICENSE APPL COMM | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | MAYOR'S OFFICE | \n",
+ " 85 | \n",
+ "
\n",
+ " \n",
+ " | OEMC | \n",
+ " 2102 | \n",
+ "
\n",
+ " \n",
+ " | POLICE | \n",
+ " 13414 | \n",
+ "
\n",
+ " \n",
+ " | POLICE BOARD | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | PROCUREMENT | \n",
+ " 92 | \n",
+ "
\n",
+ " \n",
+ " | PUBLIC LIBRARY | \n",
+ " 1015 | \n",
+ "
\n",
+ " \n",
+ " | STREETS & SAN | \n",
+ " 2198 | \n",
+ "
\n",
+ " \n",
+ " | TRANSPORTN | \n",
+ " 1140 | \n",
+ "
\n",
+ " \n",
+ " | TREASURER | \n",
+ " 22 | \n",
+ "
\n",
+ " \n",
+ " | WATER MGMNT | \n",
+ " 1879 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Name\n",
+ " count\n",
+ "Department \n",
+ "ADMIN HEARNG 39\n",
+ "ANIMAL CONTRL 81\n",
+ "AVIATION 1629\n",
+ "BOARD OF ELECTION 107\n",
+ "BOARD OF ETHICS 8\n",
+ "BUDGET & MGMT 46\n",
+ "BUILDINGS 269\n",
+ "BUSINESS AFFAIRS 171\n",
+ "CITY CLERK 84\n",
+ "CITY COUNCIL 411\n",
+ "COMMUNITY DEVELOPMENT 207\n",
+ "COPA 116\n",
+ "CULTURAL AFFAIRS 65\n",
+ "DISABILITIES 28\n",
+ "DoIT 99\n",
+ "FAMILY & SUPPORT 615\n",
+ "FINANCE 560\n",
+ "FIRE 4641\n",
+ "GENERAL SERVICES 980\n",
+ "HEALTH 488\n",
+ "HUMAN RELATIONS 16\n",
+ "HUMAN RESOURCES 79\n",
+ "INSPECTOR GEN 87\n",
+ "LAW 407\n",
+ "LICENSE APPL COMM 1\n",
+ "MAYOR'S OFFICE 85\n",
+ "OEMC 2102\n",
+ "POLICE 13414\n",
+ "POLICE BOARD 2\n",
+ "PROCUREMENT 92\n",
+ "PUBLIC LIBRARY 1015\n",
+ "STREETS & SAN 2198\n",
+ "TRANSPORTN 1140\n",
+ "TREASURER 22\n",
+ "WATER MGMNT 1879"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n",
- "\n"
+ "salaries.groupby([\"Department\"]).agg({\"Name\":[\"count\"]})"
]
},
{
@@ -122,14 +497,60 @@
"In this section of the lab, we will test whether the hourly wage of all hourly workers is significantly different from $30/hr. Import the correct one sample test function from scipy and perform the hypothesis test for a 95% two sided confidence interval."
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import scipy.stats as st"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "4.3230240486229894e-92"
+ ]
+ },
+ "execution_count": 31,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# 1. Set the hypotheses\n",
+ "\n",
+ "### Claim: the mean hourly wage of all hourly workers equals $30/hr\n",
+ "\n",
+ "### H0: mu (mean hourly wage) = 30\n",
+ "### H1: mu (mean hourly wage) != 30\n",
+ "\n",
+ "\n",
+ "# 2. Choose a significance level\n",
+ "alpha = 0.05\n",
+ "\n",
+ "# 3. Define the sample: keep only rows that have an hourly rate.\n",
+ "# dropna() returns a new Series instead of mutating a column of `salaries` in place.\n",
+ "sample = salaries[\"Hourly Rate\"].dropna()\n",
+ "\n",
+ "# 4. Run the two-sided one-sample t-test and show the p-value\n",
+ "\n",
+ "output = st.ttest_1samp(sample, 30)\n",
+ "output[1]"
+ ]
+ },
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n",
- "\n"
+ "# Rejecting the null hypothesis because the p-value < alpha"
]
},
{
@@ -141,14 +562,98 @@
"Hint: A one tailed test has a p-value that is half of the two tailed p-value. If our hypothesis is greater than, then to reject, the test statistic must also be positive."
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 48,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "10\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "(13414,)"
+ ]
+ },
+ "execution_count": 48,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Set hypothesis\n",
+ "\n",
+ "### H0: mu (mean annual salary, police) <= 86000\n",
+ "### H1: mu (mean annual salary, police) > 86000\n",
+ "\n",
+ "# 2. choose a significance \n",
+ "\n",
+ "alpha = 0.05\n",
+ "\n",
+ "#sample\n",
+ "\n",
+ "police = salaries[salaries[\"Department\"] == \"POLICE\"]\n",
+ "sample = police[\"Annual Salary\"]\n",
+ "print(sample.isna().sum())\n",
+ "sample.shape\n",
+ "\n",
+ "# 10 values needing to be dropped\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 49,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/var/folders/bk/02hh56sx1b7622qtqhym98tc0000gn/T/ipykernel_44494/1847533583.py:1: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame\n",
+ "\n",
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " sample.dropna(inplace = True)\n"
+ ]
+ }
+ ],
+ "source": [
+ "sample = sample.dropna()  # rebind instead of an in-place drop on a slice (avoids SettingWithCopyWarning)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 55,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "TtestResult(statistic=3.081997005712994, pvalue=0.0010301701775482577, df=13403)"
+ ]
+ },
+ "execution_count": 55,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "st.ttest_1samp(sample, 86000, alternative = \"greater\")"
+ ]
+ },
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n",
- "\n"
+ "# We can reject the null hypothesis (mean salary <= 86,000) because p-value < alpha,\n",
+ "# so the data support the police chief's claim"
]
},
{
@@ -158,14 +663,280 @@
"Using the `crosstab` function, find the department that has the most hourly workers. "
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 73,
+ "metadata": {
+ "scrolled": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | Salary or Hourly | \n",
+ " Hourly | \n",
+ " Salary | \n",
+ "
\n",
+ " \n",
+ " | Department | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | ADMIN HEARNG | \n",
+ " 0 | \n",
+ " 39 | \n",
+ "
\n",
+ " \n",
+ " | ANIMAL CONTRL | \n",
+ " 19 | \n",
+ " 62 | \n",
+ "
\n",
+ " \n",
+ " | AVIATION | \n",
+ " 1082 | \n",
+ " 547 | \n",
+ "
\n",
+ " \n",
+ " | BOARD OF ELECTION | \n",
+ " 0 | \n",
+ " 107 | \n",
+ "
\n",
+ " \n",
+ " | BOARD OF ETHICS | \n",
+ " 0 | \n",
+ " 8 | \n",
+ "
\n",
+ " \n",
+ " | BUDGET & MGMT | \n",
+ " 2 | \n",
+ " 44 | \n",
+ "
\n",
+ " \n",
+ " | BUILDINGS | \n",
+ " 0 | \n",
+ " 269 | \n",
+ "
\n",
+ " \n",
+ " | BUSINESS AFFAIRS | \n",
+ " 7 | \n",
+ " 164 | \n",
+ "
\n",
+ " \n",
+ " | CITY CLERK | \n",
+ " 0 | \n",
+ " 84 | \n",
+ "
\n",
+ " \n",
+ " | CITY COUNCIL | \n",
+ " 64 | \n",
+ " 347 | \n",
+ "
\n",
+ " \n",
+ " | COMMUNITY DEVELOPMENT | \n",
+ " 4 | \n",
+ " 203 | \n",
+ "
\n",
+ " \n",
+ " | COPA | \n",
+ " 0 | \n",
+ " 116 | \n",
+ "
\n",
+ " \n",
+ " | CULTURAL AFFAIRS | \n",
+ " 7 | \n",
+ " 58 | \n",
+ "
\n",
+ " \n",
+ " | DISABILITIES | \n",
+ " 0 | \n",
+ " 28 | \n",
+ "
\n",
+ " \n",
+ " | DoIT | \n",
+ " 0 | \n",
+ " 99 | \n",
+ "
\n",
+ " \n",
+ " | FAMILY & SUPPORT | \n",
+ " 287 | \n",
+ " 328 | \n",
+ "
\n",
+ " \n",
+ " | FINANCE | \n",
+ " 44 | \n",
+ " 516 | \n",
+ "
\n",
+ " \n",
+ " | FIRE | \n",
+ " 2 | \n",
+ " 4639 | \n",
+ "
\n",
+ " \n",
+ " | GENERAL SERVICES | \n",
+ " 765 | \n",
+ " 215 | \n",
+ "
\n",
+ " \n",
+ " | HEALTH | \n",
+ " 3 | \n",
+ " 485 | \n",
+ "
\n",
+ " \n",
+ " | HUMAN RELATIONS | \n",
+ " 0 | \n",
+ " 16 | \n",
+ "
\n",
+ " \n",
+ " | HUMAN RESOURCES | \n",
+ " 4 | \n",
+ " 75 | \n",
+ "
\n",
+ " \n",
+ " | INSPECTOR GEN | \n",
+ " 0 | \n",
+ " 87 | \n",
+ "
\n",
+ " \n",
+ " | LAW | \n",
+ " 40 | \n",
+ " 367 | \n",
+ "
\n",
+ " \n",
+ " | LICENSE APPL COMM | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | MAYOR'S OFFICE | \n",
+ " 8 | \n",
+ " 77 | \n",
+ "
\n",
+ " \n",
+ " | OEMC | \n",
+ " 1273 | \n",
+ " 829 | \n",
+ "
\n",
+ " \n",
+ " | POLICE | \n",
+ " 10 | \n",
+ " 13404 | \n",
+ "
\n",
+ " \n",
+ " | POLICE BOARD | \n",
+ " 0 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | PROCUREMENT | \n",
+ " 2 | \n",
+ " 90 | \n",
+ "
\n",
+ " \n",
+ " | PUBLIC LIBRARY | \n",
+ " 299 | \n",
+ " 716 | \n",
+ "
\n",
+ " \n",
+ " | STREETS & SAN | \n",
+ " 1862 | \n",
+ " 336 | \n",
+ "
\n",
+ " \n",
+ " | TRANSPORTN | \n",
+ " 725 | \n",
+ " 415 | \n",
+ "
\n",
+ " \n",
+ " | TREASURER | \n",
+ " 0 | \n",
+ " 22 | \n",
+ "
\n",
+ " \n",
+ " | WATER MGMNT | \n",
+ " 1513 | \n",
+ " 366 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "Salary or Hourly Hourly Salary\n",
+ "Department \n",
+ "ADMIN HEARNG 0 39\n",
+ "ANIMAL CONTRL 19 62\n",
+ "AVIATION 1082 547\n",
+ "BOARD OF ELECTION 0 107\n",
+ "BOARD OF ETHICS 0 8\n",
+ "BUDGET & MGMT 2 44\n",
+ "BUILDINGS 0 269\n",
+ "BUSINESS AFFAIRS 7 164\n",
+ "CITY CLERK 0 84\n",
+ "CITY COUNCIL 64 347\n",
+ "COMMUNITY DEVELOPMENT 4 203\n",
+ "COPA 0 116\n",
+ "CULTURAL AFFAIRS 7 58\n",
+ "DISABILITIES 0 28\n",
+ "DoIT 0 99\n",
+ "FAMILY & SUPPORT 287 328\n",
+ "FINANCE 44 516\n",
+ "FIRE 2 4639\n",
+ "GENERAL SERVICES 765 215\n",
+ "HEALTH 3 485\n",
+ "HUMAN RELATIONS 0 16\n",
+ "HUMAN RESOURCES 4 75\n",
+ "INSPECTOR GEN 0 87\n",
+ "LAW 40 367\n",
+ "LICENSE APPL COMM 0 1\n",
+ "MAYOR'S OFFICE 8 77\n",
+ "OEMC 1273 829\n",
+ "POLICE 10 13404\n",
+ "POLICE BOARD 0 2\n",
+ "PROCUREMENT 2 90\n",
+ "PUBLIC LIBRARY 299 716\n",
+ "STREETS & SAN 1862 336\n",
+ "TRANSPORTN 725 415\n",
+ "TREASURER 0 22\n",
+ "WATER MGMNT 1513 366"
+ ]
+ },
+ "execution_count": 73,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "cross_table = pd.crosstab(salaries['Department'], salaries['Salary or Hourly'])\n",
+ "cross_table"
+ ]
+ },
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n",
- "\n"
+ "# STREETS & SAN has the most hourly workers (1862)"
]
},
{
@@ -175,14 +946,86 @@
"The workers from the department with the most hourly workers have complained that their hourly wage is less than $35/hour. Using a one sample t-test, test this one-sided hypothesis at the 95% confidence level."
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 81,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Nulls: 25161\n",
+ "Total: 33183\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Hourly rates of STREETS & SAN workers only; .copy() yields an independent Series for later cleaning\n",
+ "sample = salaries.loc[salaries[\"Department\"] == \"STREETS & SAN\", \"Hourly Rate\"].copy()\n",
+ "print(\"Nulls: \", sample.isnull().sum())\n",
+ "print(\"Total: \", sample.shape[0])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 82,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "sample = sample.dropna()  # rebind to a NaN-free Series instead of mutating in place"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 83,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Nulls: 0\n",
+ "Total: 8022\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(\"Nulls: \", sample.isnull().sum())\n",
+ "print(\"Total: \", sample.shape[0])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 85,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "TtestResult(statistic=-16.352363851001144, pvalue=1.8818444649254009e-59, df=8021)"
+ ]
+ },
+ "execution_count": 85,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# H0 = hourly wage >=35\n",
+ "# H1 = hourly wage < 35\n",
+ "\n",
+ "\n",
+ "st.ttest_1samp(sample, 35, alternative = \"less\")"
+ ]
+ },
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n",
- "\n"
+ "# rejecting H0, wage is not greater than 35"
]
},
{
@@ -204,14 +1047,77 @@
"To compute the confidence interval of the hourly wage, use the 0.95 for the confidence level, number of rows - 1 for degrees of freedom, the mean of the sample for the location parameter and the standard error for the scale. The standard error can be computed using [this](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.sem.html) function in SciPy."
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 99,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "nulls: 25161\n",
+ "totals: 33183\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "12.11257268427681"
+ ]
+ },
+ "execution_count": 99,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "sample = salaries[\"Hourly Rate\"]\n",
+ "print(\"nulls: \",sample.isnull().sum())\n",
+ "print(\"totals: \",sample.shape[0])\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 100,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "sample = sample.dropna()  # rebind to a NaN-free Series instead of mutating a column of `salaries` in place"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 105,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(32.52345834488425, 33.05365708767623)"
+ ]
+ },
+ "execution_count": 105,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "confidence = 0.95\n",
+ "n = len(sample)\n",
+ "mean = sample.mean()\n",
+ "std = sample.std()\n",
+ "std_error = std / np.sqrt(n)  # standard error of the mean\n",
+ "st.t.interval(confidence, n - 1, loc=mean, scale=std_error)"
+ ]
+ },
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n",
- "\n"
+ "# With 95% confidence, the true mean hourly wage of hourly workers is between 32.52 and 33.05"
]
},
{
@@ -223,12 +1129,112 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 107,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "10\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "(13414,)"
+ ]
+ },
+ "execution_count": 107,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# creating and cleaning a sample\n",
+ "\n",
+ "police = salaries[salaries[\"Department\"] == \"POLICE\"]\n",
+ "sample = police[\"Annual Salary\"]\n",
+ "print(sample.isna().sum())\n",
+ "sample.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 108,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "0\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/var/folders/bk/02hh56sx1b7622qtqhym98tc0000gn/T/ipykernel_44494/345795188.py:1: SettingWithCopyWarning: \n",
+ "A value is trying to be set on a copy of a slice from a DataFrame\n",
+ "\n",
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+ " sample.dropna(inplace = True)\n"
+ ]
+ }
+ ],
+ "source": [
+ "sample = sample.dropna()  # rebind instead of an in-place drop on a slice (avoids SettingWithCopyWarning)\n",
+ "print(sample.isna().sum())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 110,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(86177.05631531785, 86795.77269094893)"
+ ]
+ },
+ "execution_count": 110,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n",
- "\n"
+ "# t-interval for the mean of `sample` at the chosen confidence level\n",
+ "confidence = 0.95\n",
+ "n = len(sample)\n",
+ "mean = sample.mean()\n",
+ "std = sample.std()\n",
+ "std_error = std / np.sqrt(n)  # standard error of the mean\n",
+ "\n",
+ "interval = st.t.interval(confidence, n - 1, loc=mean, scale=std_error)\n",
+ "interval"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 113,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "The true mean of police anual wages is between 86177.06 and 86795.77.\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(f\"The true mean of police anual wages is between {round(interval[0],2)} and {round(interval[1], 2)}.\")"
]
},
{
@@ -257,7 +1263,7 @@
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3",
+ "display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -271,7 +1277,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.7.3"
+ "version": "3.11.4"
}
},
"nbformat": 4,