diff --git a/your-code/main.ipynb b/your-code/main.ipynb
index 59b955a..ed75467 100755
--- a/your-code/main.ipynb
+++ b/your-code/main.ipynb
@@ -12,12 +12,14 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 39,
"metadata": {},
"outputs": [],
"source": [
"# import numpy and pandas\n",
- "\n"
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "import scipy.stats as st"
]
},
{
@@ -31,11 +33,11 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n"
+ "# Load the employee salary dataset from a CSV path relative to the working directory.\n",
+ "csv_path = 'Current_Employee_Names__Salaries__and_Position_Titles.csv'\n",
+ "salaries = pd.read_csv(csv_path)"
]
},
{
@@ -47,12 +49,130 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 4,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Name | \n",
+ " Job Titles | \n",
+ " Department | \n",
+ " Full or Part-Time | \n",
+ " Salary or Hourly | \n",
+ " Typical Hours | \n",
+ " Annual Salary | \n",
+ " Hourly Rate | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " AARON, JEFFERY M | \n",
+ " SERGEANT | \n",
+ " POLICE | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 101442.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " AARON, KARINA | \n",
+ " POLICE OFFICER (ASSIGNED AS DETECTIVE) | \n",
+ " POLICE | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 94122.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " AARON, KIMBERLEI R | \n",
+ " CHIEF CONTRACT EXPEDITER | \n",
+ " GENERAL SERVICES | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 101592.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " ABAD JR, VICENTE M | \n",
+ " CIVIL ENGINEER IV | \n",
+ " WATER MGMNT | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 110064.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " ABASCAL, REECE E | \n",
+ " TRAFFIC CONTROL AIDE-HOURLY | \n",
+ " OEMC | \n",
+ " P | \n",
+ " Hourly | \n",
+ " 20.0 | \n",
+ " NaN | \n",
+ " 19.86 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Name Job Titles \\\n",
+ "0 AARON, JEFFERY M SERGEANT \n",
+ "1 AARON, KARINA POLICE OFFICER (ASSIGNED AS DETECTIVE) \n",
+ "2 AARON, KIMBERLEI R CHIEF CONTRACT EXPEDITER \n",
+ "3 ABAD JR, VICENTE M CIVIL ENGINEER IV \n",
+ "4 ABASCAL, REECE E TRAFFIC CONTROL AIDE-HOURLY \n",
+ "\n",
+ " Department Full or Part-Time Salary or Hourly Typical Hours \\\n",
+ "0 POLICE F Salary NaN \n",
+ "1 POLICE F Salary NaN \n",
+ "2 GENERAL SERVICES F Salary NaN \n",
+ "3 WATER MGMNT F Salary NaN \n",
+ "4 OEMC P Hourly 20.0 \n",
+ "\n",
+ " Annual Salary Hourly Rate \n",
+ "0 101442.0 NaN \n",
+ "1 94122.0 NaN \n",
+ "2 101592.0 NaN \n",
+ "3 110064.0 NaN \n",
+ "4 NaN 19.86 "
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n",
- "\n"
+ "# Preview the first five rows to check that the columns loaded as expected.\n",
+ "salaries.head(n=5)"
]
},
{
@@ -64,12 +184,204 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 10,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Name | \n",
+ " Job Titles | \n",
+ " Department | \n",
+ " Full or Part-Time | \n",
+ " Salary or Hourly | \n",
+ " Typical Hours | \n",
+ " Annual Salary | \n",
+ " Hourly Rate | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " True | \n",
+ " False | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " True | \n",
+ " False | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " True | \n",
+ " False | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " True | \n",
+ " False | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " True | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 33178 | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " True | \n",
+ " False | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | 33179 | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " True | \n",
+ " False | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | 33180 | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " True | \n",
+ " False | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | 33181 | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " True | \n",
+ " False | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " | 33182 | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " False | \n",
+ " True | \n",
+ " False | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
33183 rows × 8 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Name Job Titles Department Full or Part-Time Salary or Hourly \\\n",
+ "0 False False False False False \n",
+ "1 False False False False False \n",
+ "2 False False False False False \n",
+ "3 False False False False False \n",
+ "4 False False False False False \n",
+ "... ... ... ... ... ... \n",
+ "33178 False False False False False \n",
+ "33179 False False False False False \n",
+ "33180 False False False False False \n",
+ "33181 False False False False False \n",
+ "33182 False False False False False \n",
+ "\n",
+ " Typical Hours Annual Salary Hourly Rate \n",
+ "0 True False True \n",
+ "1 True False True \n",
+ "2 True False True \n",
+ "3 True False True \n",
+ "4 False True False \n",
+ "... ... ... ... \n",
+ "33178 True False True \n",
+ "33179 True False True \n",
+ "33180 True False True \n",
+ "33181 True False True \n",
+ "33182 True False True \n",
+ "\n",
+ "[33183 rows x 8 columns]"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n",
- "\n"
+ "# Count missing values per column; the raw boolean mask has one row per\n",
+ "# employee and is too large to read as a summary of missing data.\n",
+ "salaries.isnull().sum()"
]
},
{
@@ -81,12 +393,24 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 24,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "the percentage of hourly employs is 0.24175029382515142\n",
+ "the percentage of salary employs is 0.7582497061748485\n"
+ ]
+ }
+ ],
"source": [
- "# Your code here:\n",
- "\n"
+ "# Look the counts up by label rather than by position: positional [0]/[1]\n",
+ "# access on a label-indexed Series is deprecated in pandas 2.x and silently\n",
+ "# depends on which pay type happens to be the most frequent.\n",
+ "pay_type_counts = salaries['Salary or Hourly'].value_counts()\n",
+ "num_salary = pay_type_counts.get('Salary', 0)\n",
+ "num_hourly = pay_type_counts.get('Hourly', 0)\n",
+ "total_workers = num_hourly + num_salary\n",
+ "\n",
+ "# The '%' format spec multiplies by 100, so the printed value is a real percentage.\n",
+ "print(f'the percentage of hourly employs is {num_hourly / total_workers:.2%}')\n",
+ "print(f'the percentage of salary employs is {num_salary / total_workers:.2%}')"
]
},
{
@@ -105,12 +429,58 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 26,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Department\n",
+ "POLICE 13414\n",
+ "FIRE 4641\n",
+ "STREETS & SAN 2198\n",
+ "OEMC 2102\n",
+ "WATER MGMNT 1879\n",
+ "AVIATION 1629\n",
+ "TRANSPORTN 1140\n",
+ "PUBLIC LIBRARY 1015\n",
+ "GENERAL SERVICES 980\n",
+ "FAMILY & SUPPORT 615\n",
+ "FINANCE 560\n",
+ "HEALTH 488\n",
+ "CITY COUNCIL 411\n",
+ "LAW 407\n",
+ "BUILDINGS 269\n",
+ "COMMUNITY DEVELOPMENT 207\n",
+ "BUSINESS AFFAIRS 171\n",
+ "COPA 116\n",
+ "BOARD OF ELECTION 107\n",
+ "DoIT 99\n",
+ "PROCUREMENT 92\n",
+ "INSPECTOR GEN 87\n",
+ "MAYOR'S OFFICE 85\n",
+ "CITY CLERK 84\n",
+ "ANIMAL CONTRL 81\n",
+ "HUMAN RESOURCES 79\n",
+ "CULTURAL AFFAIRS 65\n",
+ "BUDGET & MGMT 46\n",
+ "ADMIN HEARNG 39\n",
+ "DISABILITIES 28\n",
+ "TREASURER 22\n",
+ "HUMAN RELATIONS 16\n",
+ "BOARD OF ETHICS 8\n",
+ "POLICE BOARD 2\n",
+ "LICENSE APPL COMM 1\n",
+ "Name: count, dtype: int64"
+ ]
+ },
+ "execution_count": 26,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n",
- "\n"
+ "# Number of employees in each department, most populous first.\n",
+ "employees_per_department = salaries['Department'].value_counts()\n",
+ "employees_per_department"
]
},
{
@@ -124,12 +494,40 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 91,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "34.527\n",
+ "our hypothesis wasn't correct. p-values is 0.01742522954491291\n"
+ ]
+ }
+ ],
"source": [
- "# Your code here:\n",
- "\n"
+ "# Keep only employees paid by the hour.\n",
+ "hourly_workers = salaries[salaries['Salary or Hourly'] == 'Hourly']\n",
+ "\n",
+ "# Draw a reproducible random sample of size n; the fixed seed makes the\n",
+ "# printed p-value repeatable after a kernel restart.\n",
+ "n = 50\n",
+ "hourly_workers_sample = hourly_workers.sample(n, random_state=42)\n",
+ "hourly_rate_sample = hourly_workers_sample['Hourly Rate']\n",
+ "\n",
+ "# Two-sided one-sample t-test of H0: mean hourly rate == mu.\n",
+ "alpha = 0.05\n",
+ "mu = 30\n",
+ "stat, p_value = st.ttest_1samp(hourly_rate_sample, popmean=mu)\n",
+ "\n",
+ "# p_value > alpha: the sample does not contradict H0 at significance level alpha.\n",
+ "if p_value > alpha:\n",
+ "    print(f'our hypothesis is correct. p-values is {p_value}')\n",
+ "else:\n",
+ "    print(f\"our hypothesis wasn't correct. p-values is {p_value}\")"
]
},
{
@@ -143,12 +541,42 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 58,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "84330.36\n",
+ "our hypothesis wasn't correct. p-values is 0.5550077500925089\n"
+ ]
+ }
+ ],
"source": [
- "# Your code here:\n",
- "\n"
+ "# Keep only employees paid an annual salary.\n",
+ "salary_workers = salaries[salaries['Salary or Hourly'] == 'Salary']\n",
+ "\n",
+ "# Draw a reproducible random sample of size n; the fixed seed makes the\n",
+ "# printed p-value repeatable after a kernel restart.\n",
+ "n = 50\n",
+ "salary_workers_sample = salary_workers.sample(n, random_state=42)\n",
+ "annual_salary_sample = salary_workers_sample['Annual Salary']\n",
+ "\n",
+ "# Two-sided one-sample t-test of H0: mean annual salary == mu.\n",
+ "alpha = 0.05\n",
+ "mu = 86000\n",
+ "stat, p_value = st.ttest_1samp(annual_salary_sample, popmean=mu)\n",
+ "\n",
+ "# p_value > alpha: the sample does not contradict H0 at significance level alpha.\n",
+ "if p_value > alpha:\n",
+ "    print(f'our hypothesis is correct. p-values is {p_value}')\n",
+ "else:\n",
+ "    print(f\"our hypothesis wasn't correct. p-values is {p_value}\")"
]
},
{
@@ -160,12 +588,30 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 86,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "the dpt with more hourly workers is STREETS & SAN\n"
+ ]
+ }
+ ],
"source": [
- "# Your code here:\n",
- "\n"
+ "# Keep only employees paid by the hour.\n",
+ "hourly_workers = salaries[salaries['Salary or Hourly'] == 'Hourly']\n",
+ "\n",
+ "# Count hourly workers per department and take the label with the largest count.\n",
+ "hourly_per_department = hourly_workers['Department'].value_counts()\n",
+ "dpt = hourly_per_department.idxmax()\n",
+ "\n",
+ "print(f'the dpt with more hourly workers is {dpt}')"
]
},
{
@@ -177,12 +623,41 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 87,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "our hypothesis is correct. p-values is 0.1405739836240176\n"
+ ]
+ }
+ ],
"source": [
- "# Your code here:\n",
- "\n"
+ "# Hourly-paid employees of the STREETS & SAN department. Filtering on the\n",
+ "# department alone can let salaried rows (whose 'Hourly Rate' is NaN) into the\n",
+ "# sample, leaving fewer usable rates than the n used as the sample size.\n",
+ "streets_workers = salaries[\n",
+ "    (salaries['Department'] == 'STREETS & SAN')\n",
+ "    & (salaries['Salary or Hourly'] == 'Hourly')\n",
+ "]\n",
+ "streets_hourly_rates = streets_workers['Hourly Rate'].dropna()\n",
+ "\n",
+ "# Draw a reproducible random sample of size n; the fixed seed makes the\n",
+ "# printed p-value repeatable after a kernel restart.\n",
+ "n = 50\n",
+ "streets_workers_sample = streets_hourly_rates.sample(n, random_state=42)\n",
+ "\n",
+ "# Two-sided one-sample t-test of H0: mean hourly rate == mu.\n",
+ "alpha = 0.05\n",
+ "mu = 35\n",
+ "stat, p_value = st.ttest_1samp(streets_workers_sample, popmean=mu)\n",
+ "\n",
+ "# p_value > alpha: the sample does not contradict H0 at significance level alpha.\n",
+ "if p_value > alpha:\n",
+ "    print(f'our hypothesis is correct. p-values is {p_value}')\n",
+ "else:\n",
+ "    print(f\"our hypothesis wasn't correct. p-values is {p_value}\")"
]
},
{
@@ -206,12 +681,32 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 122,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'confidence interval for 95%: (29.709927351854127, 34.45767264814585)'"
+ ]
+ },
+ "execution_count": 122,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n",
- "\n"
+ "# Hourly rates of every hourly-paid employee.\n",
+ "hourly_rates = salaries[salaries['Salary or Hourly'] == 'Hourly']['Hourly Rate']\n",
+ "\n",
+ "# Reproducible random sample of n rates.\n",
+ "n = 100\n",
+ "hourly_rates_sample = hourly_rates.sample(n, random_state=42)\n",
+ "\n",
+ "# Sample mean and its standard error (st.sem defaults to ddof=1).\n",
+ "hr_mean = hourly_rates_sample.mean()\n",
+ "hr_sem = st.sem(hourly_rates_sample)\n",
+ "\n",
+ "# Mean and spread both come from the sample, so the interval uses Student's t\n",
+ "# with n - 1 degrees of freedom instead of the normal distribution.\n",
+ "f'confidence interval for 95%: {st.t.interval(0.95, n - 1, loc=hr_mean, scale=hr_sem)}'"
]
},
{
@@ -223,12 +718,32 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 124,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'confidence interval for 95%: (82776.67139237521, 91138.05900762479)'"
+ ]
+ },
+ "execution_count": 124,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n",
- "\n"
+ "# Annual salaries of every salaried employee.\n",
+ "annual_salary = salaries[salaries['Salary or Hourly'] == 'Salary']['Annual Salary']\n",
+ "\n",
+ "# Reproducible random sample of n salaries.\n",
+ "n = 100\n",
+ "annual_salary_sample = annual_salary.sample(n, random_state=42)\n",
+ "\n",
+ "# Sample mean and its standard error (st.sem defaults to ddof=1).\n",
+ "as_mean = annual_salary_sample.mean()\n",
+ "as_sem = st.sem(annual_salary_sample)\n",
+ "\n",
+ "# Mean and spread both come from the sample, so the interval uses Student's t\n",
+ "# with n - 1 degrees of freedom instead of the normal distribution.\n",
+ "f'confidence interval for 95%: {st.t.interval(0.95, n - 1, loc=as_mean, scale=as_sem)}'\n"
]
},
{
@@ -257,7 +772,7 @@
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3",
+ "display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -271,7 +786,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.7.3"
+ "version": "3.10.9"
}
},
"nbformat": 4,