From 441c91d68bc42d0ea9be52936602527886bafb5a Mon Sep 17 00:00:00 2001 From: Tiago Mateus Date: Sun, 19 Nov 2023 18:45:11 +0000 Subject: [PATCH] lab done --- your-code/main.ipynb | 607 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 561 insertions(+), 46 deletions(-) diff --git a/your-code/main.ipynb b/your-code/main.ipynb index 59b955a..ed75467 100755 --- a/your-code/main.ipynb +++ b/your-code/main.ipynb @@ -12,12 +12,14 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 39, "metadata": {}, "outputs": [], "source": [ "# import numpy and pandas\n", - "\n" + "import numpy as np\n", + "import pandas as pd\n", + "import scipy.stats as st" ] }, { @@ -31,11 +33,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "salaries = pd.read_csv('Current_Employee_Names__Salaries__and_Position_Titles.csv')" ] }, { @@ -47,12 +49,130 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NameJob TitlesDepartmentFull or Part-TimeSalary or HourlyTypical HoursAnnual SalaryHourly Rate
0AARON, JEFFERY MSERGEANTPOLICEFSalaryNaN101442.0NaN
1AARON, KARINAPOLICE OFFICER (ASSIGNED AS DETECTIVE)POLICEFSalaryNaN94122.0NaN
2AARON, KIMBERLEI RCHIEF CONTRACT EXPEDITERGENERAL SERVICESFSalaryNaN101592.0NaN
3ABAD JR, VICENTE MCIVIL ENGINEER IVWATER MGMNTFSalaryNaN110064.0NaN
4ABASCAL, REECE ETRAFFIC CONTROL AIDE-HOURLYOEMCPHourly20.0NaN19.86
\n", + "
" + ], + "text/plain": [ + " Name Job Titles \\\n", + "0 AARON, JEFFERY M SERGEANT \n", + "1 AARON, KARINA POLICE OFFICER (ASSIGNED AS DETECTIVE) \n", + "2 AARON, KIMBERLEI R CHIEF CONTRACT EXPEDITER \n", + "3 ABAD JR, VICENTE M CIVIL ENGINEER IV \n", + "4 ABASCAL, REECE E TRAFFIC CONTROL AIDE-HOURLY \n", + "\n", + " Department Full or Part-Time Salary or Hourly Typical Hours \\\n", + "0 POLICE F Salary NaN \n", + "1 POLICE F Salary NaN \n", + "2 GENERAL SERVICES F Salary NaN \n", + "3 WATER MGMNT F Salary NaN \n", + "4 OEMC P Hourly 20.0 \n", + "\n", + " Annual Salary Hourly Rate \n", + "0 101442.0 NaN \n", + "1 94122.0 NaN \n", + "2 101592.0 NaN \n", + "3 110064.0 NaN \n", + "4 NaN 19.86 " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n", - "\n" + "salaries.head()" ] }, { @@ -64,12 +184,204 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NameJob TitlesDepartmentFull or Part-TimeSalary or HourlyTypical HoursAnnual SalaryHourly Rate
0FalseFalseFalseFalseFalseTrueFalseTrue
1FalseFalseFalseFalseFalseTrueFalseTrue
2FalseFalseFalseFalseFalseTrueFalseTrue
3FalseFalseFalseFalseFalseTrueFalseTrue
4FalseFalseFalseFalseFalseFalseTrueFalse
...........................
33178FalseFalseFalseFalseFalseTrueFalseTrue
33179FalseFalseFalseFalseFalseTrueFalseTrue
33180FalseFalseFalseFalseFalseTrueFalseTrue
33181FalseFalseFalseFalseFalseTrueFalseTrue
33182FalseFalseFalseFalseFalseTrueFalseTrue
\n", + "

33183 rows × 8 columns

\n", + "
" + ], + "text/plain": [ + " Name Job Titles Department Full or Part-Time Salary or Hourly \\\n", + "0 False False False False False \n", + "1 False False False False False \n", + "2 False False False False False \n", + "3 False False False False False \n", + "4 False False False False False \n", + "... ... ... ... ... ... \n", + "33178 False False False False False \n", + "33179 False False False False False \n", + "33180 False False False False False \n", + "33181 False False False False False \n", + "33182 False False False False False \n", + "\n", + " Typical Hours Annual Salary Hourly Rate \n", + "0 True False True \n", + "1 True False True \n", + "2 True False True \n", + "3 True False True \n", + "4 False True False \n", + "... ... ... ... \n", + "33178 True False True \n", + "33179 True False True \n", + "33180 True False True \n", + "33181 True False True \n", + "33182 True False True \n", + "\n", + "[33183 rows x 8 columns]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n", - "\n" + "salaries.isnull()" ] }, { @@ -81,12 +393,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "the percentage of hourly employs is 0.24175029382515142\n", + "the percentage of salary employs is 0.7582497061748485\n" + ] + } + ], "source": [ - "# Your code here:\n", - "\n" + "num_salary = salaries['Salary or Hourly'].value_counts()[0]\n", + "num_hourly = salaries['Salary or Hourly'].value_counts()[1]\n", + "\n", + "print(f'the percentage of hourly employs is {num_hourly/(num_hourly + num_salary)}')\n", + "print(f'the percentage of salary employs is {num_salary/(num_hourly + num_salary)}')" ] }, { @@ -105,12 +429,58 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 26, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": 
{ + "text/plain": [ + "Department\n", + "POLICE 13414\n", + "FIRE 4641\n", + "STREETS & SAN 2198\n", + "OEMC 2102\n", + "WATER MGMNT 1879\n", + "AVIATION 1629\n", + "TRANSPORTN 1140\n", + "PUBLIC LIBRARY 1015\n", + "GENERAL SERVICES 980\n", + "FAMILY & SUPPORT 615\n", + "FINANCE 560\n", + "HEALTH 488\n", + "CITY COUNCIL 411\n", + "LAW 407\n", + "BUILDINGS 269\n", + "COMMUNITY DEVELOPMENT 207\n", + "BUSINESS AFFAIRS 171\n", + "COPA 116\n", + "BOARD OF ELECTION 107\n", + "DoIT 99\n", + "PROCUREMENT 92\n", + "INSPECTOR GEN 87\n", + "MAYOR'S OFFICE 85\n", + "CITY CLERK 84\n", + "ANIMAL CONTRL 81\n", + "HUMAN RESOURCES 79\n", + "CULTURAL AFFAIRS 65\n", + "BUDGET & MGMT 46\n", + "ADMIN HEARNG 39\n", + "DISABILITIES 28\n", + "TREASURER 22\n", + "HUMAN RELATIONS 16\n", + "BOARD OF ETHICS 8\n", + "POLICE BOARD 2\n", + "LICENSE APPL COMM 1\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n", - "\n" + "salaries['Department'].value_counts()" ] }, { @@ -124,12 +494,40 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 91, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "34.527\n", + "our hypothesis wasn't correct. 
p-values is 0.01742522954491291\n" + ] + } + ], "source": [ - "# Your code here:\n", - "\n" + "# filtering for hourly workers\n", + "hourly_workers = salaries[salaries['Salary or Hourly']=='Hourly']\n", + "\n", + "# sampling for n = 50\n", + "n = 50\n", + "hourly_workers_sample = hourly_workers.sample(n)\n", + "hourly_workers_sample.head()\n", + "\n", + "#setting the test: one-sample t statistic of the sample mean against mu\n", + "mean = hourly_workers_sample['Hourly Rate'].mean()\n", + "std = hourly_workers_sample['Hourly Rate'].std(ddof = 1)\n", + "alpha = 0.05\n", + "mu = 30\n", + "stat = (mean - mu) / (std/np.sqrt(n))\n", + "\n", + "#get the two-sided p-value from the t distribution with n-1 degrees of freedom\n", + "p_value = st.t.sf(abs(stat), n-1) * 2\n", + "if p_value > alpha:\n", + " print(f'our hypothesis is correct. p-values is {p_value}')\n", + "else:\n", + " print(f\"our hypothesis wasn't correct. p-values is {p_value}\")" ] }, { @@ -143,12 +541,42 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 58, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "84330.36\n", + "our hypothesis wasn't correct. p-values is 0.5550077500925089\n" + ] + } + ], "source": [ - "# Your code here:\n", - "\n" + "# filtering for salaried workers\n", + "salary_workers = salaries[salaries['Salary or Hourly']=='Salary']\n", + "\n", + "# sampling for n = 50\n", + "n = 50\n", + "salary_workers_sample = salary_workers.sample(n)\n", + "salary_workers_sample.head()\n", + "\n", + "#setting the test: one-sample t statistic of the sample mean against mu\n", + "mean = salary_workers_sample['Annual Salary'].mean()\n", + "std = salary_workers_sample['Annual Salary'].std(ddof = 1)\n", + "alpha = 0.05\n", + "mu = 86000\n", + "stat = (mean - mu) / (std/np.sqrt(n))\n", + "\n", + "#get the two-sided p-value from the t distribution with n-1 degrees of freedom\n", + "p_value = st.t.sf(abs(stat), n-1) * 2\n", + "p_value\n", + "\n", + "if p_value > alpha:\n", + " print(f'our hypothesis is correct. p-values is {p_value}')\n", + "else:\n", + " print(f\"our hypothesis wasn't correct. 
p-values is {p_value}\")" ] }, { @@ -160,12 +588,30 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 86, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "the dpt with more hourly workers is STREETS & SAN\n" + ] + } + ], "source": [ - "# Your code here:\n", - "\n" + "#filtering df for hourly workers\n", + "hourly_workers = salaries[salaries['Salary or Hourly']=='Hourly']\n", + "\n", + "#counting the number of hourly workers by dpt\n", + "count_hourly = hourly_workers.groupby(['Department']).agg({'Salary or Hourly':'count'})\n", + "\n", + "#sorting by descending order and finding what's the first dpt\n", + "count_hourly_desc = count_hourly.sort_values(by = 'Salary or Hourly', ascending = False)\n", + "\n", + "dpt = count_hourly_desc.reset_index()['Department'][0]\n", + "\n", + "print(f'the dpt with more hourly workers is {dpt}')" ] }, { @@ -177,12 +623,41 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 87, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "our hypothesis is correct. p-values is 0.1405739836240176\n" + ] + } + ], "source": [ - "# Your code here:\n", - "\n" + "# filtering for hourly workers of STREETS & SAN (salaried rows have NaN 'Hourly Rate'; mean()/std() would skip them while n stayed 50)\n", + "streets_workers = salaries[(salaries['Department']=='STREETS & SAN') & (salaries['Salary or Hourly']=='Hourly')]\n", + "\n", + "# sampling for n = 50\n", + "n = 50\n", + "streets_workers_sample = streets_workers.sample(n)\n", + "streets_workers_sample.head()\n", + "\n", + "#setting the test: one-sample t statistic of the sample mean against mu\n", + "mean = streets_workers_sample['Hourly Rate'].mean()\n", + "std = streets_workers_sample['Hourly Rate'].std(ddof = 1)\n", + "alpha = 0.05\n", + "mu = 35\n", + "stat = (mean - mu) / (std/np.sqrt(n))\n", + "\n", + "#get the two-sided p-value from the t distribution with n-1 degrees of freedom\n", + "p_value = st.t.sf(abs(stat), n-1) * 2\n", + "p_value\n", + "\n", + "if p_value > alpha:\n", + " print(f'our hypothesis is correct. 
p-values is {p_value}')\n", + "else:\n", + " print(f\"our hypothesis wasn't correct. p-values is {p_value}\")" ] }, { @@ -206,12 +681,32 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 122, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'confidence interval for 95%: (29.709927351854127, 34.45767264814585)'" + ] + }, + "execution_count": 122, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n", - "\n" + "#gathering all hourly wages\n", + "hourly_rates = salaries[salaries['Salary or Hourly'] == 'Hourly']['Hourly Rate']\n", + "n = 100\n", + "hourly_rates_sample = hourly_rates.sample(n)\n", + "\n", + "#gathering variables from the sample only (ddof=1: sample standard deviation)\n", + "hr_mean = hourly_rates_sample.mean()\n", + "std = hourly_rates_sample.std(ddof=1)\n", + "\n", + "# calculating conf interval with the t distribution (n-1 degrees of freedom) because std is estimated from the sample\n", + "f'confidence interval for 95%: {st.t.interval(0.95, n-1, loc=hr_mean, scale=std/np.sqrt(n))}'" ] }, { @@ -223,12 +718,32 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 124, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'confidence interval for 95%: (82776.67139237521, 91138.05900762479)'" + ] + }, + "execution_count": 124, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n", - "\n" + "#gathering all annual salaries\n", + "annual_salary = salaries[salaries['Salary or Hourly'] == 'Salary']['Annual Salary']\n", + "n = 100\n", + "annual_salary_sample = annual_salary.sample(n)\n", + "\n", + "#gathering variables from the sample only (ddof=1: sample standard deviation)\n", + "as_mean = annual_salary_sample.mean()\n", + "std = annual_salary_sample.std(ddof=1)\n", + "\n", + "# calculating conf interval with the t distribution (n-1 degrees of freedom) because std is estimated from the sample\n", + "f'confidence interval for 95%: {st.t.interval(0.95, n-1, loc=as_mean, scale=std/np.sqrt(n))}'\n" ] }, { @@ -257,7 +772,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": 
"python3" }, @@ -271,7 +786,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.10.9" } }, "nbformat": 4,