diff --git a/your-code/main.ipynb b/your-code/main.ipynb index 59b955a..82e57d1 100755 --- a/your-code/main.ipynb +++ b/your-code/main.ipynb @@ -12,12 +12,14 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 74, "metadata": {}, "outputs": [], "source": [ - "# import numpy and pandas\n", - "\n" + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "import scipy.stats as st\n" ] }, { @@ -31,11 +33,219 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 75, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NameJob TitlesDepartmentFull or Part-TimeSalary or HourlyTypical HoursAnnual SalaryHourly Rate
0AARON, JEFFERY MSERGEANTPOLICEFSalaryNaN101442.0NaN
1AARON, KARINAPOLICE OFFICER (ASSIGNED AS DETECTIVE)POLICEFSalaryNaN94122.0NaN
2AARON, KIMBERLEI RCHIEF CONTRACT EXPEDITERGENERAL SERVICESFSalaryNaN101592.0NaN
3ABAD JR, VICENTE MCIVIL ENGINEER IVWATER MGMNTFSalaryNaN110064.0NaN
4ABASCAL, REECE ETRAFFIC CONTROL AIDE-HOURLYOEMCPHourly20.0NaN19.86
...........................
33178ZYLINSKA, KATARZYNAPOLICE OFFICERPOLICEFSalaryNaN72510.0NaN
33179ZYMANTAS, LAURA CPOLICE OFFICERPOLICEFSalaryNaN48078.0NaN
33180ZYMANTAS, MARK EPOLICE OFFICERPOLICEFSalaryNaN90024.0NaN
33181ZYRKOWSKI, CARLO EPOLICE OFFICERPOLICEFSalaryNaN93354.0NaN
33182ZYSKOWSKI, DARIUSZCHIEF DATA BASE ANALYSTDoITFSalaryNaN115932.0NaN
\n", + "

33183 rows × 8 columns

\n", + "
" + ], + "text/plain": [ + " Name Job Titles \\\n", + "0 AARON, JEFFERY M SERGEANT \n", + "1 AARON, KARINA POLICE OFFICER (ASSIGNED AS DETECTIVE) \n", + "2 AARON, KIMBERLEI R CHIEF CONTRACT EXPEDITER \n", + "3 ABAD JR, VICENTE M CIVIL ENGINEER IV \n", + "4 ABASCAL, REECE E TRAFFIC CONTROL AIDE-HOURLY \n", + "... ... ... \n", + "33178 ZYLINSKA, KATARZYNA POLICE OFFICER \n", + "33179 ZYMANTAS, LAURA C POLICE OFFICER \n", + "33180 ZYMANTAS, MARK E POLICE OFFICER \n", + "33181 ZYRKOWSKI, CARLO E POLICE OFFICER \n", + "33182 ZYSKOWSKI, DARIUSZ CHIEF DATA BASE ANALYST \n", + "\n", + " Department Full or Part-Time Salary or Hourly Typical Hours \\\n", + "0 POLICE F Salary NaN \n", + "1 POLICE F Salary NaN \n", + "2 GENERAL SERVICES F Salary NaN \n", + "3 WATER MGMNT F Salary NaN \n", + "4 OEMC P Hourly 20.0 \n", + "... ... ... ... ... \n", + "33178 POLICE F Salary NaN \n", + "33179 POLICE F Salary NaN \n", + "33180 POLICE F Salary NaN \n", + "33181 POLICE F Salary NaN \n", + "33182 DoIT F Salary NaN \n", + "\n", + " Annual Salary Hourly Rate \n", + "0 101442.0 NaN \n", + "1 94122.0 NaN \n", + "2 101592.0 NaN \n", + "3 110064.0 NaN \n", + "4 NaN 19.86 \n", + "... ... ... \n", + "33178 72510.0 NaN \n", + "33179 48078.0 NaN \n", + "33180 90024.0 NaN \n", + "33181 93354.0 NaN \n", + "33182 115932.0 NaN \n", + "\n", + "[33183 rows x 8 columns]" + ] + }, + "execution_count": 75, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n" + "salaries = pd.read_csv(\"Current_Employee_Names__Salaries__and_Position_Titles.csv\")\n", + "\n", + "salaries" ] }, { @@ -47,12 +257,130 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 76, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NameJob TitlesDepartmentFull or Part-TimeSalary or HourlyTypical HoursAnnual SalaryHourly Rate
0AARON, JEFFERY MSERGEANTPOLICEFSalaryNaN101442.0NaN
1AARON, KARINAPOLICE OFFICER (ASSIGNED AS DETECTIVE)POLICEFSalaryNaN94122.0NaN
2AARON, KIMBERLEI RCHIEF CONTRACT EXPEDITERGENERAL SERVICESFSalaryNaN101592.0NaN
3ABAD JR, VICENTE MCIVIL ENGINEER IVWATER MGMNTFSalaryNaN110064.0NaN
4ABASCAL, REECE ETRAFFIC CONTROL AIDE-HOURLYOEMCPHourly20.0NaN19.86
\n", + "
" + ], + "text/plain": [ + " Name Job Titles \\\n", + "0 AARON, JEFFERY M SERGEANT \n", + "1 AARON, KARINA POLICE OFFICER (ASSIGNED AS DETECTIVE) \n", + "2 AARON, KIMBERLEI R CHIEF CONTRACT EXPEDITER \n", + "3 ABAD JR, VICENTE M CIVIL ENGINEER IV \n", + "4 ABASCAL, REECE E TRAFFIC CONTROL AIDE-HOURLY \n", + "\n", + " Department Full or Part-Time Salary or Hourly Typical Hours \\\n", + "0 POLICE F Salary NaN \n", + "1 POLICE F Salary NaN \n", + "2 GENERAL SERVICES F Salary NaN \n", + "3 WATER MGMNT F Salary NaN \n", + "4 OEMC P Hourly 20.0 \n", + "\n", + " Annual Salary Hourly Rate \n", + "0 101442.0 NaN \n", + "1 94122.0 NaN \n", + "2 101592.0 NaN \n", + "3 110064.0 NaN \n", + "4 NaN 19.86 " + ] + }, + "execution_count": 76, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n", - "\n" + "salaries.head()" ] }, { @@ -64,11 +392,27 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 77, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "Typical Hours 25161\n", + "Annual Salary 8022\n", + "Hourly Rate 25161\n", + "dtype: int64" + ] + }, + "execution_count": 77, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n", + "null_cols = salaries.isnull().sum()\n", + "null_cols[null_cols >0]\n", + "\n", "\n" ] }, @@ -81,12 +425,96 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 78, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NameJob TitlesDepartmentFull or Part-TimeTypical HoursAnnual SalaryHourly Rate
Salary or Hourly
Hourly8022802280228022802208022
Salary251612516125161251610251610
\n", + "
" + ], + "text/plain": [ + " Name Job Titles Department Full or Part-Time \\\n", + "Salary or Hourly \n", + "Hourly 8022 8022 8022 8022 \n", + "Salary 25161 25161 25161 25161 \n", + "\n", + " Typical Hours Annual Salary Hourly Rate \n", + "Salary or Hourly \n", + "Hourly 8022 0 8022 \n", + "Salary 0 25161 0 " + ] + }, + "execution_count": 78, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n", - "\n" + "count = salaries.groupby([\"Salary or Hourly\"]).count()\n", + "\n", + "\n", + "count" ] }, { @@ -105,12 +533,60 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 79, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "Department\n", + "ADMIN HEARNG 39\n", + "ANIMAL CONTRL 81\n", + "AVIATION 1629\n", + "BOARD OF ELECTION 107\n", + "BOARD OF ETHICS 8\n", + "BUDGET & MGMT 46\n", + "BUILDINGS 269\n", + "BUSINESS AFFAIRS 171\n", + "CITY CLERK 84\n", + "CITY COUNCIL 411\n", + "COMMUNITY DEVELOPMENT 207\n", + "COPA 116\n", + "CULTURAL AFFAIRS 65\n", + "DISABILITIES 28\n", + "DoIT 99\n", + "FAMILY & SUPPORT 615\n", + "FINANCE 560\n", + "FIRE 4641\n", + "GENERAL SERVICES 980\n", + "HEALTH 488\n", + "HUMAN RELATIONS 16\n", + "HUMAN RESOURCES 79\n", + "INSPECTOR GEN 87\n", + "LAW 407\n", + "LICENSE APPL COMM 1\n", + "MAYOR'S OFFICE 85\n", + "OEMC 2102\n", + "POLICE 13414\n", + "POLICE BOARD 2\n", + "PROCUREMENT 92\n", + "PUBLIC LIBRARY 1015\n", + "STREETS & SAN 2198\n", + "TRANSPORTN 1140\n", + "TREASURER 22\n", + "WATER MGMNT 1879\n", + "Name: Name, dtype: int64" + ] + }, + "execution_count": 79, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n", - "\n" + "count_dep= salaries.groupby([\"Department\"]).count()\n", + "\n", + "count_dep[\"Name\"]" ] }, { @@ -124,12 +600,82 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 103, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + 
"text/plain": [ + "4 19.86\n", + "6 46.10\n", + "7 35.60\n", + "10 2.65\n", + "18 17.68\n", + " ... \n", + "33164 46.10\n", + "33168 17.68\n", + "33169 35.60\n", + "33174 46.35\n", + "33175 48.85\n", + "Name: Hourly Rate, Length: 8022, dtype: float64" + ] + }, + "execution_count": 103, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 1) hypothesis\n", + "\n", + "# H0: hourly_wage = 30\n", + "# H1: hourly_wage != 30\n", + "\n", + "# 2) significance:\n", + "\n", + "alpha = 0.05\n", + "\n", + "# 3) sample \n", + "\n", + "sample = salaries[salaries[\"Salary or Hourly\"]== \"Hourly\"][\"Hourly Rate\"]\n", + "\n", + "sample\n" + ] + }, + { + "cell_type": "code", + "execution_count": 104, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "TtestResult(statistic=20.6198057854942, pvalue=4.3230240486229894e-92, df=8021)" + ] + }, + "execution_count": 104, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 4) Compute statistics \n", + "\n", + "st.ttest_1samp(sample, 30)\n", + "\n", + "#st.ttest_1samp(c3_sample, 17)" + ] + }, + { + "cell_type": "code", + "execution_count": 82, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n", - "\n" + "# p_value e less than alpha, so i have to reject H0. 
hourly rate of the workers is \n", + "# different from 30$" ] }, { @@ -143,12 +689,86 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 105, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "TtestResult(statistic=653.6034847441706, pvalue=1.0, df=25160)" + ] + }, + "execution_count": 105, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# H0: salaries >= 86\n", + "# H1: salaries < 86\n", + "\n", + "alpha = 0.05\n", + "\n", + "# using a sample\n", + "\n", + "sample_salaries = salaries[salaries[\"Salary or Hourly\"] == \"Salary\"][\"Annual Salary\"]\n", + "\n", + "\n", + "st.ttest_1samp(sample_salaries, 86, alternative = \"less\")\n", + "\n", + "#st.ttest_1samp(c3_age_sample, 17, alternative = \"greater\")" + ] + }, + { + "cell_type": "code", + "execution_count": 135, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/ms/vrwr3_md2xl156mcvfmyskx80000gn/T/ipykernel_77248/27391849.py:1: UserWarning: Boolean Series key will be reindexed to match DataFrame index.\n", + " sample_salaries = salaries[salaries[\"Salary or Hourly\"] == \"Salary\"][salaries[\"Department\"]== \"POLICE\"][\"Annual Salary\"]\n" + ] + } + ], + "source": [ + "sample_salaries = salaries[salaries[\"Salary or Hourly\"] == \"Salary\"][salaries[\"Department\"]== \"POLICE\"][\"Annual Salary\"]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 84, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n", - "\n" + "## p_value greater than significance level, so i can NOT reject H0, salaries seem to be \n", + "# greater than 86000/year." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 106, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "TtestResult(statistic=653.6034847441706, pvalue=1.0, df=25160)" + ] + }, + "execution_count": 106, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# using total workers from the dataframe that has a salary.\n", + "\n", + "salaries1 = salaries[salaries[\"Salary or Hourly\"] == \"Salary\"][\"Annual Salary\"]\n", + "\n", + "st.ttest_1samp(salaries1, 86, alternative = \"less\")\n" ] }, { @@ -160,12 +780,253 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 99, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Salary or Hourly\n", + "Hourly STREETS & SAN\n", + "Salary POLICE\n", + "dtype: object" + ] + }, + "execution_count": 99, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dep_hourly = pd.crosstab(salaries[\"Department\"], salaries[\"Salary or Hourly\"])\n", + "\n", + "dep_max = dep_hourly.idxmax()\n", + "\n", + "dep_max" + ] + }, + { + "cell_type": "code", + "execution_count": 100, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n", - "\n" + "# The departmente with the most hourly workers is Streets & San. " + ] + }, + { + "cell_type": "code", + "execution_count": 101, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NameJob TitlesDepartmentFull or Part-TimeSalary or HourlyTypical HoursAnnual SalaryHourly Rate
0AARON, JEFFERY MSERGEANTPOLICEFSalaryNaN101442.0NaN
1AARON, KARINAPOLICE OFFICER (ASSIGNED AS DETECTIVE)POLICEFSalaryNaN94122.0NaN
2AARON, KIMBERLEI RCHIEF CONTRACT EXPEDITERGENERAL SERVICESFSalaryNaN101592.0NaN
3ABAD JR, VICENTE MCIVIL ENGINEER IVWATER MGMNTFSalaryNaN110064.0NaN
4ABASCAL, REECE ETRAFFIC CONTROL AIDE-HOURLYOEMCPHourly20.0NaN19.86
...........................
33178ZYLINSKA, KATARZYNAPOLICE OFFICERPOLICEFSalaryNaN72510.0NaN
33179ZYMANTAS, LAURA CPOLICE OFFICERPOLICEFSalaryNaN48078.0NaN
33180ZYMANTAS, MARK EPOLICE OFFICERPOLICEFSalaryNaN90024.0NaN
33181ZYRKOWSKI, CARLO EPOLICE OFFICERPOLICEFSalaryNaN93354.0NaN
33182ZYSKOWSKI, DARIUSZCHIEF DATA BASE ANALYSTDoITFSalaryNaN115932.0NaN
\n", + "

33183 rows × 8 columns

\n", + "
" + ], + "text/plain": [ + " Name Job Titles \\\n", + "0 AARON, JEFFERY M SERGEANT \n", + "1 AARON, KARINA POLICE OFFICER (ASSIGNED AS DETECTIVE) \n", + "2 AARON, KIMBERLEI R CHIEF CONTRACT EXPEDITER \n", + "3 ABAD JR, VICENTE M CIVIL ENGINEER IV \n", + "4 ABASCAL, REECE E TRAFFIC CONTROL AIDE-HOURLY \n", + "... ... ... \n", + "33178 ZYLINSKA, KATARZYNA POLICE OFFICER \n", + "33179 ZYMANTAS, LAURA C POLICE OFFICER \n", + "33180 ZYMANTAS, MARK E POLICE OFFICER \n", + "33181 ZYRKOWSKI, CARLO E POLICE OFFICER \n", + "33182 ZYSKOWSKI, DARIUSZ CHIEF DATA BASE ANALYST \n", + "\n", + " Department Full or Part-Time Salary or Hourly Typical Hours \\\n", + "0 POLICE F Salary NaN \n", + "1 POLICE F Salary NaN \n", + "2 GENERAL SERVICES F Salary NaN \n", + "3 WATER MGMNT F Salary NaN \n", + "4 OEMC P Hourly 20.0 \n", + "... ... ... ... ... \n", + "33178 POLICE F Salary NaN \n", + "33179 POLICE F Salary NaN \n", + "33180 POLICE F Salary NaN \n", + "33181 POLICE F Salary NaN \n", + "33182 DoIT F Salary NaN \n", + "\n", + " Annual Salary Hourly Rate \n", + "0 101442.0 NaN \n", + "1 94122.0 NaN \n", + "2 101592.0 NaN \n", + "3 110064.0 NaN \n", + "4 NaN 19.86 \n", + "... ... ... \n", + "33178 72510.0 NaN \n", + "33179 48078.0 NaN \n", + "33180 90024.0 NaN \n", + "33181 93354.0 NaN \n", + "33182 115932.0 NaN \n", + "\n", + "[33183 rows x 8 columns]" + ] + }, + "execution_count": 101, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "salaries" ] }, { @@ -175,14 +1036,43 @@ "The workers from the department with the most hourly workers have complained that their hourly wage is less than $35/hour. Using a one sample t-test, test this one-sided hypothesis at the 95% confidence level." 
] }, + { + "cell_type": "code", + "execution_count": 108, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "TtestResult(statistic=-59075.69614118866, pvalue=1.0, df=8021)" + ] + }, + "execution_count": 108, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# H0 = hourly rate <= 35\n", + "# H1 = hourly rate > 35\n", + "\n", + "alpha = 0.05\n", + "\n", + "workers_hourly = salaries[salaries[\"Salary or Hourly\"]== \"Hourly\"][\"Hourly Rate\"]\n", + "\n", + "\n", + "st.ttest_1samp(workers_hourly, len(workers_hourly), alternative = \"greater\")\n", + "\n", + "#st.ttest_1samp(salaries1, 86, alternative = \"less\")\n" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n", - "\n" + "## The p-value is greater than alpha, so I can NOT reject H0. NOTE(review): the test above passes len(workers_hourly) as the population mean instead of 35 and is not restricted to STREETS & SAN, so this conclusion should be re-checked." ] }, { @@ -206,11 +1096,30 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 114, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "(32.52349834813791, 33.05361708442256)" + ] + }, + "execution_count": 114, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n", + "\n", + "hourly_wage_mean = salaries[salaries[\"Salary or Hourly\"]==\"Hourly\"][\"Hourly Rate\"].mean()\n", + "\n", + "hourly_wage_std = salaries[salaries[\"Salary or Hourly\"]==\"Hourly\"][\"Hourly Rate\"].std()\n", + "\n", + "n = len(salaries[salaries[\"Salary or Hourly\"]==\"Hourly\"][\"Hourly Rate\"])\n", + "\n", + "\n", + "st.norm.interval(0.95, loc=hourly_wage_mean, scale = hourly_wage_std/np.sqrt(n))\n", "\n" ] }, @@ -223,12 +1132,74 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 130, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/ms/vrwr3_md2xl156mcvfmyskx80000gn/T/ipykernel_77248/4047882935.py:1: UserWarning: Boolean Series key will be 
reindexed to match DataFrame index.\n", + " salary_police_mean = salaries[salaries[\"Salary or Hourly\"] == \"Salary\"][salaries[\"Department\"] == \"POLICE\"][\"Annual Salary\"].mean()\n", + "/var/folders/ms/vrwr3_md2xl156mcvfmyskx80000gn/T/ipykernel_77248/4047882935.py:3: UserWarning: Boolean Series key will be reindexed to match DataFrame index.\n", + " salary_police_std = salaries[salaries[\"Salary or Hourly\"] == \"Salary\"][salaries[\"Department\"] == \"POLICE\"][\"Annual Salary\"].std()\n", + "/var/folders/ms/vrwr3_md2xl156mcvfmyskx80000gn/T/ipykernel_77248/4047882935.py:5: UserWarning: Boolean Series key will be reindexed to match DataFrame index.\n", + " n = len(salaries[salaries[\"Salary or Hourly\"] == \"Salary\"][salaries[\"Department\"] == \"POLICE\"][\"Annual Salary\"])\n" + ] + }, + { + "data": { + "text/plain": [ + "(86177.08425202279, 86795.74475424399)" + ] + }, + "execution_count": 130, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n", - "\n" + "\n", + "salary_police_mean = salaries[salaries[\"Salary or Hourly\"] == \"Salary\"][salaries[\"Department\"] == \"POLICE\"][\"Annual Salary\"].mean()\n", + "\n", + "salary_police_std = salaries[salaries[\"Salary or Hourly\"] == \"Salary\"][salaries[\"Department\"] == \"POLICE\"][\"Annual Salary\"].std()\n", + "\n", + "n = len(salaries[salaries[\"Salary or Hourly\"] == \"Salary\"][salaries[\"Department\"] == \"POLICE\"][\"Annual Salary\"])\n", + "\n", + "st.norm.interval(0.95, loc=salary_police_mean, scale = salary_police_std/np.sqrt(n))\n" + ] + }, + { + "cell_type": "code", + "execution_count": 126, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/ms/vrwr3_md2xl156mcvfmyskx80000gn/T/ipykernel_77248/1646062075.py:1: UserWarning: Boolean Series key will be reindexed to match DataFrame index.\n", + " salary_police_mean = salaries[salaries[\"Department\"] == \"POLICE\"][salaries[\"Salary or 
Hourly\"] == \"Salary\"][\"Annual Salary\"].mean()\n" + ] + }, + { + "data": { + "text/plain": [ + "86486.41450313339" + ] + }, + "execution_count": 126, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "## Can I have two conditions on a boolean mask? Yes: combine them in ONE mask, salaries[(cond_a) & (cond_b)]. Chaining salaries[cond_a][cond_b] applies the full-length second mask to the already-filtered frame, so pandas reindexes it and emits the UserWarning.\n", + "\n", + "salary_police_mean = salaries[salaries[\"Department\"] == \"POLICE\"][\"Annual Salary\"].mean()\n", + "\n", + "salary_police_mean" ] }, { @@ -246,7 +1217,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 92, "metadata": {}, "outputs": [], "source": [ @@ -257,7 +1228,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -271,7 +1242,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.10.9" } }, "nbformat": 4,