From b2ab83e6bb9b0b036cb711065ff277c9455fa6ff Mon Sep 17 00:00:00 2001 From: YaraLis Date: Thu, 14 Dec 2023 20:37:09 +0000 Subject: [PATCH] lab done --- your-code/main.ipynb | 581 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 549 insertions(+), 32 deletions(-) diff --git a/your-code/main.ipynb b/your-code/main.ipynb index 59b955a..55dde43 100755 --- a/your-code/main.ipynb +++ b/your-code/main.ipynb @@ -12,11 +12,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ - "# import numpy and pandas\n", + "import numpy as np\n", + "import pandas as pd\n", "\n" ] }, @@ -31,11 +32,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "salaries = pd.read_csv('Current_Employee_Names__Salaries__and_Position_Titles.csv')\n" ] }, { @@ -47,12 +48,103 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NameJob TitlesDepartmentFull or Part-TimeSalary or HourlyTypical HoursAnnual SalaryHourly Rate
0AARON, JEFFERY MSERGEANTPOLICEFSalaryNaN101442.0NaN
1AARON, KARINAPOLICE OFFICER (ASSIGNED AS DETECTIVE)POLICEFSalaryNaN94122.0NaN
2AARON, KIMBERLEI RCHIEF CONTRACT EXPEDITERGENERAL SERVICESFSalaryNaN101592.0NaN
\n", + "
" + ], + "text/plain": [ + " Name Job Titles \\\n", + "0 AARON, JEFFERY M SERGEANT \n", + "1 AARON, KARINA POLICE OFFICER (ASSIGNED AS DETECTIVE) \n", + "2 AARON, KIMBERLEI R CHIEF CONTRACT EXPEDITER \n", + "\n", + " Department Full or Part-Time Salary or Hourly Typical Hours \\\n", + "0 POLICE F Salary NaN \n", + "1 POLICE F Salary NaN \n", + "2 GENERAL SERVICES F Salary NaN \n", + "\n", + " Annual Salary Hourly Rate \n", + "0 101442.0 NaN \n", + "1 94122.0 NaN \n", + "2 101592.0 NaN " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", - "\n" + "salaries.head(3)\n" ] }, { @@ -64,12 +156,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "Name 0\n", + "Job Titles 0\n", + "Department 0\n", + "Full or Part-Time 0\n", + "Salary or Hourly 0\n", + "Typical Hours 25161\n", + "Annual Salary 8022\n", + "Hourly Rate 25161\n", + "dtype: int64" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", - "\n" + "salaries.isnull().sum()\n" ] }, { @@ -81,12 +192,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "Salary or Hourly\n", + "Salary 25161\n", + "Hourly 8022\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", - "\n" + "salaries[\"Salary or Hourly\"].value_counts()\n" ] }, { @@ -105,11 +230,58 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "Department\n", + "POLICE 13414\n", + "FIRE 4641\n", + "STREETS & SAN 2198\n", + "OEMC 2102\n", + "WATER MGMNT 1879\n", + "AVIATION 
1629\n", + "TRANSPORTN 1140\n", + "PUBLIC LIBRARY 1015\n", + "GENERAL SERVICES 980\n", + "FAMILY & SUPPORT 615\n", + "FINANCE 560\n", + "HEALTH 488\n", + "CITY COUNCIL 411\n", + "LAW 407\n", + "BUILDINGS 269\n", + "COMMUNITY DEVELOPMENT 207\n", + "BUSINESS AFFAIRS 171\n", + "COPA 116\n", + "BOARD OF ELECTION 107\n", + "DoIT 99\n", + "PROCUREMENT 92\n", + "INSPECTOR GEN 87\n", + "MAYOR'S OFFICE 85\n", + "CITY CLERK 84\n", + "ANIMAL CONTRL 81\n", + "HUMAN RESOURCES 79\n", + "CULTURAL AFFAIRS 65\n", + "BUDGET & MGMT 46\n", + "ADMIN HEARNG 39\n", + "DISABILITIES 28\n", + "TREASURER 22\n", + "HUMAN RELATIONS 16\n", + "BOARD OF ETHICS 8\n", + "POLICE BOARD 2\n", + "LICENSE APPL COMM 1\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n", + "salaries[\"Department\"].value_counts()\n", "\n" ] }, @@ -124,12 +296,84 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n", - "\n" + "import scipy.stats as st\n", + "import numpy as np " + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "14550 19.50\n", + "16850 13.94\n", + "9294 40.20\n", + "31614 18.52\n", + "21968 35.60\n", + "Name: Hourly Rate, dtype: float64" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "##H0: mu hour rate =30/h\n", + "##H1: mu hour rate != 30/h \n", + "alpha = 0.05\n", + "hourly_wages = salaries['Hourly Rate'].dropna()\n", + "c_sample = hourly_wages.sample(2000)\n", + "c_sample.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "9.213189919496925\n" + ] + } + ], + "source": [ + "mean = c_sample.mean()\n", + "std = c_sample.std(ddof=1)\n", + "\n", + 
"stat = (mean - 30)/ (std/ np.sqrt(2000))\n", + "print(stat)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "We can reject the null hypothesis\n" + ] + } + ], + "source": [ + "p_value = st.t.sf(abs(stat), 2000 -1) *2\n", + "if p_value > 0.05:\n", + " print (\"I cannot reject the null hypothesis\")\n", + "else:\n", + " print(\"We can reject the null hypothesis\") " ] }, { @@ -143,14 +387,154 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NameJob TitlesDepartmentFull or Part-TimeSalary or HourlyTypical HoursAnnual SalaryHourly Rate
15571KNIGHTEN, FREDERICKDETENTION AIDEPOLICEFSalaryNaN64392.0NaN
5726CORDIN, CHARLESPOLICE OFFICERPOLICEFSalaryNaN84054.0NaN
22098OSHANA, SARGONPOLICE OFFICERPOLICEFSalaryNaN87006.0NaN
13712JACKSON IV, WESLEY JPOLICE OFFICERPOLICEFSalaryNaN87006.0NaN
2756BRACKIN, CONNORPOLICE OFFICERPOLICEFSalaryNaN76266.0NaN
\n", + "
" + ], + "text/plain": [ + " Name Job Titles Department Full or Part-Time \\\n", + "15571 KNIGHTEN, FREDERICK DETENTION AIDE POLICE F \n", + "5726 CORDIN, CHARLES POLICE OFFICER POLICE F \n", + "22098 OSHANA, SARGON POLICE OFFICER POLICE F \n", + "13712 JACKSON IV, WESLEY J POLICE OFFICER POLICE F \n", + "2756 BRACKIN, CONNOR POLICE OFFICER POLICE F \n", + "\n", + " Salary or Hourly Typical Hours Annual Salary Hourly Rate \n", + "15571 Salary NaN 64392.0 NaN \n", + "5726 Salary NaN 84054.0 NaN \n", + "22098 Salary NaN 87006.0 NaN \n", + "13712 Salary NaN 87006.0 NaN \n", + "2756 Salary NaN 76266.0 NaN " + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n", + "c2_sample = salaries[salaries['Department'] ==\"POLICE\"].sample(30)\n", + "c2_sample.head()\n", "\n" ] }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "I cannot reject the null hypothesis\n" + ] + } + ], + "source": [ + "c2_sample_f = c2_sample['Annual Salary']\n", + "mean = c2_sample_f.mean()\n", + "std = c2_sample_f.std(ddof=1)\n", + "mu = 86000\n", + "\n", + "stat = (mean - 86000)/ (std/ np.sqrt(30))\n", + "p_value = st.t.sf(abs(stat), 30-1) *2\n", + "if p_value > 0.05:\n", + " print (\"I cannot reject the null hypothesis\")\n", + "else:\n", + " print(\"We can reject the null hypothesis\") " + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -160,11 +544,72 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n", + "hourly_data = salaries[salaries[\"Salary or Hourly\"] == \"Hourly\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Department with the most hourly workers: Department\n", + "ADMIN HEARNG NaN\n", + "ANIMAL 
CONTRL NaN\n", + "AVIATION NaN\n", + "BOARD OF ELECTION NaN\n", + "BOARD OF ETHICS NaN\n", + "BUDGET & MGMT NaN\n", + "BUILDINGS NaN\n", + "BUSINESS AFFAIRS NaN\n", + "CITY CLERK NaN\n", + "CITY COUNCIL NaN\n", + "COMMUNITY DEVELOPMENT NaN\n", + "COPA NaN\n", + "CULTURAL AFFAIRS NaN\n", + "DISABILITIES NaN\n", + "DoIT NaN\n", + "FAMILY & SUPPORT NaN\n", + "FINANCE NaN\n", + "FIRE NaN\n", + "GENERAL SERVICES NaN\n", + "HEALTH NaN\n", + "HUMAN RELATIONS NaN\n", + "HUMAN RESOURCES NaN\n", + "INSPECTOR GEN NaN\n", + "LAW NaN\n", + "LICENSE APPL COMM NaN\n", + "MAYOR'S OFFICE NaN\n", + "OEMC NaN\n", + "POLICE NaN\n", + "POLICE BOARD NaN\n", + "PROCUREMENT NaN\n", + "PUBLIC LIBRARY NaN\n", + "STREETS & SAN NaN\n", + "TRANSPORTN NaN\n", + "TREASURER NaN\n", + "WATER MGMNT NaN\n", + "dtype: float64\n" + ] + } + ], + "source": [ + "\n", + "cross_tab = pd.crosstab(salaries[\"Department\"], salaries[\"Department\"]).dropna()\n", + "\n", + "hourly_counts = cross_tab[hourly_data]\n", + "\n", + "# Find the department with the maximum hourly workers\n", + "most_hourly_department = hourly_counts.idxmax()\n", + "\n", + "print(\"Department with the most hourly workers:\", most_hourly_department)\n", "\n" ] }, @@ -175,13 +620,52 @@ "The workers from the department with the most hourly workers have complained that their hourly wage is less than $35/hour. Using a one sample t-test, test this one-sided hypothesis at the 95% confidence level." 
] }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Department with the most hourly workers: POLICE\n" + ] + } + ], + "source": [ + "most_hourly_workers_department = salaries['Department'].value_counts().idxmax()\n", + "print(\"Department with the most hourly workers:\", most_hourly_workers_department)" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n", + "import scipy.stats as stats\n", + "\n", + "# Pick the department from the hourly rows only (overall headcount would count salaried staff too)\n", + "target_department = salaries.loc[salaries['Salary or Hourly'] == 'Hourly', 'Department'].value_counts().idxmax()\n", + "target_hourly_wages = salaries.loc[salaries['Department'] == target_department, 'Hourly Rate'].dropna()\n", + "\n", + "null_hypothesis_mean = 35\n", + "# H0: mean hourly rate >= 35; H1: mean hourly rate < 35 (lower-tailed test)\n", + "# Perform the one-sample, one-sided t-test\n", + "t_statistic, p_value = stats.ttest_1samp(target_hourly_wages, null_hypothesis_mean, alternative='less')\n", + "\n", + "# Print the results\n", + "print(\"T-statistic:\", t_statistic)\n", + "print(\"P-value:\", p_value)\n", + "\n", + "# Reject H0 when the one-sided p-value is below alpha\n", + "alpha = 0.05\n", + "if p_value < alpha:\n", + " print(f\"At the 95% confidence level, we reject the null hypothesis.\")\n", + "else:\n", + " print(f\"At the 95% confidence level, we fail to reject the null hypothesis.\")\n", + "\n", "\n" ] }, @@ -206,12 +690,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "(32.52345834488425, 33.05365708767623)" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", - "\n" + "\n", + "import numpy as np\n", + "from scipy.stats import t\n", + "from scipy.stats import sem\n", + "\n", + "hourly_rate = salaries['Hourly Rate'].dropna()\n", + "\n", + "confidence_interval = st.t.interval(0.95, len(hourly_rate)-1, loc=hourly_rate.mean(), scale=st.sem(hourly_rate))\n", +
"confidence_interval\n" ] }, { @@ -223,12 +726,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "(9.31381234362183, 9.45418765637817)" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Your code here:\n", - "\n" + "hourly_rate_police = salaries.loc[salaries['Department'] == 'POLICE', 'Hourly Rate'].dropna()\n", + "\n", + "confidence_interval = st.t.interval(0.95, len(hourly_rate_police) - 1, loc=hourly_rate_police.mean(), scale=st.sem(hourly_rate_police))\n", + "confidence_interval\n" ] }, { @@ -271,7 +788,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.11.5" } }, "nbformat": 4,