diff --git a/your-code/main.ipynb b/your-code/main.ipynb
index 59b955a..fa2fc89 100755
--- a/your-code/main.ipynb
+++ b/your-code/main.ipynb
@@ -12,12 +12,12 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
- "# import numpy and pandas\n",
- "\n"
+ "import numpy as np\n",
+ "import pandas as pd\n"
]
},
{
@@ -31,11 +31,11 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n"
+ "chicago = pd.read_csv(filepath_or_buffer='Current_Employee_Names__Salaries__and_Position_Titles.csv')  # employee dataset, path relative to the notebook's working directory"
]
},
{
@@ -47,12 +47,130 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 3,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Name | \n",
+ " Job Titles | \n",
+ " Department | \n",
+ " Full or Part-Time | \n",
+ " Salary or Hourly | \n",
+ " Typical Hours | \n",
+ " Annual Salary | \n",
+ " Hourly Rate | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " AARON, JEFFERY M | \n",
+ " SERGEANT | \n",
+ " POLICE | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 101442.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " AARON, KARINA | \n",
+ " POLICE OFFICER (ASSIGNED AS DETECTIVE) | \n",
+ " POLICE | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 94122.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " AARON, KIMBERLEI R | \n",
+ " CHIEF CONTRACT EXPEDITER | \n",
+ " GENERAL SERVICES | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 101592.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " ABAD JR, VICENTE M | \n",
+ " CIVIL ENGINEER IV | \n",
+ " WATER MGMNT | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 110064.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " ABASCAL, REECE E | \n",
+ " TRAFFIC CONTROL AIDE-HOURLY | \n",
+ " OEMC | \n",
+ " P | \n",
+ " Hourly | \n",
+ " 20.0 | \n",
+ " NaN | \n",
+ " 19.86 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Name Job Titles \\\n",
+ "0 AARON, JEFFERY M SERGEANT \n",
+ "1 AARON, KARINA POLICE OFFICER (ASSIGNED AS DETECTIVE) \n",
+ "2 AARON, KIMBERLEI R CHIEF CONTRACT EXPEDITER \n",
+ "3 ABAD JR, VICENTE M CIVIL ENGINEER IV \n",
+ "4 ABASCAL, REECE E TRAFFIC CONTROL AIDE-HOURLY \n",
+ "\n",
+ " Department Full or Part-Time Salary or Hourly Typical Hours \\\n",
+ "0 POLICE F Salary NaN \n",
+ "1 POLICE F Salary NaN \n",
+ "2 GENERAL SERVICES F Salary NaN \n",
+ "3 WATER MGMNT F Salary NaN \n",
+ "4 OEMC P Hourly 20.0 \n",
+ "\n",
+ " Annual Salary Hourly Rate \n",
+ "0 101442.0 NaN \n",
+ "1 94122.0 NaN \n",
+ "2 101592.0 NaN \n",
+ "3 110064.0 NaN \n",
+ "4 NaN 19.86 "
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n",
- "\n"
+ "chicago.head(n=5)  # preview the first five rows\n"
]
},
{
@@ -64,12 +182,30 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 4,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Name 0\n",
+ "Job Titles 0\n",
+ "Department 0\n",
+ "Full or Part-Time 0\n",
+ "Salary or Hourly 0\n",
+ "Typical Hours 25161\n",
+ "Annual Salary 8022\n",
+ "Hourly Rate 25161\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n",
- "\n"
+ "chicago.isna().sum()  # number of missing values in each column\n"
]
},
{
@@ -81,12 +217,25 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 112,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Salary 25161\n",
+ "Hourly 8022\n",
+ "Name: Salary or Hourly, dtype: int64"
+ ]
+ },
+ "execution_count": 112,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n",
- "\n"
+ "s_or_h = chicago.loc[:, 'Salary or Hourly'].value_counts()  # number of employees per pay type\n",
+ "s_or_h"
]
},
{
@@ -105,12 +254,58 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 7,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "POLICE 13414\n",
+ "FIRE 4641\n",
+ "STREETS & SAN 2198\n",
+ "OEMC 2102\n",
+ "WATER MGMNT 1879\n",
+ "AVIATION 1629\n",
+ "TRANSPORTN 1140\n",
+ "PUBLIC LIBRARY 1015\n",
+ "GENERAL SERVICES 980\n",
+ "FAMILY & SUPPORT 615\n",
+ "FINANCE 560\n",
+ "HEALTH 488\n",
+ "CITY COUNCIL 411\n",
+ "LAW 407\n",
+ "BUILDINGS 269\n",
+ "COMMUNITY DEVELOPMENT 207\n",
+ "BUSINESS AFFAIRS 171\n",
+ "COPA 116\n",
+ "BOARD OF ELECTION 107\n",
+ "DoIT 99\n",
+ "PROCUREMENT 92\n",
+ "INSPECTOR GEN 87\n",
+ "MAYOR'S OFFICE 85\n",
+ "CITY CLERK 84\n",
+ "ANIMAL CONTRL 81\n",
+ "HUMAN RESOURCES 79\n",
+ "CULTURAL AFFAIRS 65\n",
+ "BUDGET & MGMT 46\n",
+ "ADMIN HEARNG 39\n",
+ "DISABILITIES 28\n",
+ "TREASURER 22\n",
+ "HUMAN RELATIONS 16\n",
+ "BOARD OF ETHICS 8\n",
+ "POLICE BOARD 2\n",
+ "LICENSE APPL COMM 1\n",
+ "Name: Department, dtype: int64"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n",
- "\n"
+ "department = chicago.loc[:, 'Department'].value_counts()  # number of employees per department\n",
+ "department"
]
},
{
@@ -124,12 +319,28 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 9,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "TtestResult(statistic=0.570006786746275, pvalue=0.5730629469471077, df=29)"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n",
- "\n"
+ "import scipy.stats as st\n",
+ "\n",
+ "# Fixed seed: without random_state every run draws a different sample, so the result below could not be reproduced.\n",
+ "hourly_sample = chicago.loc[chicago[\"Salary or Hourly\"] == \"Hourly\", \"Hourly Rate\"].sample(30, random_state=42)\n",
+ "\n",
+ "# Two-sided one-sample t-test of H0: mean hourly rate == 30. Reject H0 only when pvalue < 0.05.\n",
+ "st.ttest_1samp(hourly_sample, 30)"
]
},
{
@@ -143,12 +354,26 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 13,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "TtestResult(statistic=-0.5127437740568284, pvalue=0.6939928490276305, df=29)"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n",
- "\n"
+ "# Annual Salary is NaN for hourly-paid rows; drop them before sampling so a NaN cannot turn the t-test result into nan. Seeded for reproducibility.\n",
+ "police_sample = chicago.loc[chicago[\"Department\"] == \"POLICE\", \"Annual Salary\"].dropna().sample(30, random_state=42)\n",
+ "\n",
+ "# One-sided test of H0: mean salary <= 86000 against H1: mean salary > 86000. Reject H0 only when pvalue < 0.05.\n",
+ "st.ttest_1samp(police_sample, 86000, alternative=\"greater\")"
]
},
{
@@ -160,12 +385,23 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 22,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'STREETS & SAN'"
+ ]
+ },
+ "execution_count": 22,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n",
- "\n"
+ "cross_tab = pd.crosstab(index=chicago['Department'], columns=chicago['Salary or Hourly'])\n",
+ "cross_tab.loc[:, \"Hourly\"].idxmax()  # department with the largest count in the Hourly column"
]
},
{
@@ -177,12 +413,28 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 49,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "TtestResult(statistic=-0.7312338911537641, pvalue=0.23525196231784318, df=29)"
+ ]
+ },
+ "execution_count": 49,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n",
- "\n"
+ "# Hourly Rate is NaN for salaried rows; drop them so the sample holds 30 real wages and the t-test cannot return nan.\n",
+ "streets_san_rates = chicago.loc[chicago[\"Department\"] == \"STREETS & SAN\", \"Hourly Rate\"].dropna()\n",
+ "\n",
+ "streets_san_sample = streets_san_rates.sample(30, random_state=42).astype(float)  # seeded so the result is reproducible\n",
+ "\n",
+ "# One-sided test of H0: mean hourly rate >= 35 against H1: mean hourly rate < 35. Reject H0 only when pvalue < 0.05.\n",
+ "st.ttest_1samp(streets_san_sample, 35, alternative=\"less\")"
]
},
{
@@ -206,12 +458,32 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 53,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(32.52345834488425, 33.05365708767623)"
+ ]
+ },
+ "execution_count": 53,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n",
- "\n"
+ "# Hourly Rate values for every employee flagged as Hourly.\n",
+ "hourly_wages = chicago.loc[chicago['Salary or Hourly'] == 'Hourly', 'Hourly Rate']\n",
+ "\n",
+ "confidence_level = 0.95\n",
+ "degrees_freedom = hourly_wages.shape[0] - 1\n",
+ "mean_hourly_wage = hourly_wages.mean()\n",
+ "std_error_hourly_wage = st.sem(hourly_wages)\n",
+ "\n",
+ "# Student-t interval centred on the mean and scaled by the standard error of the mean.\n",
+ "confidence_interval = st.t.interval(confidence_level, df=degrees_freedom, loc=mean_hourly_wage, scale=std_error_hourly_wage)\n",
+ "confidence_interval"
]
},
{
@@ -223,12 +495,34 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 122,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(86177.05631531784, 86795.77269094894)"
+ ]
+ },
+ "execution_count": 122,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n",
- "\n"
+ "# Annual salaries of salaried POLICE employees, with missing values removed.\n",
+ "is_salaried_police = (chicago['Department'] == 'POLICE') & (chicago['Salary or Hourly'] == 'Salary')\n",
+ "police_salaries = chicago.loc[is_salaried_police, 'Annual Salary'].dropna().astype(float)\n",
+ "\n",
+ "confidence_level = 0.95\n",
+ "degrees_freedom = police_salaries.shape[0] - 1\n",
+ "mean_police_salary = police_salaries.mean()\n",
+ "std_error_police_salary = st.sem(police_salaries)\n",
+ "\n",
+ "# Student-t interval centred on the mean and scaled by the standard error of the mean.\n",
+ "confidence_interval = st.t.interval(confidence_level, df=degrees_freedom, loc=mean_police_salary, scale=std_error_police_salary)\n",
+ "\n",
+ "confidence_interval"
]
},
{
@@ -271,7 +565,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.7.3"
+ "version": "3.11.4"
}
},
"nbformat": 4,