diff --git a/your-code/main.ipynb b/your-code/main.ipynb
index 59b955a..22a652c 100755
--- a/your-code/main.ipynb
+++ b/your-code/main.ipynb
@@ -12,11 +12,12 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
- "# import numpy and pandas\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
"\n"
]
},
@@ -31,11 +32,11 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here:\n"
+ "salaries = pd.read_csv('Current_Employee_Names__Salaries__and_Position_Titles.csv')\n"
]
},
{
@@ -47,11 +48,102 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 3,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Name | \n",
+ " Job Titles | \n",
+ " Department | \n",
+ " Full or Part-Time | \n",
+ " Salary or Hourly | \n",
+ " Typical Hours | \n",
+ " Annual Salary | \n",
+ " Hourly Rate | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " AARON, JEFFERY M | \n",
+ " SERGEANT | \n",
+ " POLICE | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 101442.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " AARON, KARINA | \n",
+ " POLICE OFFICER (ASSIGNED AS DETECTIVE) | \n",
+ " POLICE | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 94122.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " AARON, KIMBERLEI R | \n",
+ " CHIEF CONTRACT EXPEDITER | \n",
+ " GENERAL SERVICES | \n",
+ " F | \n",
+ " Salary | \n",
+ " NaN | \n",
+ " 101592.0 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Name Job Titles \\\n",
+ "0 AARON, JEFFERY M SERGEANT \n",
+ "1 AARON, KARINA POLICE OFFICER (ASSIGNED AS DETECTIVE) \n",
+ "2 AARON, KIMBERLEI R CHIEF CONTRACT EXPEDITER \n",
+ "\n",
+ " Department Full or Part-Time Salary or Hourly Typical Hours \\\n",
+ "0 POLICE F Salary NaN \n",
+ "1 POLICE F Salary NaN \n",
+ "2 GENERAL SERVICES F Salary NaN \n",
+ "\n",
+ " Annual Salary Hourly Rate \n",
+ "0 101442.0 NaN \n",
+ "1 94122.0 NaN \n",
+ "2 101592.0 NaN "
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n",
+ "salaries.head(3)\n",
"\n"
]
},
@@ -64,12 +156,34 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 4,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "RangeIndex: 33183 entries, 0 to 33182\n",
+ "Data columns (total 8 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 Name 33183 non-null object \n",
+ " 1 Job Titles 33183 non-null object \n",
+ " 2 Department 33183 non-null object \n",
+ " 3 Full or Part-Time 33183 non-null object \n",
+ " 4 Salary or Hourly 33183 non-null object \n",
+ " 5 Typical Hours 8022 non-null float64\n",
+ " 6 Annual Salary 25161 non-null float64\n",
+ " 7 Hourly Rate 8022 non-null float64\n",
+ "dtypes: float64(3), object(5)\n",
+ "memory usage: 2.0+ MB\n",
+ "None\n"
+ ]
+ }
+ ],
"source": [
- "# Your code here:\n",
- "\n"
+ "salaries.info()  # info() prints its own report and returns None, so wrapping it in print() adds a stray None line"
]
},
{
@@ -81,12 +195,24 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 5,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Salary 25161\n",
+ "Hourly 8022\n",
+ "Name: Salary or Hourly, dtype: int64"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n",
- "\n"
+ "salaries['Salary or Hourly'].value_counts()"
]
},
{
@@ -105,12 +231,229 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 6,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Name | \n",
+ "
\n",
+ " \n",
+ " | Department | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | ADMIN HEARNG | \n",
+ " 39 | \n",
+ "
\n",
+ " \n",
+ " | ANIMAL CONTRL | \n",
+ " 81 | \n",
+ "
\n",
+ " \n",
+ " | AVIATION | \n",
+ " 1629 | \n",
+ "
\n",
+ " \n",
+ " | BOARD OF ELECTION | \n",
+ " 107 | \n",
+ "
\n",
+ " \n",
+ " | BOARD OF ETHICS | \n",
+ " 8 | \n",
+ "
\n",
+ " \n",
+ " | BUDGET & MGMT | \n",
+ " 46 | \n",
+ "
\n",
+ " \n",
+ " | BUILDINGS | \n",
+ " 269 | \n",
+ "
\n",
+ " \n",
+ " | BUSINESS AFFAIRS | \n",
+ " 171 | \n",
+ "
\n",
+ " \n",
+ " | CITY CLERK | \n",
+ " 84 | \n",
+ "
\n",
+ " \n",
+ " | CITY COUNCIL | \n",
+ " 411 | \n",
+ "
\n",
+ " \n",
+ " | COMMUNITY DEVELOPMENT | \n",
+ " 207 | \n",
+ "
\n",
+ " \n",
+ " | COPA | \n",
+ " 116 | \n",
+ "
\n",
+ " \n",
+ " | CULTURAL AFFAIRS | \n",
+ " 65 | \n",
+ "
\n",
+ " \n",
+ " | DISABILITIES | \n",
+ " 28 | \n",
+ "
\n",
+ " \n",
+ " | DoIT | \n",
+ " 99 | \n",
+ "
\n",
+ " \n",
+ " | FAMILY & SUPPORT | \n",
+ " 615 | \n",
+ "
\n",
+ " \n",
+ " | FINANCE | \n",
+ " 560 | \n",
+ "
\n",
+ " \n",
+ " | FIRE | \n",
+ " 4641 | \n",
+ "
\n",
+ " \n",
+ " | GENERAL SERVICES | \n",
+ " 980 | \n",
+ "
\n",
+ " \n",
+ " | HEALTH | \n",
+ " 488 | \n",
+ "
\n",
+ " \n",
+ " | HUMAN RELATIONS | \n",
+ " 16 | \n",
+ "
\n",
+ " \n",
+ " | HUMAN RESOURCES | \n",
+ " 79 | \n",
+ "
\n",
+ " \n",
+ " | INSPECTOR GEN | \n",
+ " 87 | \n",
+ "
\n",
+ " \n",
+ " | LAW | \n",
+ " 407 | \n",
+ "
\n",
+ " \n",
+ " | LICENSE APPL COMM | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | MAYOR'S OFFICE | \n",
+ " 85 | \n",
+ "
\n",
+ " \n",
+ " | OEMC | \n",
+ " 2102 | \n",
+ "
\n",
+ " \n",
+ " | POLICE | \n",
+ " 13414 | \n",
+ "
\n",
+ " \n",
+ " | POLICE BOARD | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " | PROCUREMENT | \n",
+ " 92 | \n",
+ "
\n",
+ " \n",
+ " | PUBLIC LIBRARY | \n",
+ " 1015 | \n",
+ "
\n",
+ " \n",
+ " | STREETS & SAN | \n",
+ " 2198 | \n",
+ "
\n",
+ " \n",
+ " | TRANSPORTN | \n",
+ " 1140 | \n",
+ "
\n",
+ " \n",
+ " | TREASURER | \n",
+ " 22 | \n",
+ "
\n",
+ " \n",
+ " | WATER MGMNT | \n",
+ " 1879 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Name\n",
+ "Department \n",
+ "ADMIN HEARNG 39\n",
+ "ANIMAL CONTRL 81\n",
+ "AVIATION 1629\n",
+ "BOARD OF ELECTION 107\n",
+ "BOARD OF ETHICS 8\n",
+ "BUDGET & MGMT 46\n",
+ "BUILDINGS 269\n",
+ "BUSINESS AFFAIRS 171\n",
+ "CITY CLERK 84\n",
+ "CITY COUNCIL 411\n",
+ "COMMUNITY DEVELOPMENT 207\n",
+ "COPA 116\n",
+ "CULTURAL AFFAIRS 65\n",
+ "DISABILITIES 28\n",
+ "DoIT 99\n",
+ "FAMILY & SUPPORT 615\n",
+ "FINANCE 560\n",
+ "FIRE 4641\n",
+ "GENERAL SERVICES 980\n",
+ "HEALTH 488\n",
+ "HUMAN RELATIONS 16\n",
+ "HUMAN RESOURCES 79\n",
+ "INSPECTOR GEN 87\n",
+ "LAW 407\n",
+ "LICENSE APPL COMM 1\n",
+ "MAYOR'S OFFICE 85\n",
+ "OEMC 2102\n",
+ "POLICE 13414\n",
+ "POLICE BOARD 2\n",
+ "PROCUREMENT 92\n",
+ "PUBLIC LIBRARY 1015\n",
+ "STREETS & SAN 2198\n",
+ "TRANSPORTN 1140\n",
+ "TREASURER 22\n",
+ "WATER MGMNT 1879"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n",
- "\n"
+ "salaries.groupby('Department')[['Name']].count()  # non-null Name entries per department, kept as a one-column DataFrame"
]
},
{
@@ -124,12 +467,23 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 7,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "TtestResult(statistic=20.6198057854942, pvalue=4.3230240486229894e-92, df=8021)"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n",
- "\n"
+ "from scipy import stats\n",
+ "stats.ttest_1samp(salaries['Hourly Rate'].dropna(), 30)  # two-sided by default: does the mean hourly rate differ from 30?"
]
},
{
@@ -143,12 +497,20 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 8,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "3.081997005712994 0.0010301701775482569\n"
+ ]
+ }
+ ],
"source": [
- "# Your code here:\n",
- "\n"
+ "tstat, pvalue = stats.ttest_1samp(salaries.loc[salaries['Department'] == 'POLICE', 'Annual Salary'].dropna(), 86000, alternative='greater')  # H1: mean police salary > 86000\n",
+ "print(tstat, pvalue)  # pvalue is already one-sided; halving a two-sided value is only valid when tstat > 0"
]
},
{
@@ -160,12 +522,63 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 9,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | Salary or Hourly | \n",
+ " Hourly | \n",
+ " Salary | \n",
+ "
\n",
+ " \n",
+ " | Department | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | STREETS & SAN | \n",
+ " 0.847134 | \n",
+ " 0.152866 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "Salary or Hourly Hourly Salary\n",
+ "Department \n",
+ "STREETS & SAN 0.847134 0.152866"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n",
- "\n"
+ "hourly_prop = pd.crosstab(salaries['Department'], salaries['Salary or Hourly'], normalize='index')  # normalize='index' makes each department row sum to 1\n",
+ "hourly_prop.loc[hourly_prop['Hourly'].eq(hourly_prop['Hourly'].max())]  # row(s) with the largest hourly share\n"
]
},
{
@@ -177,12 +590,22 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 10,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "TtestResult(statistic=-9.567447887848152, pvalue=3.3378530564707717e-21, df=1861)"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n",
- "\n"
+ "stats.ttest_1samp(salaries.loc[salaries['Department'] == 'STREETS & SAN', 'Hourly Rate'].dropna(), 35)  # two-sided by default; NOTE(review): pass alternative='less' if the claim under test is one-sided - confirm against the prompt"
]
},
{
@@ -206,12 +629,25 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 11,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(32.52345834488425, 33.05365708767623)"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n",
- "\n"
+ "hourly = salaries['Hourly Rate'].dropna()  # keep only rows that carry an hourly rate\n",
+ "sample_mean = hourly.mean()\n",
+ "sample_error = stats.sem(hourly)  # standard error of the mean (ddof=1 by default)\n",
+ "stats.t.interval(0.95, df=len(hourly) - 1, loc=sample_mean, scale=sample_error)  # 95% t-interval around the sample mean"
]
},
{
@@ -223,12 +659,25 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 12,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(86526.99656774187, 87047.00301256099)"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here:\n",
- "\n"
+ "annual = salaries['Annual Salary'].dropna()  # filter on the analysed column itself instead of inferring it from a missing Hourly Rate\n",
+ "sample_mean = annual.mean()\n",
+ "sample_error = stats.sem(annual)  # a single NaN left in the sample would turn this into NaN\n",
+ "stats.t.interval(0.95, df=len(annual) - 1, loc=sample_mean, scale=sample_error)  # NOTE(review): spans every department - add a Department filter if the prompt asks for one"
]
},
{
@@ -271,7 +720,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.7.3"
+ "version": "3.10.9"
}
},
"nbformat": 4,