From b6148dc9df20084f469f31317adde83cf7bbb61c Mon Sep 17 00:00:00 2001 From: HenrikSoeder Date: Wed, 22 Nov 2023 11:45:42 +0000 Subject: [PATCH] Lab Done --- your-code/main.ipynb | 1078 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 1042 insertions(+), 36 deletions(-) diff --git a/your-code/main.ipynb b/your-code/main.ipynb index 59b955a..4db305b 100755 --- a/your-code/main.ipynb +++ b/your-code/main.ipynb @@ -12,11 +12,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ - "# import numpy and pandas\n", + "import pandas as pd\n", + "import numpy as np\n", "\n" ] }, @@ -31,11 +32,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n" + "salaries = pd.read_csv(\"Current_Employee_Names__Salaries__and_Position_Titles.csv\")" ] }, { @@ -47,12 +48,130 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NameJob TitlesDepartmentFull or Part-TimeSalary or HourlyTypical HoursAnnual SalaryHourly Rate
0AARON, JEFFERY MSERGEANTPOLICEFSalaryNaN101442.0NaN
1AARON, KARINAPOLICE OFFICER (ASSIGNED AS DETECTIVE)POLICEFSalaryNaN94122.0NaN
2AARON, KIMBERLEI RCHIEF CONTRACT EXPEDITERGENERAL SERVICESFSalaryNaN101592.0NaN
3ABAD JR, VICENTE MCIVIL ENGINEER IVWATER MGMNTFSalaryNaN110064.0NaN
4ABASCAL, REECE ETRAFFIC CONTROL AIDE-HOURLYOEMCPHourly20.0NaN19.86
\n", + "
" + ], + "text/plain": [ + " Name Job Titles \\\n", + "0 AARON, JEFFERY M SERGEANT \n", + "1 AARON, KARINA POLICE OFFICER (ASSIGNED AS DETECTIVE) \n", + "2 AARON, KIMBERLEI R CHIEF CONTRACT EXPEDITER \n", + "3 ABAD JR, VICENTE M CIVIL ENGINEER IV \n", + "4 ABASCAL, REECE E TRAFFIC CONTROL AIDE-HOURLY \n", + "\n", + " Department Full or Part-Time Salary or Hourly Typical Hours \\\n", + "0 POLICE F Salary NaN \n", + "1 POLICE F Salary NaN \n", + "2 GENERAL SERVICES F Salary NaN \n", + "3 WATER MGMNT F Salary NaN \n", + "4 OEMC P Hourly 20.0 \n", + "\n", + " Annual Salary Hourly Rate \n", + "0 101442.0 NaN \n", + "1 94122.0 NaN \n", + "2 101592.0 NaN \n", + "3 110064.0 NaN \n", + "4 NaN 19.86 " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n", - "\n" + "salaries.head()" ] }, { @@ -64,12 +183,30 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "Name 0\n", + "Job Titles 0\n", + "Department 0\n", + "Full or Part-Time 0\n", + "Salary or Hourly 0\n", + "Typical Hours 25161\n", + "Annual Salary 8022\n", + "Hourly Rate 25161\n", + "dtype: int64" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n", - "\n" + "salaries.isna().sum()" ] }, { @@ -81,12 +218,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "Salary 25161\n", + "Hourly 8022\n", + "Name: Salary or Hourly, dtype: int64" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n", - "\n" + "salaries[\"Salary or Hourly\"].value_counts()" ] }, { @@ -105,12 +254,238 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "metadata": {}, - 
"outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Name
count
Department
ADMIN HEARNG39
ANIMAL CONTRL81
AVIATION1629
BOARD OF ELECTION107
BOARD OF ETHICS8
BUDGET & MGMT46
BUILDINGS269
BUSINESS AFFAIRS171
CITY CLERK84
CITY COUNCIL411
COMMUNITY DEVELOPMENT207
COPA116
CULTURAL AFFAIRS65
DISABILITIES28
DoIT99
FAMILY & SUPPORT615
FINANCE560
FIRE4641
GENERAL SERVICES980
HEALTH488
HUMAN RELATIONS16
HUMAN RESOURCES79
INSPECTOR GEN87
LAW407
LICENSE APPL COMM1
MAYOR'S OFFICE85
OEMC2102
POLICE13414
POLICE BOARD2
PROCUREMENT92
PUBLIC LIBRARY1015
STREETS & SAN2198
TRANSPORTN1140
TREASURER22
WATER MGMNT1879
\n", + "
" + ], + "text/plain": [ + " Name\n", + " count\n", + "Department \n", + "ADMIN HEARNG 39\n", + "ANIMAL CONTRL 81\n", + "AVIATION 1629\n", + "BOARD OF ELECTION 107\n", + "BOARD OF ETHICS 8\n", + "BUDGET & MGMT 46\n", + "BUILDINGS 269\n", + "BUSINESS AFFAIRS 171\n", + "CITY CLERK 84\n", + "CITY COUNCIL 411\n", + "COMMUNITY DEVELOPMENT 207\n", + "COPA 116\n", + "CULTURAL AFFAIRS 65\n", + "DISABILITIES 28\n", + "DoIT 99\n", + "FAMILY & SUPPORT 615\n", + "FINANCE 560\n", + "FIRE 4641\n", + "GENERAL SERVICES 980\n", + "HEALTH 488\n", + "HUMAN RELATIONS 16\n", + "HUMAN RESOURCES 79\n", + "INSPECTOR GEN 87\n", + "LAW 407\n", + "LICENSE APPL COMM 1\n", + "MAYOR'S OFFICE 85\n", + "OEMC 2102\n", + "POLICE 13414\n", + "POLICE BOARD 2\n", + "PROCUREMENT 92\n", + "PUBLIC LIBRARY 1015\n", + "STREETS & SAN 2198\n", + "TRANSPORTN 1140\n", + "TREASURER 22\n", + "WATER MGMNT 1879" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n", - "\n" + "salaries.groupby([\"Department\"]).agg({\"Name\":[\"count\"]})" ] }, { @@ -122,14 +497,60 @@ "In this section of the lab, we will test whether the hourly wage of all hourly workers is significantly different from $30/hr. Import the correct one sample test function from scipy and perform the hypothesis test for a 95% two sided confidence interval." ] }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "import scipy.stats as st" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "4.3230240486229894e-92" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Set the hypothesis \n", + "\n", + "### wage of all workers is equal equal to 30\n", + "\n", + "### H0: mu mean wage = 30\n", + "### H1: mu mean wage != 30\n", + "\n", + "\n", + "# 2. 
choose a significance level\n", + "alpha = 0.05\n", + "\n", + "# 3. define a sample\n", + "sample = salaries[\"Hourly Rate\"]\n", + "sample.dropna(inplace = True)\n", + "\n", + "#4. run it \n", + "\n", + "output = st.ttest_1samp(sample, 30)\n", + "output[1]" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n", - "\n" + "# rejecting the null hypothesis because p-value < alpha" ] }, { @@ -141,14 +562,98 @@ "Hint: A one tailed test has a p-value that is half of the two tailed p-value. If our hypothesis is greater than, then to reject, the test statistic must also be positive." ] }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "10\n" + ] + }, + { + "data": { + "text/plain": [ + "(13414,)" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Set hypothesis\n", + "\n", + "### H0: mu mean wage police <= 86000\n", + "### H1: mu mean wage police > 86000\n", + "\n", + "# 2. 
choose a significance \n", + "\n", + "alpha = 0.05\n", + "\n", + "#sample\n", + "\n", + "police = salaries[salaries[\"Department\"] == \"POLICE\"]\n", + "sample = police[\"Annual Salary\"]\n", + "print(sample.isna().sum())\n", + "sample.shape\n", + "\n", + "# 10 values needing to be dropped\n" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/bk/02hh56sx1b7622qtqhym98tc0000gn/T/ipykernel_44494/1847533583.py:1: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " sample.dropna(inplace = True)\n" + ] + } + ], + "source": [ + "sample.dropna(inplace = True)" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "TtestResult(statistic=3.081997005712994, pvalue=0.0010301701775482577, df=13403)" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "st.ttest_1samp(sample, 86000, alternative = \"greater\")" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n", - "\n" + "# p-value < alpha: we can reject the null hypothesis that the mean police salary is not higher than 86000, \n", + "# police chief is right" ] }, { @@ -158,14 +663,280 @@ "Using the `crosstab` function, find the department that has the most hourly workers. " ] }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Salary or HourlyHourlySalary
Department
ADMIN HEARNG039
ANIMAL CONTRL1962
AVIATION1082547
BOARD OF ELECTION0107
BOARD OF ETHICS08
BUDGET & MGMT244
BUILDINGS0269
BUSINESS AFFAIRS7164
CITY CLERK084
CITY COUNCIL64347
COMMUNITY DEVELOPMENT4203
COPA0116
CULTURAL AFFAIRS758
DISABILITIES028
DoIT099
FAMILY & SUPPORT287328
FINANCE44516
FIRE24639
GENERAL SERVICES765215
HEALTH3485
HUMAN RELATIONS016
HUMAN RESOURCES475
INSPECTOR GEN087
LAW40367
LICENSE APPL COMM01
MAYOR'S OFFICE877
OEMC1273829
POLICE1013404
POLICE BOARD02
PROCUREMENT290
PUBLIC LIBRARY299716
STREETS & SAN1862336
TRANSPORTN725415
TREASURER022
WATER MGMNT1513366
\n", + "
" + ], + "text/plain": [ + "Salary or Hourly Hourly Salary\n", + "Department \n", + "ADMIN HEARNG 0 39\n", + "ANIMAL CONTRL 19 62\n", + "AVIATION 1082 547\n", + "BOARD OF ELECTION 0 107\n", + "BOARD OF ETHICS 0 8\n", + "BUDGET & MGMT 2 44\n", + "BUILDINGS 0 269\n", + "BUSINESS AFFAIRS 7 164\n", + "CITY CLERK 0 84\n", + "CITY COUNCIL 64 347\n", + "COMMUNITY DEVELOPMENT 4 203\n", + "COPA 0 116\n", + "CULTURAL AFFAIRS 7 58\n", + "DISABILITIES 0 28\n", + "DoIT 0 99\n", + "FAMILY & SUPPORT 287 328\n", + "FINANCE 44 516\n", + "FIRE 2 4639\n", + "GENERAL SERVICES 765 215\n", + "HEALTH 3 485\n", + "HUMAN RELATIONS 0 16\n", + "HUMAN RESOURCES 4 75\n", + "INSPECTOR GEN 0 87\n", + "LAW 40 367\n", + "LICENSE APPL COMM 0 1\n", + "MAYOR'S OFFICE 8 77\n", + "OEMC 1273 829\n", + "POLICE 10 13404\n", + "POLICE BOARD 0 2\n", + "PROCUREMENT 2 90\n", + "PUBLIC LIBRARY 299 716\n", + "STREETS & SAN 1862 336\n", + "TRANSPORTN 725 415\n", + "TREASURER 0 22\n", + "WATER MGMNT 1513 366" + ] + }, + "execution_count": 73, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cross_table = pd.crosstab(salaries['Department'], salaries['Salary or Hourly'])\n", + "cross_table" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n", - "\n" + "#STRETS & SAN" ] }, { @@ -175,14 +946,86 @@ "The workers from the department with the most hourly workers have complained that their hourly wage is less than $35/hour. Using a one sample t-test, test this one-sided hypothesis at the 95% confidence level." 
] }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Nulls: 25161\n", + "Total: 33183\n" + ] + } + ], + "source": [ + "salaries[salaries[\"Department\"] == \"STREETS & SAN\"]\n", + "sample = salaries[\"Hourly Rate\"]\n", + "print(\"Nulls: \", sample.isnull().sum())\n", + "print(\"Total: \", sample.shape[0])" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "metadata": {}, + "outputs": [], + "source": [ + "sample.dropna(inplace = True)" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Nulls: 0\n", + "Total: 8022\n" + ] + } + ], + "source": [ + "print(\"Nulls: \", sample.isnull().sum())\n", + "print(\"Total: \", sample.shape[0])" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "TtestResult(statistic=-16.352363851001144, pvalue=1.8818444649254009e-59, df=8021)" + ] + }, + "execution_count": 85, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# H0 = hourly wage >=35\n", + "# H1 = hourly wage < 35\n", + "\n", + "\n", + "st.ttest_1samp(sample, 35, alternative = \"less\")" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n", - "\n" + "# rejecting H0: the mean hourly wage is significantly below 35 -- NOTE(review): the STREETS & SAN filter result was never assigned, so this sample holds all 8022 hourly workers, not only that department" ] }, { @@ -204,14 +1047,77 @@ "To compute the confidence interval of the hourly wage, use the 0.95 for the confidence level, number of rows - 1 for degrees of freedom, the mean of the sample for the location parameter and the standard error for the scale. The standard error can be computed using [this](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.sem.html) function in SciPy." 
] }, + { + "cell_type": "code", + "execution_count": 99, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "nulls: 25161\n", + "totals: 33183\n" + ] + }, + { + "data": { + "text/plain": [ + "12.11257268427681" + ] + }, + "execution_count": 99, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sample = salaries[\"Hourly Rate\"]\n", + "print(\"nulls: \",sample.isnull().sum())\n", + "print(\"totals: \",sample.shape[0])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 100, + "metadata": {}, + "outputs": [], + "source": [ + "sample.dropna(inplace = True)" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(32.52345834488425, 33.05365708767623)" + ] + }, + "execution_count": 105, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "confidence = 0.95\n", + "std = sample.std()\n", + "mean = sample.mean()\n", + "n = len(sample)\n", + "\n", + "st.t.interval(confidence, n-1, loc = mean, scale = std/np.sqrt(n))" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "# Your code here:\n", - "\n" + "# with 95% confidence, the true mean hourly wage of hourly workers is between 32.52 and 33.05" ] }, { @@ -223,12 +1129,112 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 107, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "10\n" + ] + }, + { + "data": { + "text/plain": [ + "(13414,)" + ] + }, + "execution_count": 107, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# creating and cleaning a sample\n", + "\n", + "police = salaries[salaries[\"Department\"] == \"POLICE\"]\n", + "sample = police[\"Annual Salary\"]\n", + "print(sample.isna().sum())\n", + "sample.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "metadata": { + 
"collapsed": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/bk/02hh56sx1b7622qtqhym98tc0000gn/T/ipykernel_44494/345795188.py:1: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " sample.dropna(inplace = True)\n" + ] + } + ], + "source": [ + "sample.dropna(inplace = True)\n", + "print(sample.isna().sum())" + ] + }, + { + "cell_type": "code", + "execution_count": 110, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "(86177.05631531785, 86795.77269094893)" + ] + }, + "execution_count": 110, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here:\n", - "\n" + "confidence = 0.95\n", + "std = sample.std()\n", + "mean = sample.mean()\n", + "n = len(sample)\n", + "\n", + "\n", + "\n", + "interval = st.t.interval(confidence, n-1, loc = mean, scale = std/np.sqrt(n))\n", + "interval " + ] + }, + { + "cell_type": "code", + "execution_count": 113, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The true mean of police anual wages is between 86177.06 and 86795.77.\n" + ] + } + ], + "source": [ + "print(f\"The true mean of police anual wages is between {round(interval[0],2)} and {round(interval[1], 2)}.\")" ] }, { @@ -257,7 +1263,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -271,7 +1277,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.11.4" } }, "nbformat": 4,