From 661ab50932dfe7a465293b0f7e86e56f534c8014 Mon Sep 17 00:00:00 2001 From: Valeriya Tolmacheva Date: Tue, 14 Nov 2023 18:36:16 +0000 Subject: [PATCH] Lab Done --- your-code/main.ipynb | 1067 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 986 insertions(+), 81 deletions(-) diff --git a/your-code/main.ipynb b/your-code/main.ipynb index 406e6ba..2395c4c 100755 --- a/your-code/main.ipynb +++ b/your-code/main.ipynb @@ -18,11 +18,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 26, "metadata": {}, "outputs": [], "source": [ - "# import libraries here" + "import pandas as pd\n", + "import numpy as np" ] }, { @@ -203,11 +204,155 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NameDepartmentEducationGenderTitleYearsSalary
0JoseITBachelorManalyst135
1MariaITMasterFanalyst230
2DavidHRMasterManalyst230
3SoniaHRBachelorFanalyst435
4SamuelSalesMasterMassociate355
5EvaSalesBachelorFassociate255
6CarlosITMasterMVP870
7PedroITPhdMassociate760
8AnaHRMasterFVP870
\n", + "
" + ], + "text/plain": [ + " Name Department Education Gender Title Years Salary\n", + "0 Jose IT Bachelor M analyst 1 35\n", + "1 Maria IT Master F analyst 2 30\n", + "2 David HR Master M analyst 2 30\n", + "3 Sonia HR Bachelor F analyst 4 35\n", + "4 Samuel Sales Master M associate 3 55\n", + "5 Eva Sales Bachelor F associate 2 55\n", + "6 Carlos IT Master M VP 8 70\n", + "7 Pedro IT Phd M associate 7 60\n", + "8 Ana HR Master F VP 8 70" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "employees=pd.read_csv(\"Employee.csv\")\n", + "employees" ] }, { @@ -219,21 +364,53 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 9 entries, 0 to 8\n", + "Data columns (total 7 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 Name 9 non-null object\n", + " 1 Department 9 non-null object\n", + " 2 Education 9 non-null object\n", + " 3 Gender 9 non-null object\n", + " 4 Title 9 non-null object\n", + " 5 Years 9 non-null int64 \n", + " 6 Salary 9 non-null int64 \n", + "dtypes: int64(2), object(5)\n", + "memory usage: 636.0+ bytes\n" + ] + } + ], + "source": [ + "employees.info()" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'\\nData types from first glance seem corresponding to the context of each series in the dataframe\\n'" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "\"\"\"\n", - "your comments here\n", + "Data types from first glance seem corresponding to the context of each series in the dataframe\n", "\"\"\"" ] }, @@ -246,11 +423,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "48.888888888888886" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# your code here" + "employees[\"Salary\"].mean()" ] }, { @@ -262,11 +450,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "70" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# your code here" + "employees[\"Salary\"].max()" ] }, { @@ -278,11 +477,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "30" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# your code here" + "employees[\"Salary\"].min()" ] }, { @@ -294,11 +504,77 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NameDepartmentEducationGenderTitleYearsSalary
1MariaITMasterFanalyst230
2DavidHRMasterManalyst230
\n", + "
" + ], + "text/plain": [ + " Name Department Education Gender Title Years Salary\n", + "1 Maria IT Master F analyst 2 30\n", + "2 David HR Master M analyst 2 30" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "employees[employees[\"Salary\"] == employees[\"Salary\"].min()]" ] }, { @@ -310,11 +586,66 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NameDepartmentEducationGenderTitleYearsSalary
2DavidHRMasterManalyst230
\n", + "
" + ], + "text/plain": [ + " Name Department Education Gender Title Years Salary\n", + "2 David HR Master M analyst 2 30" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "employees[employees[\"Name\"] == \"David\"]" ] }, { @@ -326,11 +657,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "2 30\n", + "Name: Salary, dtype: int64" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# your code here" + "employees[employees[\"Name\"] == \"David\"][\"Salary\"]" ] }, { @@ -342,11 +685,88 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NameDepartmentEducationGenderTitleYearsSalary
4SamuelSalesMasterMassociate355
5EvaSalesBachelorFassociate255
7PedroITPhdMassociate760
\n", + "
" + ], + "text/plain": [ + " Name Department Education Gender Title Years Salary\n", + "4 Samuel Sales Master M associate 3 55\n", + "5 Eva Sales Bachelor F associate 2 55\n", + "7 Pedro IT Phd M associate 7 60" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "employees[employees[\"Title\"] == \"associate\"]" ] }, { @@ -359,12 +779,89 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NameDepartmentEducationGenderTitleYearsSalary
0JoseITBachelorManalyst135
1MariaITMasterFanalyst230
2DavidHRMasterManalyst230
\n", + "
" + ], + "text/plain": [ + " Name Department Education Gender Title Years Salary\n", + "0 Jose IT Bachelor M analyst 1 35\n", + "1 Maria IT Master F analyst 2 30\n", + "2 David HR Master M analyst 2 30" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Method 1\n", - "# your code here" + "employees[employees.index<3]" ] }, { @@ -386,11 +883,66 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NameDepartmentEducationGenderTitleYearsSalary
7PedroITPhdMassociate760
\n", + "
" + ], + "text/plain": [ + " Name Department Education Gender Title Years Salary\n", + "7 Pedro IT Phd M associate 7 60" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "employees[(employees[\"Title\"] == \"associate\") & (employees[\"Salary\"] > 55)]" ] }, { @@ -402,11 +954,84 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Salary
Years
135.000000
238.333333
355.000000
435.000000
760.000000
870.000000
\n", + "
" + ], + "text/plain": [ + " Salary\n", + "Years \n", + "1 35.000000\n", + "2 38.333333\n", + "3 55.000000\n", + "4 35.000000\n", + "7 60.000000\n", + "8 70.000000" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "employees.groupby(\"Years\").agg({\"Salary\":\"mean\"})" ] }, { @@ -418,11 +1043,69 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Salary
Title
VP70.000000
analyst32.500000
associate56.666667
\n", + "
" + ], + "text/plain": [ + " Salary\n", + "Title \n", + "VP 70.000000\n", + "analyst 32.500000\n", + "associate 56.666667" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "employees.groupby(\"Title\").agg({\"Salary\":\"mean\"})" ] }, { @@ -434,11 +1117,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 27, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "35.0 55.0 60.0\n" + ] + } + ], "source": [ - "# your code here" + "q1=np.quantile(employees[\"Salary\"],0.25)\n", + "q2=np.quantile(employees[\"Salary\"],0.50)\n", + "q3=np.quantile(employees[\"Salary\"],0.75)\n", + "print (q1,q2,q3)" ] }, { @@ -450,11 +1144,64 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Salary
Gender
F47.5
M50.0
\n", + "
" + ], + "text/plain": [ + " Salary\n", + "Gender \n", + "F 47.5\n", + "M 50.0" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "employees.groupby(\"Gender\").agg({\"Salary\":\"mean\"})" ] }, { @@ -467,11 +1214,98 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
YearsSalary
count9.0000009.000000
mean4.11111148.888889
std2.80376716.541194
min1.00000030.000000
25%2.00000035.000000
50%3.00000055.000000
75%7.00000060.000000
max8.00000070.000000
\n", + "
" + ], + "text/plain": [ + " Years Salary\n", + "count 9.000000 9.000000\n", + "mean 4.111111 48.888889\n", + "std 2.803767 16.541194\n", + "min 1.000000 30.000000\n", + "25% 2.000000 35.000000\n", + "50% 3.000000 55.000000\n", + "75% 7.000000 60.000000\n", + "max 8.000000 70.000000" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "employees.describe()" ] }, { @@ -484,11 +1318,82 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
minmaxdiff
Department
HR307040
IT307040
Sales55550
\n", + "
" + ], + "text/plain": [ + " min max diff\n", + "Department \n", + "HR 30 70 40\n", + "IT 30 70 40\n", + "Sales 55 55 0" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "new_emp=employees.groupby(\"Department\").agg({\"Salary\":[\"min\",\"max\"]})\n", + "new_emp.columns=[\"min\",\"max\"]\n", + "new_emp[\"diff\"]=new_emp[\"max\"]-new_emp[\"min\"]\n", + "new_emp" ] }, { @@ -799,7 +1704,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.2" + "version": "3.11.4" } }, "nbformat": 4,