diff --git a/your-code/main.ipynb b/your-code/main.ipynb index 406e6ba..4f6eae9 100755 --- a/your-code/main.ipynb +++ b/your-code/main.ipynb @@ -18,11 +18,14 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ - "# import libraries here" + "from sklearn import linear_model\n", + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt" ] }, { @@ -37,11 +40,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ - "# your code here" + "temp = pd.read_csv(\"Temp_States.csv\")" ] }, { @@ -53,11 +56,100 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CityStateTemperature
0NYCNew York19.444444
1AlbanyNew York9.444444
2BuffaloNew York3.333333
3HartfordConnecticut17.222222
4BridgeportConnecticut14.444444
5TretonNew Jersey22.222222
6NewarkNew Jersey20.000000
\n", + "
" + ], + "text/plain": [ + " City State Temperature\n", + "0 NYC New York 19.444444\n", + "1 Albany New York 9.444444\n", + "2 Buffalo New York 3.333333\n", + "3 Hartford Connecticut 17.222222\n", + "4 Bridgeport Connecticut 14.444444\n", + "5 Treton New Jersey 22.222222\n", + "6 Newark New Jersey 20.000000" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "temp" ] }, { @@ -69,11 +161,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "City object\n", + "State object\n", + "Temperature float64\n", + "dtype: object" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# your code here" + "temp.dtypes" ] }, { @@ -83,7 +189,7 @@ "outputs": [], "source": [ "\"\"\"\n", - "your comments here\n", + "We have two columns with strings and one column with floats.\n", "\"\"\"" ] }, @@ -96,11 +202,73 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CityStateTemperature
0NYCNew York19.444444
1AlbanyNew York9.444444
2BuffaloNew York3.333333
\n", + "
" + ], + "text/plain": [ + " City State Temperature\n", + "0 NYC New York 19.444444\n", + "1 Albany New York 9.444444\n", + "2 Buffalo New York 3.333333" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nyc = temp[temp[\"State\"] == \"New York\"]\n", + "nyc" ] }, { @@ -112,11 +280,32 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\carol\\AppData\\Local\\Temp\\ipykernel_16960\\4213886779.py:1: FutureWarning: The default value of numeric_only in DataFrame.mean is deprecated. In a future version, it will default to False. In addition, specifying 'numeric_only=None' is deprecated. Select only valid columns or specify the value of numeric_only to silence this warning.\n", + " avg_nyc = temp[temp[\"State\"] == \"New York\"].mean()\n" + ] + }, + { + "data": { + "text/plain": [ + "Temperature 10.740741\n", + "dtype: float64" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "avg_nyc = temp[temp[\"State\"] == \"New York\"].mean()\n", + "avg_nyc" ] }, { @@ -128,11 +317,79 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CityStateTemperature
0NYCNew York19.444444
3HartfordConnecticut17.222222
5TretonNew Jersey22.222222
6NewarkNew Jersey20.000000
\n", + "
" + ], + "text/plain": [ + " City State Temperature\n", + "0 NYC New York 19.444444\n", + "3 Hartford Connecticut 17.222222\n", + "5 Treton New Jersey 22.222222\n", + "6 Newark New Jersey 20.000000" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "temp[temp[\"Temperature\"] > 15]" ] }, { @@ -144,11 +401,25 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['NYC', 'Hartford', 'Treton', 'Newark'], dtype=object)" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "temp_15 = temp[temp[\"Temperature\"] > 15]\n", + "\n", + "cities = temp_15[\"City\"].unique()\n", + "cities" ] }, { @@ -162,11 +433,25 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['NYC', 'Hartford'], dtype=object)" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "city = temp[(temp[\"Temperature\"] > 15) & (temp[\"Temperature\"] < 20)]\n", + "\n", + "temp_cities = city[\"City\"].unique()\n", + "temp_cities" ] }, { @@ -178,11 +463,83 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Temperature
meanstd
State
Connecticut15.8333331.964186
New Jersey21.1111111.571348
New York10.7407418.133404
\n", + "
" + ], + "text/plain": [ + " Temperature \n", + " mean std\n", + "State \n", + "Connecticut 15.833333 1.964186\n", + "New Jersey 21.111111 1.571348\n", + "New York 10.740741 8.133404" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "temp.groupby(\"State\").agg({\"Temperature\": [\"mean\", \"std\"]})" ] }, { @@ -203,11 +560,155 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NameDepartmentEducationGenderTitleYearsSalary
0JoseITBachelorManalyst135
1MariaITMasterFanalyst230
2DavidHRMasterManalyst230
3SoniaHRBachelorFanalyst435
4SamuelSalesMasterMassociate355
5EvaSalesBachelorFassociate255
6CarlosITMasterMVP870
7PedroITPhdMassociate760
8AnaHRMasterFVP870
\n", + "
" + ], + "text/plain": [ + " Name Department Education Gender Title Years Salary\n", + "0 Jose IT Bachelor M analyst 1 35\n", + "1 Maria IT Master F analyst 2 30\n", + "2 David HR Master M analyst 2 30\n", + "3 Sonia HR Bachelor F analyst 4 35\n", + "4 Samuel Sales Master M associate 3 55\n", + "5 Eva Sales Bachelor F associate 2 55\n", + "6 Carlos IT Master M VP 8 70\n", + "7 Pedro IT Phd M associate 7 60\n", + "8 Ana HR Master F VP 8 70" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "employees = pd.read_csv(\"Employee.csv\")\n", + "employees" ] }, { @@ -219,11 +720,29 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Name object\n", + "Department object\n", + "Education object\n", + "Gender object\n", + "Title object\n", + "Years int64\n", + "Salary int64\n", + "dtype: object" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "employees.dtypes" ] }, { @@ -233,7 +752,7 @@ "outputs": [], "source": [ "\"\"\"\n", - "your comments here\n", + "The categorical columns are strings and the columns that have measures are integers\n", "\"\"\"" ] }, @@ -246,11 +765,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 42, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "48.888888888888886" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# your code here" + "employees[\"Salary\"].mean()" ] }, { @@ -262,11 +792,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 43, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "70" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# your code here" + "employees[\"Salary\"].max()" ] }, { @@ -278,11 +819,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 44, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "30" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# your code here" + "employees[\"Salary\"].min()" ] }, { @@ -294,11 +846,103 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Salary
Name
David30
Maria30
Jose35
Sonia35
Eva55
Samuel55
Pedro60
Ana70
Carlos70
\n", + "
" + ], + "text/plain": [ + " Salary\n", + "Name \n", + "David 30\n", + "Maria 30\n", + "Jose 35\n", + "Sonia 35\n", + "Eva 55\n", + "Samuel 55\n", + "Pedro 60\n", + "Ana 70\n", + "Carlos 70" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "min_salary = employees.groupby(\"Name\").agg({\"Salary\": \"min\"})\n", + "\n", + "min_salary.sort_values(by = \"Salary\")\n", + "\n", + "#David and Maria have the lowest salaries." ] }, { @@ -310,11 +954,67 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" + "execution_count": 48, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NameDepartmentEducationGenderTitleYearsSalary
2DavidHRMasterManalyst230
\n", + "
" + ], + "text/plain": [ + " Name Department Education Gender Title Years Salary\n", + "2 David HR Master M analyst 2 30" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "david = employees[employees[\"Name\"] == \"David\"]\n", + "david" ] }, { @@ -326,11 +1026,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 49, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "2 30\n", + "Name: Salary, dtype: int64" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# your code here" + "david[\"Salary\"]" ] }, { @@ -342,11 +1054,88 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" + "execution_count": 50, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NameDepartmentEducationGenderTitleYearsSalary
4SamuelSalesMasterMassociate355
5EvaSalesBachelorFassociate255
7PedroITPhdMassociate760
\n", + "
" + ], + "text/plain": [ + " Name Department Education Gender Title Years Salary\n", + "4 Samuel Sales Master M associate 3 55\n", + "5 Eva Sales Bachelor F associate 2 55\n", + "7 Pedro IT Phd M associate 7 60" + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "employees[employees[\"Title\"] == \"associate\"]" ] }, { @@ -359,22 +1148,176 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NameDepartmentEducationGenderTitleYearsSalary
0JoseITBachelorManalyst135
1MariaITMasterFanalyst230
2DavidHRMasterManalyst230
\n", + "
" + ], + "text/plain": [ + " Name Department Education Gender Title Years Salary\n", + "0 Jose IT Bachelor M analyst 1 35\n", + "1 Maria IT Master F analyst 2 30\n", + "2 David HR Master M analyst 2 30" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Method 1\n", - "# your code here" + "employees.head(3)" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 56, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NameDepartmentEducationGenderTitleYearsSalary
0JoseITBachelorManalyst135
1MariaITMasterFanalyst230
2DavidHRMasterManalyst230
\n", + "
" + ], + "text/plain": [ + " Name Department Education Gender Title Years Salary\n", + "0 Jose IT Bachelor M analyst 1 35\n", + "1 Maria IT Master F analyst 2 30\n", + "2 David HR Master M analyst 2 30" + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Method 2\n", - "# your code here" + "employees[:3]" ] }, { @@ -386,11 +1329,66 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" + "execution_count": 57, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NameDepartmentEducationGenderTitleYearsSalary
7PedroITPhdMassociate760
\n", + "
" + ], + "text/plain": [ + " Name Department Education Gender Title Years Salary\n", + "7 Pedro IT Phd M associate 7 60" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "employees[(employees[\"Title\"] == \"associate\") & (employees[\"Salary\"] > 55)]" ] }, { @@ -402,11 +1400,196 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 67, "metadata": {}, "outputs": [], "source": [ - "# your code here" + "year_1 = employees[employees[\"Years\"] == 1]\n", + "year_2 = employees[employees[\"Years\"] == 2]\n", + "year_3 = employees[employees[\"Years\"] == 3]\n", + "year_4 = employees[employees[\"Years\"] == 4]\n", + "year_7 = employees[employees[\"Years\"] == 7]\n", + "year_8 = employees[employees[\"Years\"] == 8]" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\carol\\AppData\\Local\\Temp\\ipykernel_16960\\3540855446.py:1: FutureWarning: The default value of numeric_only in DataFrame.mean is deprecated. In a future version, it will default to False. In addition, specifying 'numeric_only=None' is deprecated. Select only valid columns or specify the value of numeric_only to silence this warning.\n", + " year_1.mean()\n" + ] + }, + { + "data": { + "text/plain": [ + "Years 1.0\n", + "Salary 35.0\n", + "dtype: float64" + ] + }, + "execution_count": 72, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "year_1.mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\carol\\AppData\\Local\\Temp\\ipykernel_16960\\247903228.py:1: FutureWarning: The default value of numeric_only in DataFrame.mean is deprecated. In a future version, it will default to False. In addition, specifying 'numeric_only=None' is deprecated. Select only valid columns or specify the value of numeric_only to silence this warning.\n", + " year_2.mean()\n" + ] + }, + { + "data": { + "text/plain": [ + "Years 2.000000\n", + "Salary 38.333333\n", + "dtype: float64" + ] + }, + "execution_count": 73, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "year_2.mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\carol\\AppData\\Local\\Temp\\ipykernel_16960\\2669788241.py:1: FutureWarning: The default value of numeric_only in DataFrame.mean is deprecated. In a future version, it will default to False. In addition, specifying 'numeric_only=None' is deprecated. Select only valid columns or specify the value of numeric_only to silence this warning.\n", + " year_3.mean()\n" + ] + }, + { + "data": { + "text/plain": [ + "Years 3.0\n", + "Salary 55.0\n", + "dtype: float64" + ] + }, + "execution_count": 74, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "year_3.mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\carol\\AppData\\Local\\Temp\\ipykernel_16960\\3491485861.py:1: FutureWarning: The default value of numeric_only in DataFrame.mean is deprecated. In a future version, it will default to False. In addition, specifying 'numeric_only=None' is deprecated. Select only valid columns or specify the value of numeric_only to silence this warning.\n", + " year_4.mean()\n" + ] + }, + { + "data": { + "text/plain": [ + "Years 4.0\n", + "Salary 35.0\n", + "dtype: float64" + ] + }, + "execution_count": 75, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "year_4.mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\carol\\AppData\\Local\\Temp\\ipykernel_16960\\3102777081.py:1: FutureWarning: The default value of numeric_only in DataFrame.mean is deprecated. In a future version, it will default to False. In addition, specifying 'numeric_only=None' is deprecated. Select only valid columns or specify the value of numeric_only to silence this warning.\n", + " year_7.mean()\n" + ] + }, + { + "data": { + "text/plain": [ + "Years 7.0\n", + "Salary 60.0\n", + "dtype: float64" + ] + }, + "execution_count": 76, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "year_7.mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\carol\\AppData\\Local\\Temp\\ipykernel_16960\\2549719899.py:1: FutureWarning: The default value of numeric_only in DataFrame.mean is deprecated. In a future version, it will default to False. In addition, specifying 'numeric_only=None' is deprecated. Select only valid columns or specify the value of numeric_only to silence this warning.\n", + " year_8.mean()\n" + ] + }, + { + "data": { + "text/plain": [ + "Years 8.0\n", + "Salary 70.0\n", + "dtype: float64" + ] + }, + "execution_count": 77, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "year_8.mean()" ] }, { @@ -418,11 +1601,69 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" + "execution_count": 81, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Salary
Title
VP70.000000
analyst32.500000
associate56.666667
\n", + "
" + ], + "text/plain": [ + " Salary\n", + "Title \n", + "VP 70.000000\n", + "analyst 32.500000\n", + "associate 56.666667" + ] + }, + "execution_count": 81, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "employees.groupby(\"Title\").agg({\"Salary\": \"mean\"})" ] }, { @@ -434,11 +1675,27 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" + "execution_count": 83, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The first quartile is 35.0\n", + "The second quartile is 55.0\n", + "The third quartile is 60.0\n" + ] + } + ], + "source": [ + "q1 = employees[\"Salary\"].quantile(0.25)\n", + "q2 = employees[\"Salary\"].quantile(0.50)\n", + "q3 = employees[\"Salary\"].quantile(0.75)\n", + "\n", + "print(f\"The first quartile is {q1}\")\n", + "print(f\"The second quartile is {q2}\")\n", + "print(f\"The third quartile is {q3}\")" ] }, { @@ -450,11 +1707,64 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" + "execution_count": 85, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Salary
Gender
F47.5
M50.0
\n", + "
" + ], + "text/plain": [ + " Salary\n", + "Gender \n", + "F 47.5\n", + "M 50.0" + ] + }, + "execution_count": 85, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "employees.groupby(\"Gender\").agg({\"Salary\": \"mean\"})" ] }, { @@ -467,11 +1777,156 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" + "execution_count": 87, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
meanminmax
Department
HR45.003070
IT48.753070
Sales55.005555
\n", + "
" + ], + "text/plain": [ + " mean min max\n", + "Department \n", + "HR 45.00 30 70\n", + "IT 48.75 30 70\n", + "Sales 55.00 55 55" + ] + }, + "execution_count": 87, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "employees.groupby(\"Department\")[\"Salary\"].agg([\"mean\", \"min\", \"max\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 88, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
meanminmax
Department
HR4.66666728
IT4.50000018
Sales2.50000023
\n", + "
" + ], + "text/plain": [ + " mean min max\n", + "Department \n", + "HR 4.666667 2 8\n", + "IT 4.500000 1 8\n", + "Sales 2.500000 2 3" + ] + }, + "execution_count": 88, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "employees.groupby(\"Department\")[\"Years\"].agg([\"mean\", \"min\", \"max\"])" ] }, { @@ -484,11 +1939,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 89, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "Department\n", + "HR 40\n", + "IT 40\n", + "Sales 0\n", + "Name: Salary, dtype: int64" + ] + }, + "execution_count": 89, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# your code here" + " employees.groupby(\"Department\")[\"Salary\"].agg(lambda x: x.max() - x.min())" ] }, { @@ -785,7 +2255,7 @@ "metadata": { "anaconda-cloud": {}, "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -799,7 +2269,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.2" + "version": "3.10.9" } }, "nbformat": 4,