From 24670ab68ba019f5090bc453f113e5aba2d29f74 Mon Sep 17 00:00:00 2001 From: elsonluis Date: Fri, 15 Dec 2023 06:29:05 +0000 Subject: [PATCH] lab done --- your-code/main.ipynb | 3651 ++++++++++++++++++++++++++++++++---------- 1 file changed, 2845 insertions(+), 806 deletions(-) mode change 100755 => 100644 your-code/main.ipynb diff --git a/your-code/main.ipynb b/your-code/main.ipynb old mode 100755 new mode 100644 index 406e6ba..78e674a --- a/your-code/main.ipynb +++ b/your-code/main.ipynb @@ -1,807 +1,2846 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Subsetting and Descriptive Stats\n", - "\n", - "## Before your start:\n", - " - Remember that you just need to do one of the challenges.\n", - " - Keep in mind that you need to use some of the functions you learned in the previous lessons.\n", - " - All datasets are provided in IronHack's database.\n", - " - Elaborate your codes and outputs as much as you can.\n", - " - Try your best to answer the questions and complete the tasks and most importantly: enjoy the process!\n", - " \n", - "#### Import all the necessary libraries here:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# import libraries here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# [ONLY ONE MANDATORY] Challenge 1\n", - "#### In this challenge we will use the `Temp_States` dataset. \n", - "\n", - "#### First import it into a dataframe called `temp`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Print `temp`." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Explore the data types of the *temp* dataframe. What types of data do we have? Comment your result." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "\"\"\"\n", - "your comments here\n", - "\"\"\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Select the rows where state is New York." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### What is the average temperature of cities in New York?" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Which states and cities have a temperature above 15 degrees Celsius?" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Now, return only the cities that have a temperature above 15 degrees Celsius." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Which cities have a temperature above 15 degrees Celcius and below 20 degrees Celsius?\n", - "\n", - "**Hint**: First, write the condition. Then, select the rows." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Find the mean and standard deviation of the temperature of each state." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": true - }, - "source": [ - "# [ONLY ONE MANDATORY] Challenge 2" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Load the `employees` dataset into a dataframe. Call the dataframe `employees`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Explore the data types of the `employees` dataframe. Comment your results." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "\"\"\"\n", - "your comments here\n", - "\"\"\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### What's the average salary in this company?" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### What's the highest salary?" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### What's the lowest salary?" 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Who are the employees with the lowest salary?" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Find all the information about an employee called David." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Could you return only David's salary?" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Print all the rows where job title is associate." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Print the first 3 rows of your dataframe.\n", - "**Tip**: There are 2 ways to do it. Do it both ways." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Method 1\n", - "# your code here" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Method 2\n", - "# your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Find the employees whose title is associate and whose salary is above 55." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Group the employees by number of years of employment. What are the average salaries in each group?" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### What is the average salary per title?" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Find the salary quartiles.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Is the mean salary different per gender?" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Find the minimum, mean and maximum of all numeric columns for each company department.\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Bonus Question: for each department, compute the difference between the maximum and the minimum salary.\n", - "**Hint**: try using `agg` or `apply` combined with `lambda` functions." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": true - }, - "source": [ - "# [ONLY ONE MANDATORY] Challenge 3\n", - "#### Open the `Orders` dataset. Name your dataset `orders`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Explore your dataset by looking at the data types and summary statistics. Comment your results." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "\"\"\"\n", - "your comments here\n", - "\"\"\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### What is the average purchase price?" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### What are the highest and lowest purchase prices? " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Select all the customers from Spain.\n", - "**Hint**: Remember that you are not asked to find orders from Spain but customers. A customer might have more than one order associated. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### How many customers do we have in Spain?" 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Select all the customers who have bought more than 50 items.\n", - "**Hint**: Remember that you are not asked to find orders with more than 50 items but customers who bought more than 50 items. A customer with two orders of 30 items each should appear in the selection." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Select orders from Spain that include more than 50 items." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Select all free orders." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Select all orders whose description starts with `lunch bag`.\n", - "**Hint**: use string functions." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Select all `lunch bag` orders made in 2011." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Show the frequency distribution of the amount spent in Spain." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Select all orders made in the month of August." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Find the number of orders made by each country in the month of August.\n", - "**Hint**: Use value_counts()." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### What's the average amount of money spent by country?" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### What's the most expensive item?" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### What is the average amount spent per year?" 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" - ] - } - ], - "metadata": { - "anaconda-cloud": {}, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.2" - } - }, - "nbformat": 4, - "nbformat_minor": 1 -} + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "9vNeRFiGAP22" + }, + "source": [ + "# Subsetting and Descriptive Stats\n", + "\n", + "## Before your start:\n", + " - Remember that you just need to do one of the challenges.\n", + " - Keep in mind that you need to use some of the functions you learned in the previous lessons.\n", + " - All datasets are provided in IronHack's database.\n", + " - Elaborate your codes and outputs as much as you can.\n", + " - Try your best to answer the questions and complete the tasks and most importantly: enjoy the process!\n", + " \n", + "#### Import all the necessary libraries here:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "fMDlGSrHAP25" + }, + "outputs": [], + "source": [ + "# import libraries here\n", + "\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tx1RSW3SAP27" + }, + "source": [ + "# [ONLY ONE MANDATORY] Challenge 1\n", + "#### In this challenge we will use the `Temp_States` dataset.\n", + "\n", + "#### First import it into a dataframe called `temp`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 268 + }, + "id": "v__F9AroAP28", + "outputId": "af9b9ab9-b4e7-4001-8c59-2d68f4467edb" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " City State Temperature\n", + "0 NYC New York 19.444444\n", + "1 Albany New York 9.444444\n", + "2 Buffalo New York 3.333333\n", + "3 Hartford Connecticut 17.222222\n", + "4 Bridgeport Connecticut 14.444444\n", + "5 Treton New Jersey 22.222222\n", + "6 Newark New Jersey 20.000000" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CityStateTemperature
0NYCNew York19.444444
1AlbanyNew York9.444444
2BuffaloNew York3.333333
3HartfordConnecticut17.222222
4BridgeportConnecticut14.444444
5TretonNew Jersey22.222222
6NewarkNew Jersey20.000000
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 4 + } + ], + "source": [ + "# your code here\n", + "temp = pd.read_csv(\"/content/Temp_States.csv\")\n", + "temp" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ad8hMJNuAP28" + }, + "source": [ + "#### Print `temp`." + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 268 + }, + "id": "HwOIAjJ9AP29", + "outputId": "811e2856-7c5a-42d1-92bf-fcc5e446dd55" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " City State Temperature\n", + "0 NYC New York 19.444444\n", + "1 Albany New York 9.444444\n", + "2 Buffalo New York 3.333333\n", + "3 Hartford Connecticut 17.222222\n", + "4 Bridgeport Connecticut 14.444444\n", + "5 Treton New Jersey 22.222222\n", + "6 Newark New Jersey 20.000000" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CityStateTemperature
0NYCNew York19.444444
1AlbanyNew York9.444444
2BuffaloNew York3.333333
3HartfordConnecticut17.222222
4BridgeportConnecticut14.444444
5TretonNew Jersey22.222222
6NewarkNew Jersey20.000000
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 34 + } + ], + "source": [ + "# your code here\n", + "\n", + "temp" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7RXMpIRVAP29" + }, + "source": [ + "#### Explore the data types of the *temp* dataframe. What types of data do we have? Comment your result." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "VWi6I1SOAP29", + "outputId": "a333e82c-6cdc-42b3-e22c-d942213c12ee" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "dtype('O')" + ] + }, + "metadata": {}, + "execution_count": 16 + } + ], + "source": [ + "# data types df[nome_da_coluna].dtype\n", + "\n", + "temp['City'].dtype\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "w84XslWgAP2-", + "outputId": "abc94795-3924-441f-e0c9-3dfdbcebc8ca" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "dtype('float64')" + ] + }, + "metadata": {}, + "execution_count": 22 + } + ], + "source": [ + "\"\"\"\n", + "your comments here\n", + "\"\"\"\n", + "temp['State'].dtype\n", + "temp['City'].dtype\n", + "temp['Temperature'].dtype" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Hm6iKCMIAP2-" + }, + "source": [ + "#### Select the rows where state is New York." + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 143 + }, + "id": "wm7qkvL8AP2_", + "outputId": "b9696d55-1740-44cd-b9f2-d9a45fab7019" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " City State Temperature\n", + "0 NYC New York 19.444444\n", + "1 Albany New York 9.444444\n", + "2 Buffalo New York 3.333333" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CityStateTemperature
0NYCNew York19.444444
1AlbanyNew York9.444444
2BuffaloNew York3.333333
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 25 + } + ], + "source": [ + "# Select the rows where state is New York\n", + "Selected= temp[temp['State'] == 'New York']\n", + "\n", + "Selected" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ueGgHRxdAP2_" + }, + "source": [ + "#### What is the average temperature of cities in New York?" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "lcU8v-6kAP2_", + "outputId": "2991f455-361b-4d8f-9436-3d9d2819e6ae" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + ":3: FutureWarning: The default value of numeric_only in DataFrame.mean is deprecated. In a future version, it will default to False. In addition, specifying 'numeric_only=None' is deprecated. Select only valid columns or specify the value of numeric_only to silence this warning.\n", + " Selected.mean()\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Temperature 10.740741\n", + "dtype: float64" + ] + }, + "metadata": {}, + "execution_count": 29 + } + ], + "source": [ + "# your code here\n", + "\n", + "Selected.mean()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yaJ7zeEMAP3A" + }, + "source": [ + "#### Which states and cities have a temperature above 15 degrees Celsius?" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 174 + }, + "id": "KTeHNjaGAP3A", + "outputId": "1ea59c00-e4dc-4a16-c778-99438ff71dab" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " City State Temperature\n", + "0 NYC New York 19.444444\n", + "3 Hartford Connecticut 17.222222\n", + "5 Treton New Jersey 22.222222\n", + "6 Newark New Jersey 20.000000" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CityStateTemperature
0NYCNew York19.444444
3HartfordConnecticut17.222222
5TretonNew Jersey22.222222
6NewarkNew Jersey20.000000
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 41 + } + ], + "source": [ + "# your code here\n", + "\n", + "Above = temp[temp['Temperature'] > 15]\n", + "\n", + "Above" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QQ_DSOl8AP3A" + }, + "source": [ + "#### Now, return only the cities that have a temperature above 15 degrees Celsius." + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mj263J_YAP3A", + "outputId": "9f5d0e48-82fa-4311-8726-d0e2c37226e1" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0 NYC\n", + "3 Hartford\n", + "5 Treton\n", + "6 Newark\n", + "Name: City, dtype: object" + ] + }, + "metadata": {}, + "execution_count": 54 + } + ], + "source": [ + "# your code here\n", + "\n", + "Above['City']" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "J4UQVN4cAP3B" + }, + "source": [ + "#### Which cities have a temperature above 15 degrees Celcius and below 20 degrees Celsius?\n", + "\n", + "**Hint**: First, write the condition. Then, select the rows." + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 112 + }, + "id": "rKpHw3D4AP3B", + "outputId": "315732af-f9e7-4026-ff8f-8a274c0505b1" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " City State Temperature\n", + "0 NYC New York 19.444444\n", + "3 Hartford Connecticut 17.222222" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CityStateTemperature
0NYCNew York19.444444
3HartfordConnecticut17.222222
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 51 + } + ], + "source": [ + "# above 15 degrees Celcius and below 20 degrees\n", + "\n", + "temp[(temp['Temperature'] >= 15) & (temp['Temperature'] <20)]\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4yuBbbHtAP3B" + }, + "source": [ + "#### Find the mean and standard deviation of the temperature of each state." + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 268 + }, + "id": "xm_6aG6XAP3B", + "outputId": "7423f935-e650-4fd8-a87c-a74d657b1671" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " City State Temperature\n", + "0 NYC New York 19.444444\n", + "1 Albany New York 9.444444\n", + "2 Buffalo New York 3.333333\n", + "3 Hartford Connecticut 17.222222\n", + "4 Bridgeport Connecticut 14.444444\n", + "5 Treton New Jersey 22.222222\n", + "6 Newark New Jersey 20.000000" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CityStateTemperature
0NYCNew York19.444444
1AlbanyNew York9.444444
2BuffaloNew York3.333333
3HartfordConnecticut17.222222
4BridgeportConnecticut14.444444
5TretonNew Jersey22.222222
6NewarkNew Jersey20.000000
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 55 + } + ], + "source": [ + "# mean and standard deviation of the temperature of each state\n", + "\n", + "temp" + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "yTlgKr1GRM1_" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true, + "id": "uwtQlNE9AP3B" + }, + "source": [ + "# [ONLY ONE MANDATORY] Challenge 2" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1HPHKQd6AP3C" + }, + "source": [ + "#### Load the `employees` dataset into a dataframe. Call the dataframe `employees`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "G3RwznDvAP3C" + }, + "outputs": [], + "source": [ + "# your code here" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gtKyTwYzAP3D" + }, + "source": [ + "#### Explore the data types of the `employees` dataframe. Comment your results." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rtb8HPQcAP3D" + }, + "outputs": [], + "source": [ + "# your code here" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "a3ZnjGrlAP3D" + }, + "outputs": [], + "source": [ + "\"\"\"\n", + "your comments here\n", + "\"\"\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5Yf69mO4AP3E" + }, + "source": [ + "#### What's the average salary in this company?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_rwWHeWFAP3E" + }, + "outputs": [], + "source": [ + "# your code here" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "T2oEkk26AP3E" + }, + "source": [ + "#### What's the highest salary?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-LItRlE-AP3E" + }, + "outputs": [], + "source": [ + "# your code here" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "u1f8psTUAP3E" + }, + "source": [ + "#### What's the lowest salary?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7XeUlduXAP3F" + }, + "outputs": [], + "source": [ + "# your code here" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NuSqf4VIAP3F" + }, + "source": [ + "#### Who are the employees with the lowest salary?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "lum6od-EAP3F" + }, + "outputs": [], + "source": [ + "# your code here" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wz2F1DVYAP3F" + }, + "source": [ + "#### Find all the information about an employee called David." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "fleozxNjAP3F" + }, + "outputs": [], + "source": [ + "# your code here" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Op9FsCGrAP3G" + }, + "source": [ + "#### Could you return only David's salary?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "A5b4llTtAP3G" + }, + "outputs": [], + "source": [ + "# your code here" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ECBcORpaAP3R" + }, + "source": [ + "#### Print all the rows where job title is associate." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "WvgagSMKAP3S" + }, + "outputs": [], + "source": [ + "# your code here" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TPKoM0oeAP3S" + }, + "source": [ + "#### Print the first 3 rows of your dataframe.\n", + "**Tip**: There are 2 ways to do it. Do it both ways." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "gUVhYD51AP3S" + }, + "outputs": [], + "source": [ + "# Method 1\n", + "# your code here" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZH4alYzLAP3T" + }, + "outputs": [], + "source": [ + "# Method 2\n", + "# your code here" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2ZNPbjsrAP3T" + }, + "source": [ + "#### Find the employees whose title is associate and whose salary is above 55." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7v0lGBYGAP3T" + }, + "outputs": [], + "source": [ + "# your code here" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Hm69mDPMAP3T" + }, + "source": [ + "#### Group the employees by number of years of employment. What are the average salaries in each group?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "kYfck_2FAP3U" + }, + "outputs": [], + "source": [ + "# your code here" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AI-kxiWiAP3U" + }, + "source": [ + "#### What is the average salary per title?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "d1LYwaIIAP3U" + }, + "outputs": [], + "source": [ + "# your code here" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "q4XzcmRjAP3U" + }, + "source": [ + "#### Find the salary quartiles.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zeMkDGdyAP3U" + }, + "outputs": [], + "source": [ + "# your code here" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "cdNkMxoiAP3V" + }, + "source": [ + "#### Is the mean salary different per gender?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CWBdxAH-AP3V" + }, + "outputs": [], + "source": [ + "# your code here" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PI3S8SXjAP3V" + }, + "source": [ + "#### Find the minimum, mean and maximum of all numeric columns for each company department.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JhrvM-owAP3V" + }, + "outputs": [], + "source": [ + "# your code here" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0N1lb8C_AP3V" + }, + "source": [ + "#### Bonus Question: for each department, compute the difference between the maximum and the minimum salary.\n", + "**Hint**: try using `agg` or `apply` combined with `lambda` functions." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0EuhDJxuAP3W" + }, + "outputs": [], + "source": [ + "# your code here" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true, + "id": "DV9f2qX_AP3W" + }, + "source": [ + "# [ONLY ONE MANDATORY] Challenge 3\n", + "#### Open the `Orders` dataset. Name your dataset `orders`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JfN-7d05AP3W" + }, + "outputs": [], + "source": [ + "# your code here" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "aNTvUKrCAP3W" + }, + "source": [ + "#### Explore your dataset by looking at the data types and summary statistics. Comment on your results." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "G6Fz8DBIAP3W" + }, + "outputs": [], + "source": [ + "# your code here" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ERachw2wAP3X" + }, + "outputs": [], + "source": [ + "\"\"\"\n", + "your comments here\n", + "\"\"\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EPjlV4sVAP3X" + }, + "source": [ + "#### What is the average purchase price?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zrl1PjEiAP3X" + }, + "outputs": [], + "source": [ + "# your code here" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jpYQmGQVAP3X" + }, + "source": [ + "#### What are the highest and lowest purchase prices?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "SY-XMLMRAP3X" + }, + "outputs": [], + "source": [ + "# your code here" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qvTRg8ixAP3X" + }, + "source": [ + "#### Select all the customers from Spain.\n", + "**Hint**: Remember that you are not asked to find orders from Spain but customers. A customer might have more than one order associated." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "xbdwKknpAP3Y" + }, + "outputs": [], + "source": [ + "# your code here" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0Wogf_0oAP3Y" + }, + "source": [ + "#### How many customers do we have in Spain?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "58C-DkmBAP3Y" + }, + "outputs": [], + "source": [ + "# your code here" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gFzJIo96AP3Y" + }, + "source": [ + "#### Select all the customers who have bought more than 50 items.\n", + "**Hint**: Remember that you are not asked to find orders with more than 50 items but customers who bought more than 50 items. A customer with two orders of 30 items each should appear in the selection." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "pXqAk_KoAP3Y" + }, + "outputs": [], + "source": [ + "# your code here" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZcjYgvrQAP3Z" + }, + "source": [ + "#### Select orders from Spain that include more than 50 items." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "d6WCS-xLAP3Z" + }, + "outputs": [], + "source": [ + "# your code here" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HRxWtBCpAP3Z" + }, + "source": [ + "#### Select all free orders." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "p6Z_UgceAP3Z" + }, + "outputs": [], + "source": [ + "# your code here" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "r1FgYuU4AP3a" + }, + "source": [ + "#### Select all orders whose description starts with `lunch bag`.\n", + "**Hint**: use string functions." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "MyVKiGPTAP3a" + }, + "outputs": [], + "source": [ + "# your code here" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "L0zH1KMxAP3a" + }, + "source": [ + "#### Select all `lunch bag` orders made in 2011." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "voY7Xv3hAP3a" + }, + "outputs": [], + "source": [ + "# your code here" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "j_WpRBNGAP3b" + }, + "source": [ + "#### Show the frequency distribution of the amount spent in Spain." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ervoMOlJAP3b" + }, + "outputs": [], + "source": [ + "# your code here" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "X0zOQeQXAP3d" + }, + "source": [ + "#### Select all orders made in the month of August." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Xne88xCyAP3e" + }, + "outputs": [], + "source": [ + "# your code here" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Aa-icH1tAP3e" + }, + "source": [ + "#### Find the number of orders made by each country in the month of August.\n", + "**Hint**: Use value_counts()." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ybc4ol2bAP3f" + }, + "outputs": [], + "source": [ + "# your code here" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VDwnEoXWAP3f" + }, + "source": [ + "#### What's the average amount of money spent by country?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "z3w2emA_AP3f" + }, + "outputs": [], + "source": [ + "# your code here" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "iHsM5_T-AP3f" + }, + "source": [ + "#### What's the most expensive item?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hNe96MkzAP3f" + }, + "outputs": [], + "source": [ + "# your code here" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZFDoQcH9AP3g" + }, + "source": [ + "#### What is the average amount spent per year?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dDLB11vmAP3g" + }, + "outputs": [], + "source": [ + "# your code here" + ] + } + ], + "metadata": { + "anaconda-cloud": {}, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.2" + }, + "colab": { + "provenance": [] + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file