diff --git a/1.-Data Cleaning.ipynb b/1.-Data Cleaning.ipynb
index 1a927b9..3d25f73 100644
--- a/1.-Data Cleaning.ipynb
+++ b/1.-Data Cleaning.ipynb
@@ -71,7 +71,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
@@ -90,11 +90,15 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
- "#your code here"
+ "path = 'C:/Users/Zaca/Documents/Datasets/diamonds.csv'\n",
+ "diamonds = pd.read_csv(path)\n",
+ "\n",
+ "# dropping the index column\n",
+ "diamonds.drop('Unnamed: 0', axis= 1, inplace=True)"
]
},
{
@@ -109,11 +113,129 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#your code here"
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " carat | \n",
+ " cut | \n",
+ " color | \n",
+ " clarity | \n",
+ " depth | \n",
+ " table | \n",
+ " price | \n",
+ " x | \n",
+ " y | \n",
+ " z | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 0.23 | \n",
+ " Ideal | \n",
+ " E | \n",
+ " SI2 | \n",
+ " 61.5 | \n",
+ " 55.0 | \n",
+ " 326 | \n",
+ " 3.95 | \n",
+ " 3.98 | \n",
+ " 2.43 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 0.21 | \n",
+ " Premium | \n",
+ " E | \n",
+ " SI1 | \n",
+ " 59.8 | \n",
+ " 61.0 | \n",
+ " 326 | \n",
+ " 3.89 | \n",
+ " 3.84 | \n",
+ " 2.31 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 0.23 | \n",
+ " Good | \n",
+ " E | \n",
+ " VS1 | \n",
+ " 56.9 | \n",
+ " 65.0 | \n",
+ " 327 | \n",
+ " 4.05 | \n",
+ " 4.07 | \n",
+ " 2.31 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 0.29 | \n",
+ " Premium | \n",
+ " I | \n",
+ " VS2 | \n",
+ " 62.4 | \n",
+ " 58.0 | \n",
+ " 334 | \n",
+ " 4.20 | \n",
+ " 4.23 | \n",
+ " 2.63 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 0.31 | \n",
+ " Good | \n",
+ " J | \n",
+ " SI2 | \n",
+ " 63.3 | \n",
+ " 58.0 | \n",
+ " 335 | \n",
+ " 4.34 | \n",
+ " 4.35 | \n",
+ " 2.75 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " carat cut color clarity depth table price x y z\n",
+ "0 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43\n",
+ "1 0.21 Premium E SI1 59.8 61.0 326 3.89 3.84 2.31\n",
+ "2 0.23 Good E VS1 56.9 65.0 327 4.05 4.07 2.31\n",
+ "3 0.29 Premium I VS2 62.4 58.0 334 4.20 4.23 2.63\n",
+ "4 0.31 Good J SI2 63.3 58.0 335 4.34 4.35 2.75"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# exploring data\n",
+ "diamonds.head()"
]
},
{
@@ -127,11 +249,12 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
- "#your code here"
+ "# The numbers we see in initial exploration fall within the ranges defined in the codebook. \n",
+ "# There are a few labels I don't really understand because I don't know much about diamonds."
]
},
{
@@ -145,11 +268,22 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 5,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(53940, 10)"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "#your code here"
+ "diamonds.shape"
]
},
{
@@ -163,11 +297,218 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#your code here"
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " carat | \n",
+ " cut | \n",
+ " color | \n",
+ " clarity | \n",
+ " depth | \n",
+ " table | \n",
+ " price | \n",
+ " x | \n",
+ " y | \n",
+ " z | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 0.23 | \n",
+ " Ideal | \n",
+ " E | \n",
+ " 1 | \n",
+ " 61.5 | \n",
+ " 55.0 | \n",
+ " 326 | \n",
+ " 3.95 | \n",
+ " 3.98 | \n",
+ " 2.43 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 0.21 | \n",
+ " Premium | \n",
+ " E | \n",
+ " 2 | \n",
+ " 59.8 | \n",
+ " 61.0 | \n",
+ " 326 | \n",
+ " 3.89 | \n",
+ " 3.84 | \n",
+ " 2.31 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 0.23 | \n",
+ " Good | \n",
+ " E | \n",
+ " 4 | \n",
+ " 56.9 | \n",
+ " 65.0 | \n",
+ " 327 | \n",
+ " 4.05 | \n",
+ " 4.07 | \n",
+ " 2.31 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 0.29 | \n",
+ " Premium | \n",
+ " I | \n",
+ " 3 | \n",
+ " 62.4 | \n",
+ " 58.0 | \n",
+ " 334 | \n",
+ " 4.20 | \n",
+ " 4.23 | \n",
+ " 2.63 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 0.31 | \n",
+ " Good | \n",
+ " J | \n",
+ " 1 | \n",
+ " 63.3 | \n",
+ " 58.0 | \n",
+ " 335 | \n",
+ " 4.34 | \n",
+ " 4.35 | \n",
+ " 2.75 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 53935 | \n",
+ " 0.72 | \n",
+ " Ideal | \n",
+ " D | \n",
+ " 2 | \n",
+ " 60.8 | \n",
+ " 57.0 | \n",
+ " 2757 | \n",
+ " 5.75 | \n",
+ " 5.76 | \n",
+ " 3.50 | \n",
+ "
\n",
+ " \n",
+ " | 53936 | \n",
+ " 0.72 | \n",
+ " Good | \n",
+ " D | \n",
+ " 2 | \n",
+ " 63.1 | \n",
+ " 55.0 | \n",
+ " 2757 | \n",
+ " 5.69 | \n",
+ " 5.75 | \n",
+ " 3.61 | \n",
+ "
\n",
+ " \n",
+ " | 53937 | \n",
+ " 0.70 | \n",
+ " Very Good | \n",
+ " D | \n",
+ " 2 | \n",
+ " 62.8 | \n",
+ " 60.0 | \n",
+ " 2757 | \n",
+ " 5.66 | \n",
+ " 5.68 | \n",
+ " 3.56 | \n",
+ "
\n",
+ " \n",
+ " | 53938 | \n",
+ " 0.86 | \n",
+ " Premium | \n",
+ " H | \n",
+ " 1 | \n",
+ " 61.0 | \n",
+ " 58.0 | \n",
+ " 2757 | \n",
+ " 6.15 | \n",
+ " 6.12 | \n",
+ " 3.74 | \n",
+ "
\n",
+ " \n",
+ " | 53939 | \n",
+ " 0.75 | \n",
+ " Ideal | \n",
+ " D | \n",
+ " 1 | \n",
+ " 62.2 | \n",
+ " 55.0 | \n",
+ " 2757 | \n",
+ " 5.83 | \n",
+ " 5.87 | \n",
+ " 3.64 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
53940 rows × 10 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " carat cut color clarity depth table price x y z\n",
+ "0 0.23 Ideal E 1 61.5 55.0 326 3.95 3.98 2.43\n",
+ "1 0.21 Premium E 2 59.8 61.0 326 3.89 3.84 2.31\n",
+ "2 0.23 Good E 4 56.9 65.0 327 4.05 4.07 2.31\n",
+ "3 0.29 Premium I 3 62.4 58.0 334 4.20 4.23 2.63\n",
+ "4 0.31 Good J 1 63.3 58.0 335 4.34 4.35 2.75\n",
+ "... ... ... ... ... ... ... ... ... ... ...\n",
+ "53935 0.72 Ideal D 2 60.8 57.0 2757 5.75 5.76 3.50\n",
+ "53936 0.72 Good D 2 63.1 55.0 2757 5.69 5.75 3.61\n",
+ "53937 0.70 Very Good D 2 62.8 60.0 2757 5.66 5.68 3.56\n",
+ "53938 0.86 Premium H 1 61.0 58.0 2757 6.15 6.12 3.74\n",
+ "53939 0.75 Ideal D 1 62.2 55.0 2757 5.83 5.87 3.64\n",
+ "\n",
+ "[53940 rows x 10 columns]"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# using a dictionary to replace strings by values\n",
+ "clarity_dict = {'I1': 0, 'SI2': 1, 'SI1': 2, 'VS2': 3, 'VS1': 4, 'VVS2': 5, 'VVS1': 6, 'IF': 7}\n",
+ "diamonds['clarity'] = diamonds['clarity'].map(clarity_dict)\n",
+ "diamonds"
]
},
{
@@ -181,11 +522,218 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#your code here"
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " carat | \n",
+ " cut | \n",
+ " color | \n",
+ " clarity | \n",
+ " depth | \n",
+ " table | \n",
+ " price | \n",
+ " x | \n",
+ " y | \n",
+ " z | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 0.23 | \n",
+ " Ideal | \n",
+ " 5 | \n",
+ " 1 | \n",
+ " 61.5 | \n",
+ " 55.0 | \n",
+ " 326 | \n",
+ " 3.95 | \n",
+ " 3.98 | \n",
+ " 2.43 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 0.21 | \n",
+ " Premium | \n",
+ " 5 | \n",
+ " 2 | \n",
+ " 59.8 | \n",
+ " 61.0 | \n",
+ " 326 | \n",
+ " 3.89 | \n",
+ " 3.84 | \n",
+ " 2.31 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 0.23 | \n",
+ " Good | \n",
+ " 5 | \n",
+ " 4 | \n",
+ " 56.9 | \n",
+ " 65.0 | \n",
+ " 327 | \n",
+ " 4.05 | \n",
+ " 4.07 | \n",
+ " 2.31 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 0.29 | \n",
+ " Premium | \n",
+ " 1 | \n",
+ " 3 | \n",
+ " 62.4 | \n",
+ " 58.0 | \n",
+ " 334 | \n",
+ " 4.20 | \n",
+ " 4.23 | \n",
+ " 2.63 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 0.31 | \n",
+ " Good | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 63.3 | \n",
+ " 58.0 | \n",
+ " 335 | \n",
+ " 4.34 | \n",
+ " 4.35 | \n",
+ " 2.75 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 53935 | \n",
+ " 0.72 | \n",
+ " Ideal | \n",
+ " 6 | \n",
+ " 2 | \n",
+ " 60.8 | \n",
+ " 57.0 | \n",
+ " 2757 | \n",
+ " 5.75 | \n",
+ " 5.76 | \n",
+ " 3.50 | \n",
+ "
\n",
+ " \n",
+ " | 53936 | \n",
+ " 0.72 | \n",
+ " Good | \n",
+ " 6 | \n",
+ " 2 | \n",
+ " 63.1 | \n",
+ " 55.0 | \n",
+ " 2757 | \n",
+ " 5.69 | \n",
+ " 5.75 | \n",
+ " 3.61 | \n",
+ "
\n",
+ " \n",
+ " | 53937 | \n",
+ " 0.70 | \n",
+ " Very Good | \n",
+ " 6 | \n",
+ " 2 | \n",
+ " 62.8 | \n",
+ " 60.0 | \n",
+ " 2757 | \n",
+ " 5.66 | \n",
+ " 5.68 | \n",
+ " 3.56 | \n",
+ "
\n",
+ " \n",
+ " | 53938 | \n",
+ " 0.86 | \n",
+ " Premium | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 61.0 | \n",
+ " 58.0 | \n",
+ " 2757 | \n",
+ " 6.15 | \n",
+ " 6.12 | \n",
+ " 3.74 | \n",
+ "
\n",
+ " \n",
+ " | 53939 | \n",
+ " 0.75 | \n",
+ " Ideal | \n",
+ " 6 | \n",
+ " 1 | \n",
+ " 62.2 | \n",
+ " 55.0 | \n",
+ " 2757 | \n",
+ " 5.83 | \n",
+ " 5.87 | \n",
+ " 3.64 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
53940 rows × 10 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " carat cut color clarity depth table price x y z\n",
+ "0 0.23 Ideal 5 1 61.5 55.0 326 3.95 3.98 2.43\n",
+ "1 0.21 Premium 5 2 59.8 61.0 326 3.89 3.84 2.31\n",
+ "2 0.23 Good 5 4 56.9 65.0 327 4.05 4.07 2.31\n",
+ "3 0.29 Premium 1 3 62.4 58.0 334 4.20 4.23 2.63\n",
+ "4 0.31 Good 0 1 63.3 58.0 335 4.34 4.35 2.75\n",
+ "... ... ... ... ... ... ... ... ... ... ...\n",
+ "53935 0.72 Ideal 6 2 60.8 57.0 2757 5.75 5.76 3.50\n",
+ "53936 0.72 Good 6 2 63.1 55.0 2757 5.69 5.75 3.61\n",
+ "53937 0.70 Very Good 6 2 62.8 60.0 2757 5.66 5.68 3.56\n",
+ "53938 0.86 Premium 2 1 61.0 58.0 2757 6.15 6.12 3.74\n",
+ "53939 0.75 Ideal 6 1 62.2 55.0 2757 5.83 5.87 3.64\n",
+ "\n",
+ "[53940 rows x 10 columns]"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# repeating the process above but for the color column\n",
+ "color_dict = {'D': 6, 'E': 5, 'F': 4, 'G': 3, 'H': 2, 'I': 1, 'J': 0}\n",
+ "diamonds['color'] = diamonds['color'].map(color_dict)\n",
+ "diamonds"
]
},
{
@@ -199,11 +747,40 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "RangeIndex: 53940 entries, 0 to 53939\n",
+ "Data columns (total 10 columns):\n",
+ "carat 53940 non-null float64\n",
+ "cut 53940 non-null object\n",
+ "color 53940 non-null int64\n",
+ "clarity 53940 non-null int64\n",
+ "depth 53940 non-null float64\n",
+ "table 53940 non-null float64\n",
+ "price 53940 non-null int64\n",
+ "x 53940 non-null float64\n",
+ "y 53940 non-null float64\n",
+ "z 53940 non-null float64\n",
+ "dtypes: float64(6), int64(3), object(1)\n",
+ "memory usage: 4.1+ MB\n"
+ ]
+ }
+ ],
+ "source": [
+ "diamonds.info()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
"metadata": {},
- "outputs": [],
"source": [
- "#your code here"
+ "**With the info method we confirm that there are no null values, and that the number of rows and columns matches what `.shape` reported (53940 rows, 10 columns).**"
]
},
{
@@ -221,11 +798,34 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#your code here"
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "carat float64\n",
+ "cut object\n",
+ "color int64\n",
+ "clarity int64\n",
+ "depth float64\n",
+ "table float64\n",
+ "price int64\n",
+ "x float64\n",
+ "y float64\n",
+ "z float64\n",
+ "dtype: object"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "diamonds.dtypes\n",
+ "# So far everything appears to be ok. Essentially everything is a float (as it should be), \n",
+ "# except price (an int), color and clarity (which we mapped to ints earlier) and cut, which holds string values."
]
},
{
@@ -241,11 +841,173 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#your code and comments here"
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " carat | \n",
+ " color | \n",
+ " clarity | \n",
+ " depth | \n",
+ " table | \n",
+ " price | \n",
+ " x | \n",
+ " y | \n",
+ " z | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | count | \n",
+ " 53940.000000 | \n",
+ " 53940.000000 | \n",
+ " 53940.000000 | \n",
+ " 53940.000000 | \n",
+ " 53940.000000 | \n",
+ " 53940.000000 | \n",
+ " 53940.000000 | \n",
+ " 53940.000000 | \n",
+ " 53940.000000 | \n",
+ "
\n",
+ " \n",
+ " | mean | \n",
+ " 0.797940 | \n",
+ " 3.405803 | \n",
+ " 3.051020 | \n",
+ " 61.749405 | \n",
+ " 57.457184 | \n",
+ " 3932.799722 | \n",
+ " 5.731157 | \n",
+ " 5.734526 | \n",
+ " 3.538734 | \n",
+ "
\n",
+ " \n",
+ " | std | \n",
+ " 0.474011 | \n",
+ " 1.701105 | \n",
+ " 1.647136 | \n",
+ " 1.432621 | \n",
+ " 2.234491 | \n",
+ " 3989.439738 | \n",
+ " 1.121761 | \n",
+ " 1.142135 | \n",
+ " 0.705699 | \n",
+ "
\n",
+ " \n",
+ " | min | \n",
+ " 0.200000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 43.000000 | \n",
+ " 43.000000 | \n",
+ " 326.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ "
\n",
+ " \n",
+ " | 25% | \n",
+ " 0.400000 | \n",
+ " 2.000000 | \n",
+ " 2.000000 | \n",
+ " 61.000000 | \n",
+ " 56.000000 | \n",
+ " 950.000000 | \n",
+ " 4.710000 | \n",
+ " 4.720000 | \n",
+ " 2.910000 | \n",
+ "
\n",
+ " \n",
+ " | 50% | \n",
+ " 0.700000 | \n",
+ " 3.000000 | \n",
+ " 3.000000 | \n",
+ " 61.800000 | \n",
+ " 57.000000 | \n",
+ " 2401.000000 | \n",
+ " 5.700000 | \n",
+ " 5.710000 | \n",
+ " 3.530000 | \n",
+ "
\n",
+ " \n",
+ " | 75% | \n",
+ " 1.040000 | \n",
+ " 5.000000 | \n",
+ " 4.000000 | \n",
+ " 62.500000 | \n",
+ " 59.000000 | \n",
+ " 5324.250000 | \n",
+ " 6.540000 | \n",
+ " 6.540000 | \n",
+ " 4.040000 | \n",
+ "
\n",
+ " \n",
+ " | max | \n",
+ " 5.010000 | \n",
+ " 6.000000 | \n",
+ " 7.000000 | \n",
+ " 79.000000 | \n",
+ " 95.000000 | \n",
+ " 18823.000000 | \n",
+ " 10.740000 | \n",
+ " 58.900000 | \n",
+ " 31.800000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " carat color clarity depth table \\\n",
+ "count 53940.000000 53940.000000 53940.000000 53940.000000 53940.000000 \n",
+ "mean 0.797940 3.405803 3.051020 61.749405 57.457184 \n",
+ "std 0.474011 1.701105 1.647136 1.432621 2.234491 \n",
+ "min 0.200000 0.000000 0.000000 43.000000 43.000000 \n",
+ "25% 0.400000 2.000000 2.000000 61.000000 56.000000 \n",
+ "50% 0.700000 3.000000 3.000000 61.800000 57.000000 \n",
+ "75% 1.040000 5.000000 4.000000 62.500000 59.000000 \n",
+ "max 5.010000 6.000000 7.000000 79.000000 95.000000 \n",
+ "\n",
+ " price x y z \n",
+ "count 53940.000000 53940.000000 53940.000000 53940.000000 \n",
+ "mean 3932.799722 5.731157 5.734526 3.538734 \n",
+ "std 3989.439738 1.121761 1.142135 0.705699 \n",
+ "min 326.000000 0.000000 0.000000 0.000000 \n",
+ "25% 950.000000 4.710000 4.720000 2.910000 \n",
+ "50% 2401.000000 5.700000 5.710000 3.530000 \n",
+ "75% 5324.250000 6.540000 6.540000 4.040000 \n",
+ "max 18823.000000 10.740000 58.900000 31.800000 "
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "diamonds.describe()\n",
+ "\n",
+ "# The describe method shows us the summary statistics for every numeric column in our dataset."
]
},
{
@@ -261,11 +1023,339 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#your code here"
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " carat | \n",
+ " cut | \n",
+ " color | \n",
+ " clarity | \n",
+ " depth | \n",
+ " table | \n",
+ " price | \n",
+ " x | \n",
+ " y | \n",
+ " z | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 2207 | \n",
+ " 1.00 | \n",
+ " Premium | \n",
+ " 3 | \n",
+ " 1 | \n",
+ " 59.1 | \n",
+ " 59.0 | \n",
+ " 3142 | \n",
+ " 6.55 | \n",
+ " 6.48 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2314 | \n",
+ " 1.01 | \n",
+ " Premium | \n",
+ " 2 | \n",
+ " 0 | \n",
+ " 58.1 | \n",
+ " 59.0 | \n",
+ " 3167 | \n",
+ " 6.66 | \n",
+ " 6.60 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 4791 | \n",
+ " 1.10 | \n",
+ " Premium | \n",
+ " 3 | \n",
+ " 1 | \n",
+ " 63.0 | \n",
+ " 59.0 | \n",
+ " 3696 | \n",
+ " 6.50 | \n",
+ " 6.47 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 5471 | \n",
+ " 1.01 | \n",
+ " Premium | \n",
+ " 4 | \n",
+ " 1 | \n",
+ " 59.2 | \n",
+ " 58.0 | \n",
+ " 3837 | \n",
+ " 6.50 | \n",
+ " 6.47 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 10167 | \n",
+ " 1.50 | \n",
+ " Good | \n",
+ " 3 | \n",
+ " 0 | \n",
+ " 64.0 | \n",
+ " 61.0 | \n",
+ " 4731 | \n",
+ " 7.15 | \n",
+ " 7.04 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 11182 | \n",
+ " 1.07 | \n",
+ " Ideal | \n",
+ " 4 | \n",
+ " 1 | \n",
+ " 61.6 | \n",
+ " 56.0 | \n",
+ " 4954 | \n",
+ " 0.00 | \n",
+ " 6.62 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 11963 | \n",
+ " 1.00 | \n",
+ " Very Good | \n",
+ " 2 | \n",
+ " 3 | \n",
+ " 63.3 | \n",
+ " 53.0 | \n",
+ " 5139 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 13601 | \n",
+ " 1.15 | \n",
+ " Ideal | \n",
+ " 3 | \n",
+ " 3 | \n",
+ " 59.2 | \n",
+ " 56.0 | \n",
+ " 5564 | \n",
+ " 6.88 | \n",
+ " 6.83 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 15951 | \n",
+ " 1.14 | \n",
+ " Fair | \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 57.5 | \n",
+ " 67.0 | \n",
+ " 6381 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 24394 | \n",
+ " 2.18 | \n",
+ " Premium | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 59.4 | \n",
+ " 61.0 | \n",
+ " 12631 | \n",
+ " 8.49 | \n",
+ " 8.45 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 24520 | \n",
+ " 1.56 | \n",
+ " Ideal | \n",
+ " 3 | \n",
+ " 3 | \n",
+ " 62.2 | \n",
+ " 54.0 | \n",
+ " 12800 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 26123 | \n",
+ " 2.25 | \n",
+ " Premium | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 61.3 | \n",
+ " 58.0 | \n",
+ " 15397 | \n",
+ " 8.52 | \n",
+ " 8.42 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 26243 | \n",
+ " 1.20 | \n",
+ " Premium | \n",
+ " 6 | \n",
+ " 6 | \n",
+ " 62.1 | \n",
+ " 59.0 | \n",
+ " 15686 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 27112 | \n",
+ " 2.20 | \n",
+ " Premium | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " 61.2 | \n",
+ " 59.0 | \n",
+ " 17265 | \n",
+ " 8.42 | \n",
+ " 8.37 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 27429 | \n",
+ " 2.25 | \n",
+ " Premium | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 62.8 | \n",
+ " 59.0 | \n",
+ " 18034 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 27503 | \n",
+ " 2.02 | \n",
+ " Premium | \n",
+ " 2 | \n",
+ " 3 | \n",
+ " 62.7 | \n",
+ " 53.0 | \n",
+ " 18207 | \n",
+ " 8.02 | \n",
+ " 7.95 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 27739 | \n",
+ " 2.80 | \n",
+ " Good | \n",
+ " 3 | \n",
+ " 1 | \n",
+ " 63.8 | \n",
+ " 58.0 | \n",
+ " 18788 | \n",
+ " 8.90 | \n",
+ " 8.85 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 49556 | \n",
+ " 0.71 | \n",
+ " Good | \n",
+ " 4 | \n",
+ " 1 | \n",
+ " 64.1 | \n",
+ " 60.0 | \n",
+ " 2130 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 49557 | \n",
+ " 0.71 | \n",
+ " Good | \n",
+ " 4 | \n",
+ " 1 | \n",
+ " 64.1 | \n",
+ " 60.0 | \n",
+ " 2130 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 51506 | \n",
+ " 1.12 | \n",
+ " Premium | \n",
+ " 3 | \n",
+ " 0 | \n",
+ " 60.4 | \n",
+ " 59.0 | \n",
+ " 2383 | \n",
+ " 6.71 | \n",
+ " 6.67 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " carat cut color clarity depth table price x y z\n",
+ "2207 1.00 Premium 3 1 59.1 59.0 3142 6.55 6.48 0.0\n",
+ "2314 1.01 Premium 2 0 58.1 59.0 3167 6.66 6.60 0.0\n",
+ "4791 1.10 Premium 3 1 63.0 59.0 3696 6.50 6.47 0.0\n",
+ "5471 1.01 Premium 4 1 59.2 58.0 3837 6.50 6.47 0.0\n",
+ "10167 1.50 Good 3 0 64.0 61.0 4731 7.15 7.04 0.0\n",
+ "11182 1.07 Ideal 4 1 61.6 56.0 4954 0.00 6.62 0.0\n",
+ "11963 1.00 Very Good 2 3 63.3 53.0 5139 0.00 0.00 0.0\n",
+ "13601 1.15 Ideal 3 3 59.2 56.0 5564 6.88 6.83 0.0\n",
+ "15951 1.14 Fair 3 4 57.5 67.0 6381 0.00 0.00 0.0\n",
+ "24394 2.18 Premium 2 1 59.4 61.0 12631 8.49 8.45 0.0\n",
+ "24520 1.56 Ideal 3 3 62.2 54.0 12800 0.00 0.00 0.0\n",
+ "26123 2.25 Premium 1 2 61.3 58.0 15397 8.52 8.42 0.0\n",
+ "26243 1.20 Premium 6 6 62.1 59.0 15686 0.00 0.00 0.0\n",
+ "27112 2.20 Premium 2 2 61.2 59.0 17265 8.42 8.37 0.0\n",
+ "27429 2.25 Premium 2 1 62.8 59.0 18034 0.00 0.00 0.0\n",
+ "27503 2.02 Premium 2 3 62.7 53.0 18207 8.02 7.95 0.0\n",
+ "27739 2.80 Good 3 1 63.8 58.0 18788 8.90 8.85 0.0\n",
+ "49556 0.71 Good 4 1 64.1 60.0 2130 0.00 0.00 0.0\n",
+ "49557 0.71 Good 4 1 64.1 60.0 2130 0.00 0.00 0.0\n",
+ "51506 1.12 Premium 3 0 60.4 59.0 2383 6.71 6.67 0.0"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# checking the selection where any of x, y or z are zero.\n",
+ "diamonds[(diamonds['x'] == 0) | (diamonds['y'] == 0) | (diamonds['z'] == 0)]"
]
},
{
@@ -284,11 +1374,18 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
- "#your code here"
+ "def calculate_z(x, y, d):\n",
+ " \"\"\"\n",
+ " Calculates depth of diamond.\n",
+ " Input: height, width and depth as percentage.\n",
+ " Output: diamonds depth.\n",
+ " \"\"\"\n",
+ " d = d/100\n",
+ " return (d*x + d*y)/2"
]
},
{
@@ -300,11 +1397,12 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
- "#your code here"
+ "# using apply only on the rows where z is zero but x and y are both non-zero (so z can be computed)\n",
+ "diamonds.loc[(diamonds['z'] == 0) & (diamonds['x'] != 0) & (diamonds['y'] != 0), 'z'] = diamonds[(diamonds['z'] == 0) & (diamonds['x'] != 0) & (diamonds['y'] != 0)].apply(lambda row: calculate_z(row.x, row.y, row.depth), axis=1)"
]
},
{
@@ -320,11 +1418,14 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
- "#your code here"
+ "# replacing the zeros we cannot calculate by NaN; the result is assigned back because a chained\n",
+ "# diamonds.x.replace(..., inplace=True) no longer updates the frame under pandas Copy-on-Write\n",
+ "dimension_cols = ['x', 'y', 'z']\n",
+ "diamonds[dimension_cols] = diamonds[dimension_cols].replace(0, np.nan)"
]
},
{
@@ -336,11 +1437,171 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#your code here"
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " carat | \n",
+ " color | \n",
+ " clarity | \n",
+ " depth | \n",
+ " table | \n",
+ " price | \n",
+ " x | \n",
+ " y | \n",
+ " z | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | count | \n",
+ " 53940.000000 | \n",
+ " 53940.000000 | \n",
+ " 53940.000000 | \n",
+ " 53940.000000 | \n",
+ " 53940.000000 | \n",
+ " 53940.000000 | \n",
+ " 53932.000000 | \n",
+ " 53933.000000 | \n",
+ " 53932.000000 | \n",
+ "
\n",
+ " \n",
+ " | mean | \n",
+ " 0.797940 | \n",
+ " 3.405803 | \n",
+ " 3.051020 | \n",
+ " 61.749405 | \n",
+ " 57.457184 | \n",
+ " 3932.799722 | \n",
+ " 5.732007 | \n",
+ " 5.735270 | \n",
+ " 3.540265 | \n",
+ "
\n",
+ " \n",
+ " | std | \n",
+ " 0.474011 | \n",
+ " 1.701105 | \n",
+ " 1.647136 | \n",
+ " 1.432621 | \n",
+ " 2.234491 | \n",
+ " 3989.439738 | \n",
+ " 1.119670 | \n",
+ " 1.140339 | \n",
+ " 0.702667 | \n",
+ "
\n",
+ " \n",
+ " | min | \n",
+ " 0.200000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 43.000000 | \n",
+ " 43.000000 | \n",
+ " 326.000000 | \n",
+ " 3.730000 | \n",
+ " 3.680000 | \n",
+ " 1.070000 | \n",
+ "
\n",
+ " \n",
+ " | 25% | \n",
+ " 0.400000 | \n",
+ " 2.000000 | \n",
+ " 2.000000 | \n",
+ " 61.000000 | \n",
+ " 56.000000 | \n",
+ " 950.000000 | \n",
+ " 4.710000 | \n",
+ " 4.720000 | \n",
+ " 2.910000 | \n",
+ "
\n",
+ " \n",
+ " | 50% | \n",
+ " 0.700000 | \n",
+ " 3.000000 | \n",
+ " 3.000000 | \n",
+ " 61.800000 | \n",
+ " 57.000000 | \n",
+ " 2401.000000 | \n",
+ " 5.700000 | \n",
+ " 5.710000 | \n",
+ " 3.530000 | \n",
+ "
\n",
+ " \n",
+ " | 75% | \n",
+ " 1.040000 | \n",
+ " 5.000000 | \n",
+ " 4.000000 | \n",
+ " 62.500000 | \n",
+ " 59.000000 | \n",
+ " 5324.250000 | \n",
+ " 6.540000 | \n",
+ " 6.540000 | \n",
+ " 4.040000 | \n",
+ "
\n",
+ " \n",
+ " | max | \n",
+ " 5.010000 | \n",
+ " 6.000000 | \n",
+ " 7.000000 | \n",
+ " 79.000000 | \n",
+ " 95.000000 | \n",
+ " 18823.000000 | \n",
+ " 10.740000 | \n",
+ " 58.900000 | \n",
+ " 31.800000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " carat color clarity depth table \\\n",
+ "count 53940.000000 53940.000000 53940.000000 53940.000000 53940.000000 \n",
+ "mean 0.797940 3.405803 3.051020 61.749405 57.457184 \n",
+ "std 0.474011 1.701105 1.647136 1.432621 2.234491 \n",
+ "min 0.200000 0.000000 0.000000 43.000000 43.000000 \n",
+ "25% 0.400000 2.000000 2.000000 61.000000 56.000000 \n",
+ "50% 0.700000 3.000000 3.000000 61.800000 57.000000 \n",
+ "75% 1.040000 5.000000 4.000000 62.500000 59.000000 \n",
+ "max 5.010000 6.000000 7.000000 79.000000 95.000000 \n",
+ "\n",
+ " price x y z \n",
+ "count 53940.000000 53932.000000 53933.000000 53932.000000 \n",
+ "mean 3932.799722 5.732007 5.735270 3.540265 \n",
+ "std 3989.439738 1.119670 1.140339 0.702667 \n",
+ "min 326.000000 3.730000 3.680000 1.070000 \n",
+ "25% 950.000000 4.710000 4.720000 2.910000 \n",
+ "50% 2401.000000 5.700000 5.710000 3.530000 \n",
+ "75% 5324.250000 6.540000 6.540000 4.040000 \n",
+ "max 18823.000000 10.740000 58.900000 31.800000 "
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "diamonds.describe()"
]
},
{
@@ -361,11 +1622,26 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 16,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "x (%): 0.00014831294030404152\n",
+ "y (%): 0.00012977382276603634\n",
+ "z (%): 0.00014831294030404152\n"
+ ]
+ }
+ ],
"source": [
- "#your code here"
+ "for col in diamonds.columns:\n",
+ " if diamonds[col].isnull().sum():\n",
+ " print(col + ' (%): ' + str(sum(diamonds[col].isnull())/len(diamonds)))\n",
+ " \n",
+ "# these values are such a small percentage of our dataset that we can just drop them\n",
+ "diamonds.dropna(inplace= True)"
]
},
{
@@ -380,20 +1656,183 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#your code here"
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " carat | \n",
+ " color | \n",
+ " clarity | \n",
+ " depth | \n",
+ " table | \n",
+ " price | \n",
+ " x | \n",
+ " y | \n",
+ " z | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | count | \n",
+ " 53932.000000 | \n",
+ " 53932.000000 | \n",
+ " 53932.000000 | \n",
+ " 53932.000000 | \n",
+ " 53932.000000 | \n",
+ " 53932.000000 | \n",
+ " 53932.000000 | \n",
+ " 53932.000000 | \n",
+ " 53932.000000 | \n",
+ "
\n",
+ " \n",
+ " | mean | \n",
+ " 0.797879 | \n",
+ " 3.405789 | \n",
+ " 3.051101 | \n",
+ " 61.749336 | \n",
+ " 57.457029 | \n",
+ " 3932.136079 | \n",
+ " 5.732007 | \n",
+ " 5.735254 | \n",
+ " 3.540265 | \n",
+ "
\n",
+ " \n",
+ " | std | \n",
+ " 0.473986 | \n",
+ " 1.701165 | \n",
+ " 1.647109 | \n",
+ " 1.432514 | \n",
+ " 2.234064 | \n",
+ " 3988.734835 | \n",
+ " 1.119670 | \n",
+ " 1.140343 | \n",
+ " 0.702667 | \n",
+ "
\n",
+ " \n",
+ " | min | \n",
+ " 0.200000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 43.000000 | \n",
+ " 43.000000 | \n",
+ " 326.000000 | \n",
+ " 3.730000 | \n",
+ " 3.680000 | \n",
+ " 1.070000 | \n",
+ "
\n",
+ " \n",
+ " | 25% | \n",
+ " 0.400000 | \n",
+ " 2.000000 | \n",
+ " 2.000000 | \n",
+ " 61.000000 | \n",
+ " 56.000000 | \n",
+ " 949.750000 | \n",
+ " 4.710000 | \n",
+ " 4.720000 | \n",
+ " 2.910000 | \n",
+ "
\n",
+ " \n",
+ " | 50% | \n",
+ " 0.700000 | \n",
+ " 3.000000 | \n",
+ " 3.000000 | \n",
+ " 61.800000 | \n",
+ " 57.000000 | \n",
+ " 2401.000000 | \n",
+ " 5.700000 | \n",
+ " 5.710000 | \n",
+ " 3.530000 | \n",
+ "
\n",
+ " \n",
+ " | 75% | \n",
+ " 1.040000 | \n",
+ " 5.000000 | \n",
+ " 4.000000 | \n",
+ " 62.500000 | \n",
+ " 59.000000 | \n",
+ " 5324.000000 | \n",
+ " 6.540000 | \n",
+ " 6.540000 | \n",
+ " 4.040000 | \n",
+ "
\n",
+ " \n",
+ " | max | \n",
+ " 5.010000 | \n",
+ " 6.000000 | \n",
+ " 7.000000 | \n",
+ " 79.000000 | \n",
+ " 95.000000 | \n",
+ " 18823.000000 | \n",
+ " 10.740000 | \n",
+ " 58.900000 | \n",
+ " 31.800000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " carat color clarity depth table \\\n",
+ "count 53932.000000 53932.000000 53932.000000 53932.000000 53932.000000 \n",
+ "mean 0.797879 3.405789 3.051101 61.749336 57.457029 \n",
+ "std 0.473986 1.701165 1.647109 1.432514 2.234064 \n",
+ "min 0.200000 0.000000 0.000000 43.000000 43.000000 \n",
+ "25% 0.400000 2.000000 2.000000 61.000000 56.000000 \n",
+ "50% 0.700000 3.000000 3.000000 61.800000 57.000000 \n",
+ "75% 1.040000 5.000000 4.000000 62.500000 59.000000 \n",
+ "max 5.010000 6.000000 7.000000 79.000000 95.000000 \n",
+ "\n",
+ " price x y z \n",
+ "count 53932.000000 53932.000000 53932.000000 53932.000000 \n",
+ "mean 3932.136079 5.732007 5.735254 3.540265 \n",
+ "std 3988.734835 1.119670 1.140343 0.702667 \n",
+ "min 326.000000 3.730000 3.680000 1.070000 \n",
+ "25% 949.750000 4.710000 4.720000 2.910000 \n",
+ "50% 2401.000000 5.700000 5.710000 3.530000 \n",
+ "75% 5324.000000 6.540000 6.540000 4.040000 \n",
+ "max 18823.000000 10.740000 58.900000 31.800000 "
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "diamonds.describe()"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
- "#your comments here"
+ "# The values haven't changed much since the last .describe() but now we don't have any min = 0\n",
+ "# for measurements that shouldn't be zero! \n",
+ "# There also seem to be some pretty high max values in the price, y and z columns. Much much higher than the mean,\n",
+ "# this could mean they are outliers."
]
},
{
@@ -407,11 +1846,14 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
- "#your code here"
+ "# taking the top 25% of our data based on price.\n",
+ "outliers = diamonds[diamonds['price'] > diamonds['price'].quantile(.75)]\n",
+ "\n",
+ "# This selected 13485 data points."
]
},
{
@@ -423,11 +1865,26 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#Run this code\n",
+ "execution_count": 20,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAl4AAAEhCAYAAACuva4PAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAXSklEQVR4nO3df4xdZ33n8ffXM2FxoGzi8cRrOUmdEmsLEo1TjaKsAlXi2O00W0hawaoRwlMR4Ur8cKCoJa1K0kW7FaxaUowqdl2S5VqiQBpAMcgMGXsDiH+yjMGNQ02xGxnixNjDJSlJ84PMzHf/mBPWMZ5kxp7znHOv3y9pdO859xzfTyLr+HOec+Y8kZlIkiSpfsuaDiBJknS2sHhJkiQVYvGSJEkqxOIlSZJUiMVLkiSpEIuXJElSIYNNB1iIlStX5tq1a5uOIZ119u7d++PMHG46h06Px06pGS927OyJ4rV27VomJyebjiGddSLiB01n0Onz2Ck148WOnV5qlCRJKsTiJUmSVEitxSsizouIuyPiexFxICL+U0SsiIiJiDhYvZ5fZwZJkqS2qHvE62PAeGb+KnAZcAC4BdiTmeuAPdWyJElS36uteEXEq4DfAO4AyMyfZebjwPVAp9qsA9xQVwZJkqQ2qXPE61eAKeB/R8R3IuKTEfEKYFVmHgWoXi841c4RsSUiJiNicmpqqsaYkiRJZdRZvAaBXwc+kZmXA//GIi4rZub2zBzJzJHhYR8j1Iu63S5bt26l2+02HUWSeobHzv5WZ/E6AhzJzPur5buZK2LHImI1QPV6vMYMalCn02H//v3s2LGj6SiS1DM8dva32opXZv4IeDgi/mO16lrgn4CdwFi1bgy4p64Mak6322V8fJzMZHx83DM3SVoAj539r+7fanwP8OmIeABYD/wl8GFgU0QcBDZVy+oznU6H2dlZAGZmZjxzk6QF8NjZ/2otXpm5r7pP69cy84bMfCwzu5l5bWauq15/UmcGNWP37t1MT08DMD09zcTERMOJJKn9PHb2P59cr1ps3LiRwcG5qUAHBwfZtGlTw4kkqf08dvY/i5dqMTY2xrJlc3+9BgYG2Lx5c8OJJKn9PHb2P4uXajE0NMTo6CgRwejoKENDQ01HkqTW89jZ/wabDqD+NTY2xuHDhz1jk6RF8NjZ3yxeqs3Q0BDbtm1rOoYk9RSPnf3NS42SJEmFWLwkSZIKsXhJkiQVYvGSJEkqxOIlSZJUiMVLklokIl4eEf83Iv4xIr4bEf+1Wn9JRNwfEQcj4nMR8bKms0paPIuXJLXLs8CGzLwMWA+MRsSVwEeA2zNzHfAYcFODGSWdJouXJLVIznmyWjyn+klgA3B3tb4D3NBAPElnyOIlSS0TEQMRsQ84DkwA/wI8npnT1SZHgDXz7LslIiYjYnJqaqpMYEkLZvGSpJbJzJnMXA9cCFwBvOZUm82z7/bMHMnMkeHh4TpjSjoNFi9JaqnMfBz4GnAlcF5EPD/N24XAo03lknT6LF6S1CIRMRwR51XvlwMbgQPAfcCbq83GgHuaSSjpTDhJtiS1y2qgExEDzJ0c35WZX46IfwI+GxH/DfgOcEeTISWdHouXJLVIZj4AXH6K9Q8xd7+XpB7mpUZJkqRCLF6SJEmFWLwkSZIKsXhJkiQVYvGSJEkqxOIlSZJUiMVLkiSpEIuXJElSIRYvSZKkQixekiRJhVi8JEmSCrF4SZIkFWLxkiRJKsTiJUmSVIjFS5IkqRCLlyRJUiGDdf7hEXEYeAKYAaYzcyQiVgCfA9YCh4H/kpmP1ZlDkiSpDUqMeF2Tmeszc6RavgXYk5nrgD3VsiRJUt9r4lLj9UCnet8BbmgggyRJUnF1F68E7o2IvRGxpVq3KjOPAlSvF9ScQZIkqRVqvccLuCozH42IC4CJiPjeQnesitoWgIsvvriufJIkScXUOuKVmY9Wr8eBLwJXAMci
YjVA9Xp8nn23Z+ZIZo4MDw/XGVOSJKmI2opXRLwiIn7p+ffAbwIPAjuBsWqzMeCeujJIkiS1SZ2XGlcBX4yI57/n7zNzPCK+BdwVETcBPwTeUmMGSZKk1qiteGXmQ8Blp1jfBa6t63slSZLayifXS5IkFWLxkiRJKsTiJUmSVIjFS5IkqRCLlyRJUiEWL0mSpEIsXpLUIhFxUUTcFxEHIuK7EXFztf4vIuKRiNhX/VzXdFZJi1f3XI2SpMWZBt6fmd+uZv/YGxET1We3Z+ZfNZhN0hmyeElSi2TmUeBo9f6JiDgArGk2laSl4qVGSWqpiFgLXA7cX616d0Q8EBF3RsT5jQWTdNosXpLUQhHxSuDzwHsz86fAJ4BXA+uZGxH763n22xIRkxExOTU1VSyvpIWxeElSy0TEOcyVrk9n5hcAMvNYZs5k5izwd8AVp9o3M7dn5khmjgwPD5cLLWlBLF6S1CIREcAdwIHM/OgJ61efsNnvAg+WzibpzHlzvSS1y1XA24D9EbGvWvdnwI0RsR5I4DDwh83Ek3QmLF6S1CKZ+U0gTvHRrtJZJC09LzVKkiQVYvGSJEkqxOIlSZJUiMVLkiSpEIuXJElSIRYvSZKkQixekiRJhVi8JEmSCrF4SZIkFWLxkiRJKsTiJUmSVIjFS5IkqRCLlyRJUiEWL0mSpEIsXpIkSYVYvCRJkgqxeEmSJBVi8ZIkSSrE4iVJklSIxUuSJKkQi5ckSVIhtReviBiIiO9ExJer5Usi4v6IOBgRn4uIl9WdQZIkqQ1KjHjdDBw4YfkjwO2ZuQ54DLipQAZJkqTG1Vq8IuJC4D8Dn6yWA9gA3F1t0gFuqDODJElSW9Q94vU3wJ8As9XyEPB4Zk5Xy0eANafaMSK2RMRkRExOTU3VHFOSJKl+tRWviPgd4Hhm7j1x9Sk2zVPtn5nbM3MkM0eGh4dryShJklTSYI1/9lXAmyLiOuDlwKuYGwE7LyIGq1GvC4FHa8wgSZLUGrWNeGXmn2bmhZm5Fvh94P9k5luB+4A3V5uNAffUlUGSJKlNmniO1weAP4qIQ8zd83VHAxkkSZKKq/NS489l5teAr1XvHwKuKPG9kiRJbeKT6yVJkgqxeElSi0TERRFxX0QciIjvRsTN1foVETFRzfoxERHnN51V0uJZvCSpXaaB92fma4ArgXdFxGuBW4A91awfe6plST3G4iVJLZKZRzPz29X7J5ibcm0NcD1zs32As35IPcviJUktFRFrgcuB+4FVmXkU5soZcEFzySSdLouXJLVQRLwS+Dzw3sz86SL2c7o1qcUsXpLUMhFxDnOl69OZ+YVq9bGIWF19vho4fqp9nW5NajeLlyS1SEQEcw+WPpCZHz3ho53MzfYBzvoh9awiD1CVJC3YVcDbgP0Rsa9a92fAh4G7IuIm4IfAWxrKJ+kMWLwkqUUy85tAzPPxtSWzSFp6XmqUJEkqxOIlSZJUiMVLkiSpEIuXJElSIRYvSZKkQixekiRJhVi8JEmSCrF4SZIkFWLxkiRJKsTiJUmSVIjFS5IkqRCLlyRJUiEWL0mSpEIsXpIkSYVYvCRJkgqxeEmSJBVi8ZIkSSrE4qXadLtdtm7dSrfbbTqKJEmtYPFSbTqdDvv372fHjh1NR5EkqRUsXqpFt9tlfHyczGR8fNxRL0mSsHipJp1Oh9nZWQBmZmYc9ZIkCYuXarJ7926mp6cBmJ6eZmJiouFEkiQ17yWLV0S8OyLOLxFG/WPjxo0MDg4CMDg4yKZNmxpOJElS8xYy4vUfgG9FxF0RMRoRUXco9b6xsTGWLZv76zUwMMDmzZsbTiSVFRF7IuK6k9ZtbyqPpHZ4yeKVmX8OrAPuAP4AOBgRfxkRr645m3rY0NAQo6OjRASjo6MMDQ01HUkq7RLgAxFx2wnrRpoKI6kdFnSPV2Ym8KPqZxo4H7g7Iv5HjdnU48bGxnjd617naJfOVo8D1wKrIuJLEfHvmw4kqXmDL7VBRGwF
xoAfA58E/jgzn4uIZcBB4E/m2e/lwDeAf1d9z92ZeVtEXAJ8FlgBfBt4W2b+bCn+Y9QuQ0NDbNu2rekYUlMiM6eBd0bEHwDfZO6kVdJZbCEjXiuB38vM38rMf8jM5wAycxb4nRfZ71lgQ2ZeBqwHRiPiSuAjwO2ZuQ54DLjpjP4LJKmd/ufzbzLzU8zdqnFvU2EktcNC7vG6NTN/MM9nB15kv8zMJ6vFc6qfBDYAd1frO8ANi0osST0gM//XSct7M/PtTeWR1A61PscrIgYiYh9wHJgA/gV4vBp+BzgCrJln3y0RMRkRk1NTU3XGlCRJKqLW4pWZM5m5HrgQuAJ4zak2m2ff7Zk5kpkjw8PDdcaUJEkqosiT6zPzceBrwJXAeRHx/E39FwKPlsggSZLUtNqKV0QMR8R51fvlwEbgAHAf8OZqszHgnroySFKviYg7I+J4RDx4wrq/iIhHImJf9XPdi/0ZktqrzhGv1cB9EfEA8C1gIjO/DHwA+KOIOAQMMfdgVknSnE8Bo6dYf3tmrq9+dhXOJGmJvORzvE5XZj4AXH6K9Q8xd7+XJOkkmfmNiFjbdA5J9Shyj5ck6Yy9OyIeqC5FzvsgVn8jXGo3i5cktd8ngFcz9zDqo8Bfz7ehvxEutZvFS5JaLjOPVY/nmQX+Dm/XkHqWxUuSWi4iVp+w+LvAg/NtK6ndaru5XpK0eBHxGeBqYGVEHAFuA66OiPXMPXD6MPCHjQWUdEYsXpLUIpl54ylW+9gdqU94qVGSJKkQi5ckSVIhFi9JkqRCLF6SJEmFWLwkSZIKsXhJkiQVYvGSJEkqxOIlSZJUiMVLkiSpEIuXJElSIRYvSZKkQixekiRJhVi8JEmSCrF4SZIkFWLxkiRJKsTiJUmSVIjFS5IkqRCLlyRJUiEWL0mSpEIsXpIkSYVYvCRJkgqxeEmSJBVi8ZIkSSrE4iVJklSIxUuSJKkQi5ckSVIhFi9JkqRCLF6SJEmFWLwkSZIKsXhJkiQVUlvxioiLIuK+iDgQEd+NiJur9SsiYiIiDlav59eVQZIkqU3qHPGaBt6fma8BrgTeFRGvBW4B9mTmOmBPtSxJAiLizog4HhEPnrDOE1apT9RWvDLzaGZ+u3r/BHAAWANcD3SqzTrADXVlkKQe9Clg9KR1nrBKfaLIPV4RsRa4HLgfWJWZR2GunAEXzLPPloiYjIjJqampEjElqXGZ+Q3gJyet9oRV6hO1F6+IeCXweeC9mfnThe6XmdszcyQzR4aHh+sLKEntt6ATVvCkVWq7WotXRJzDXOn6dGZ+oVp9LCJWV5+vBo7XmUGSziaetErtVudvNQZwB3AgMz96wkc7gbHq/RhwT10ZJKlPeMIq9Yk6R7yuAt4GbIiIfdXPdcCHgU0RcRDYVC1LkubnCavUJwbr+oMz85tAzPPxtXV9ryT1soj4DHA1sDIijgC3MXeCeldE3AT8EHhLcwklnYnaipckafEy88Z5PvKEVeoDThkkSZJUiMVLkiSpEIuXJElSIRYvSZKkQixekiRJhVi8JEmSCrF4SZLUIt1ul61bt9LtdpuOohpYvCRJapFOp8P+/fvZsWNH01FUA4uXJEkt0e12GR8fJzMZHx931KsPWbwkSWqJTqfD7OwsADMzM4569SGLlyRJLbF7926mp6cBmJ6eZmJiouFEWmoWL0mSWmLjxo0MDs5Nozw4OMimTZsaTqSlZvGSJKklxsbGWLZs7p/mgYEBNm/e3HAiLTWLlyRJLTE0NMTo6CgRwejoKENDQ01H0hKzeEmS1CJvetObOPfcc3njG9/YdBTVwOIlSVKL7Ny5k6eeeoovfelLTUdRDSxekiS1hM/x6n8WL0mSWqLT6TAzMwPMPU7C53j1H4uXJEktsXv37p8Xr5mZGZ/j1YcsXpIktcTrX//6Fyy/4Q1vaCiJ6mLxkiSpJSKi6QiqmcVLkqSW+PrXv/6iy+p9Fi9JklrKEbD+Y/GSJKklnnnmmRcsP/30
0w0lUV0sXpIkSYVYvCRJkgqxeEmSJBUy2HQASZLq9PGPf5xDhw41HeO03XzzzU1HWJBLL72U97znPU3HaD1HvCRJkgpxxEuS1Nd6aRTm6quv/oV1H/vYx8oHUW0c8ZIkqSXe+ta3vmB58+bNDSVRXSxeqk2322Xr1q10u92mo0h9ISIOR8T+iNgXEZNN59HSe8c73vGC5be//e0NJVFdLF6qTafTYf/+/ezYsaPpKFI/uSYz12fmSNNBVI+VK1cCjnb1K4uXatHtdvnKV75CZrJr1y5HvSRpgdasWcNll13maFefqq14RcSdEXE8Ih48Yd2KiJiIiIPV6/l1fb+a1el0eO655wB47rnnHPWSlkYC90bE3ojYcqoNImJLRExGxOTU1FTheJJeSp0jXp8CRk9adwuwJzPXAXuqZfWhe++99wXLX/3qVxtKIvWVqzLz14HfBt4VEb9x8gaZuT0zRzJzZHh4uHxCSS+qtuKVmd8AfnLS6uuBTvW+A9xQ1/erXSKi6QhSz8vMR6vX48AXgSuaTSRpsUrf47UqM48CVK8XFP5+FfLMM8+8YPnpp59uKInUHyLiFRHxS8+/B34TePDF95LUNq19gGp1/8IWgIsvvrjhNJLUuFXAF6vR40Hg7zNzvNlIkhardPE6FhGrM/NoRKwGjs+3YWZuB7YDjIyMZKmAktRGmfkQcFnTOSSdmdLFaycwBny4er2n8Pf3NCd6LcOJXiVJdamteEXEZ4CrgZURcQS4jbnCdVdE3AT8EHhLXd8vSVp6vX4C2Aue///bKyervayJE+3aildm3jjPR9fW9Z39rpdGYa655hoy//8V4ohwolepDxw6dIh9Dx5g5twVTUfpW8t+Nnfs3PvQsYaT9LeBp05+8EIZrb25Xr3tgx/8IB/60Id+vnzrrbc2mEbSUpo5dwVP/+p1TceQzsjy7+1q5HudMki12LBhw8/fRwTXXHNNg2kkSWoHR7xUm4suuoiHH37Y0S6pjzzyyCMMPPWvjY0WSEtl4KkujzwyXfx7LV6qzYoVK1ixYoWjXZIkVSxekqQFW7NmDT96dtB7vNTzln9vF2vWrCr+vd7jJUmSVIjFS5IkqRAvNUqSFmXgqZ94c32Nlj3zUwBmX/6qhpP0t7nneJW/1GjxkiQt2KWXXtp0hL536NATAFz6K+VLwdllVSN/n8/a4uW0F/Vz2otynF9Spfj3rH7PHzOd7aM/nbXFy2kv6ue0F2U0Ne2FJGnxztriBU57of7gvTaS1Dv8rUZJkqRCztoRL6e9UL9oatoLSdLiOeIlSZJUyFk74uW0F+oXTU17IUlaPEe8JEmSCjlrR7zApy/Xzacvl9HU05clSYt31hYvn75cP5++XEozT1+WJC3eWVu8fPpy/Xz6sqQ26LWZSr7//e/z7LPP8s53vpNzzjmn6TgL5gwaC+M9XpIktcjs7Cyzs7McO+asH/3orB3xkiSdHXppFKbb7XLjjTcC8OSTT3LrrbcyNDTUcCotJYtXD+m14fJenSTb4XJJTel0OszOzgIwMzPDjh07eN/73tdwKi0lLzWqNsuXL2f58uVNx5CknrF7926mp+dmopienmZiYqLhRFpqjnj1EEdhJKm/bdy4kV27djE9Pc3g4CCbNm1qOpKWmCNekiS1xNjYGMuWzf3TPDAwwObNmxtOpKVm8ZIkqSWGhoYYHR0lIhgdHfXG+j7kpUZJklpkbGyMw4cPO9rVpxzxkqQeERGjEfHPEXEoIm5pOo/qMTQ0xLZt2xzt6lMWL0nqARExAPwt8NvAa4EbI+K1zaaStFgWL0nqDVcAhzLzocz8GfBZ4PqGM0laJIuXJPWGNcDDJywfqdZJ6iEWL0nqDXGKdfkLG0VsiYjJiJicmpoqEEvSYli8JKk3HAEuOmH5QuDRkzfKzO2ZOZKZI8PDw8XCSVoYi5ck9YZvAesi4pKIeBnw+8DOhjNJWqTI/IWR6taJiCngB03n0GlZCfy46RA6bb+c
mQ6btEREXAf8DTAA3JmZ//0ltvfY2bs8dva2eY+dPVG81LsiYjIzR5rOIUm9xGNn//JSoyRJUiEWL0mSpEIsXqrb9qYDSFIP8tjZp7zHS5IkqRBHvCRJkgqxeEmSJBVi8ZIkSSrE4iVJklSIxUuSJKmQ/wcvAo8vg+faGAAAAABJRU5ErkJggg==\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Run this code\n",
"fig, ax = plt.subplots(1,2, figsize=(10, 5))\n",
"sns.boxplot(y=diamonds.y, ax=ax[0])\n",
"sns.boxplot(y=diamonds.z, ax=ax[1])\n",
@@ -443,11 +1900,73 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#your code here"
+ "execution_count": 21,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " carat | \n",
+ " cut | \n",
+ " color | \n",
+ " clarity | \n",
+ " depth | \n",
+ " table | \n",
+ " price | \n",
+ " x | \n",
+ " y | \n",
+ " z | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 48410 | \n",
+ " 0.51 | \n",
+ " Very Good | \n",
+ " 5 | \n",
+ " 4 | \n",
+ " 61.8 | \n",
+ " 54.7 | \n",
+ " 1970 | \n",
+ " 5.12 | \n",
+ " 5.15 | \n",
+ " 31.8 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " carat cut color clarity depth table price x y z\n",
+ "48410 0.51 Very Good 5 4 61.8 54.7 1970 5.12 5.15 31.8"
+ ]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# filtering for z > 10\n",
+ "diamonds.loc[diamonds['z'] > 10]"
]
},
{
@@ -461,11 +1980,12 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
- "#your code here"
+ "# Rebuild z from x, y and depth for the outlier row(s) where z > 10\n",
+ "diamonds.loc[diamonds['z'].gt(10), 'z'] = (\n",
+ "    diamonds.loc[diamonds['z'].gt(10)]\n",
+ "    .apply(lambda rec: calculate_z(rec['x'], rec['y'], rec['depth']), axis=1)\n",
+ ")"
]
},
{
@@ -477,11 +1997,63 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "diamonds.loc[48410]"
+ "execution_count": 23,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " carat | \n",
+ " cut | \n",
+ " color | \n",
+ " clarity | \n",
+ " depth | \n",
+ " table | \n",
+ " price | \n",
+ " x | \n",
+ " y | \n",
+ " z | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "Empty DataFrame\n",
+ "Columns: [carat, cut, color, clarity, depth, table, price, x, y, z]\n",
+ "Index: []"
+ ]
+ },
+ "execution_count": 23,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "diamonds.loc[diamonds['z'] > 10]\n",
+ "# aaand it's gone."
]
},
{
@@ -495,11 +2067,129 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#your code here"
+ "execution_count": 24,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " carat | \n",
+ " cut | \n",
+ " color | \n",
+ " clarity | \n",
+ " depth | \n",
+ " table | \n",
+ " price | \n",
+ " x | \n",
+ " y | \n",
+ " z | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 24067 | \n",
+ " 2.00 | \n",
+ " Premium | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 58.9 | \n",
+ " 57.0 | \n",
+ " 12210 | \n",
+ " 8.09 | \n",
+ " 58.90 | \n",
+ " 8.06 | \n",
+ "
\n",
+ " \n",
+ " | 25998 | \n",
+ " 4.01 | \n",
+ " Premium | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 61.0 | \n",
+ " 61.0 | \n",
+ " 15223 | \n",
+ " 10.14 | \n",
+ " 10.10 | \n",
+ " 6.17 | \n",
+ "
\n",
+ " \n",
+ " | 27415 | \n",
+ " 5.01 | \n",
+ " Fair | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 65.5 | \n",
+ " 59.0 | \n",
+ " 18018 | \n",
+ " 10.74 | \n",
+ " 10.54 | \n",
+ " 6.98 | \n",
+ "
\n",
+ " \n",
+ " | 27630 | \n",
+ " 4.50 | \n",
+ " Fair | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 65.8 | \n",
+ " 58.0 | \n",
+ " 18531 | \n",
+ " 10.23 | \n",
+ " 10.16 | \n",
+ " 6.72 | \n",
+ "
\n",
+ " \n",
+ " | 49189 | \n",
+ " 0.51 | \n",
+ " Ideal | \n",
+ " 5 | \n",
+ " 4 | \n",
+ " 61.8 | \n",
+ " 55.0 | \n",
+ " 2075 | \n",
+ " 5.15 | \n",
+ " 31.80 | \n",
+ " 5.12 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " carat cut color clarity depth table price x y z\n",
+ "24067 2.00 Premium 2 1 58.9 57.0 12210 8.09 58.90 8.06\n",
+ "25998 4.01 Premium 1 0 61.0 61.0 15223 10.14 10.10 6.17\n",
+ "27415 5.01 Fair 0 0 65.5 59.0 18018 10.74 10.54 6.98\n",
+ "27630 4.50 Fair 0 0 65.8 58.0 18531 10.23 10.16 6.72\n",
+ "49189 0.51 Ideal 5 4 61.8 55.0 2075 5.15 31.80 5.12"
+ ]
+ },
+ "execution_count": 24,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# same for y > 10\n",
+ "diamonds.loc[diamonds['y'] > 10]"
]
},
{
@@ -513,11 +2203,21 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
- "#your code here"
+ "def calculate_y(x, z, d):\n",
+ "    \"\"\"\n",
+ "    Derive a diamond's width (y) from its length, depth and depth percentage.\n",
+ "\n",
+ "    Solves d/100 = 2*z / (x + y) for y.\n",
+ "\n",
+ "    Input: x - length, z - depth, d - depth percentage (e.g. 61.8).\n",
+ "    Output: the width y implied by those three values.\n",
+ "    \"\"\"\n",
+ "    depth_ratio = d / 100\n",
+ "    numerator = (2 * z) - (depth_ratio * x)\n",
+ "    return numerator / depth_ratio\n",
+ "\n",
+ "# Recalculate y for every row whose recorded width exceeds 10. The mask is\n",
+ "# built once and calculate_y is applied to whole columns instead of row by row.\n",
+ "# NOTE(review): `> 10` also rewrites rows whose y already agrees with x\n",
+ "# (e.g. y=10.10 next to x=10.14) - confirm a tighter cut-off isn't wanted.\n",
+ "wide_rows = diamonds['y'] > 10\n",
+ "diamonds.loc[wide_rows, 'y'] = calculate_y(\n",
+ "    diamonds.loc[wide_rows, 'x'],\n",
+ "    diamonds.loc[wide_rows, 'z'],\n",
+ "    diamonds.loc[wide_rows, 'depth'],\n",
+ ")"
]
},
{
@@ -531,11 +2231,135 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#your thoughts here"
+ "execution_count": 26,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " carat | \n",
+ " cut | \n",
+ " color | \n",
+ " clarity | \n",
+ " depth | \n",
+ " table | \n",
+ " price | \n",
+ " x | \n",
+ " y | \n",
+ " z | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 24067 | \n",
+ " 2.00 | \n",
+ " Premium | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 58.9 | \n",
+ " 57.0 | \n",
+ " 12210 | \n",
+ " 8.09 | \n",
+ " 19.278421 | \n",
+ " 8.06 | \n",
+ "
\n",
+ " \n",
+ " | 25998 | \n",
+ " 4.01 | \n",
+ " Premium | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 61.0 | \n",
+ " 61.0 | \n",
+ " 15223 | \n",
+ " 10.14 | \n",
+ " 10.089508 | \n",
+ " 6.17 | \n",
+ "
\n",
+ " \n",
+ " | 27415 | \n",
+ " 5.01 | \n",
+ " Fair | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 65.5 | \n",
+ " 59.0 | \n",
+ " 18018 | \n",
+ " 10.74 | \n",
+ " 10.572977 | \n",
+ " 6.98 | \n",
+ "
\n",
+ " \n",
+ " | 27630 | \n",
+ " 4.50 | \n",
+ " Fair | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 65.8 | \n",
+ " 58.0 | \n",
+ " 18531 | \n",
+ " 10.23 | \n",
+ " 10.195532 | \n",
+ " 6.72 | \n",
+ "
\n",
+ " \n",
+ " | 49189 | \n",
+ " 0.51 | \n",
+ " Ideal | \n",
+ " 5 | \n",
+ " 4 | \n",
+ " 61.8 | \n",
+ " 55.0 | \n",
+ " 2075 | \n",
+ " 5.15 | \n",
+ " 11.419579 | \n",
+ " 5.12 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " carat cut color clarity depth table price x y \\\n",
+ "24067 2.00 Premium 2 1 58.9 57.0 12210 8.09 19.278421 \n",
+ "25998 4.01 Premium 1 0 61.0 61.0 15223 10.14 10.089508 \n",
+ "27415 5.01 Fair 0 0 65.5 59.0 18018 10.74 10.572977 \n",
+ "27630 4.50 Fair 0 0 65.8 58.0 18531 10.23 10.195532 \n",
+ "49189 0.51 Ideal 5 4 61.8 55.0 2075 5.15 11.419579 \n",
+ "\n",
+ " z \n",
+ "24067 8.06 \n",
+ "25998 6.17 \n",
+ "27415 6.98 \n",
+ "27630 6.72 \n",
+ "49189 5.12 "
+ ]
+ },
+ "execution_count": 26,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "diamonds.loc[diamonds['y'] > 10]"
]
},
{
@@ -547,9 +2371,22 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
+ "execution_count": 27,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAl4AAAEkCAYAAAD+cD+8AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAZxUlEQVR4nO3df5Bd5X3f8c9He7GRiInhshZEoK7JJtAGB2yuMTFjSECbbBxsMv0xAzNE64TpTt10d63atY3TMZM/4nFdnCIpM2l3DOFqwshjY9ykrrxlReJidwjuCuRIRErZEglpBWi5qg2N+KG7++0fe2GkrbT6ted57rn7fs3s7H3OPfbzGUZz9NFzz32OI0IAAAAo3rLcAQAAAJYKihcAAEAiFC8AAIBEKF4AAACJULwAAAASoXgBAAAkUljxsn2Z7b+0vcv2M7ZHWscvtD1u+9nW7wuKygAAANBOXNQ+XrYvkXRJRDxl+12Stkn6TUmfkHQoIr5s+/OSLoiIzxUSAgAAoI0UtuIVES9ExFOt169K2iVplaTbJNVbp9U1V8YAAAA6XmErXsdMYvdIelzSVZKej4h3H/Xe/4mI/+/jRtuDkgYl6bzzzrv2yiuvLDwngGNt27bt5Yjozp0DZ+aiiy6Knp6e3DGAJWeha2el6Mlt/5Skb0n6VES8YvuU/ncRMSppVJJqtVpMTEwUFxLAcdnemzsDzlxPT4+4dgLpLXTtLPRbjbbP0VzpeigiHmkdfql1/9db94EdLDIDAABAuyjyW42WdL+kXRHxh0e99eeSBlqvByT9WVEZAAAA2kmRHzXeIOm3JO2wvb117AuSvizpG7bvkvS8pH9WYAYAAIC2UVjxiogfSDrRDV23FDUvAABAu2LnegAAgEQoXgAAAIlQvFCoRqOh4eFhNRqN3FEAoBS4bnY2ihcKVa/XtWPHDm3atCl3FAAoBa6bnY3ihcI0Gg2NjY0pIjQ2Nsa/3gDgJLhudj6KFwpTr9c1OzsrSZqZmeFfb8BZsr3O9jO2d9rebPvc3JmwuLhudj6KFwqzdetWNZtNSVKz2dT4+HjmREB52V4laVhSLSKuktQl6fa8qbDYuG52PooXCrNmzRpVKnNbxVUqFfX19WVOBJReRdJy2xVJKyQdyJwHi4zrZuejeKEwAwMDWrZs7o9YV1eX1q5dmzkRUF4RMSXpXs098eMFST+JiEfnn2d70PaE7Ynp6enUMXGWuG52PooXClOtVtXf3y/b6u/vV7VazR0JKC3bF0i6TdJ7Jf2MpPNs3zn/vIgYjYhaRNS6u7tTx8RZ4rrZ+SheKNTAwIDe97738a824OytkfR3ETEdEUckPSLpw5kzoQBcNztbkQ/JBlStVrVhw4bcMYBO8Lyk622vkPSa5p55O5E3EorAdbOzseIFACUQEU9KeljSU5J2aO76PZo1FIDTxooXAJRERNwj6Z7cOQCcOVa8AAAAEqF4AQAAJELxAgAASITiBQAAkAjFCwAAIBGKFwAAQCIULwAAgEQoXgAAAIlQvAAAABIprHjZfsD2Qds7jzp2je2/sr3d9oTt64qaHwAAoN0UueL1oKT+ece+Iun3I+IaSV9sjQEAAJaEwopXRDwu6dD8w5LOb73+aUkHipofAACg3aR+SPanJP032/dqrvR9+EQn2h6UNChJq1evTpMOAACgQKlvrv+kpHURcZmkdZLuP9GJETEaEbWIqHV3dycLCAAAUJTUxWtA0iOt19+UxM31AABgyUhdvA5Iuqn1+mZJzyaeHwAAIJvC7vGyvVnSL0u6yPZ+SfdI+ueS1tuuSHpdrXu4AAAAloLCildE3HGCt64tak4AAIB2xs71AAAAiVC8AAAAEqF4AQAAJELxAgAASITiBQAAkAjFCwAAIBGKFwAAQCIULwAAgEQoXgBQAravsL39qJ9XbH8qdy4svkajoeHhYTUajdxRUACKFwCUQET8
bURcExHXaO4JIIclfTtzLBSgXq9rx44d2rRpU+4oKADFCwDK5xZJ/zsi9uYOgsXVaDQ0NjamiNDY2BirXh2I4gUA5XO7pM3He8P2oO0J2xPT09OJY+Fs1et1zc7OSpJmZmZY9epAFC8AKBHb75D0cUnfPN77ETEaEbWIqHV3d6cNh7O2detWNZtNSVKz2dT4+HjmRFhsFC8AKJdfl/RURLyUOwgW35o1a1SpVCRJlUpFfX19mRNhsVG8AKBc7tAJPmZE+Q0MDGjZsrm/mru6urR27drMibDYKF4AUBK2V0jqk/RI7iwoRrVaVX9/v2yrv79f1Wo1dyQsskruAACAUxMRhyXxN3GHGxgY0J49e1jt6lCseAEAACRC8QIAoI2wgWpno3gBANAm2EC181G8AABoE2yg2vkoXgAAtAk2UO18FC8AANoEG6h2vsKKl+0HbB+0vXPe8SHbf2v7GdtfKWp+AADKhg1UO1+RK14PSuo/+oDtX5F0m6RfjIhfkHRvgfMDAFAqbKDa+QrbQDUiHrfdM+/wJyV9OSLeaJ1zsKj5AQAoIzZQ7Wyp7/H6eUkfsf2k7f9u+4MnOtH2oO0J2xPT09MJIwIAkE+1WtWGDRtY7epQqYtXRdIFkq6X9G8kfcO2j3diRIxGRC0iat3d3SkzAgCQTaPR0PDwMHt4dajUxWu/pEdizg8lzUq6KHEGAADaFjvXd7bUxes/S7pZkmz/vKR3SHo5cQYAANrS0TvXf/e732XVqwMVuZ3EZklPSLrC9n7bd0l6QNLlrS0mvi5pICKiqAwAAJRJvV7XkSNHJElHjhxh1asDFfmtxjtO8NadRc0JAECZjY+P6631iIjQo48+qnXr1mVOhcXEzvUAALSJlStXLjhG+VG8AABoEy+99NKCY5QfxQsAgDbxkY985JjxjTfemCkJikLxAgCgTbz55pvHjN94441MSVAUihcAAG3i+9///oJjlB/FCwCANjE7O7vgGOVH8QIAoE3Mf4reCZ6qhxKjeAEA0CY+9KEPLThG+VG8AABoE3v27DlmvHfv3jxBUBiKFwAAbeKFF144ZnzgwIFMSVAUihcAAG2Ce7w6H8ULAErC9rttP2x7t+1dtn8pdyYsrptuumnBMcqP4gUA5bFe0lhEXCnpakm7MufBIrv11luPGX/sYx/LlARFoXgBQAnYPl/SjZLul6SIeDMifpw3FRbb+vXrjxnfd999mZKgKBQvACiHyyVNS/oT20/b/prt83KHwuLat2/fgmOUH8ULAMqhIukDkv44It4v6e8lfX7+SbYHbU/Ynpienk6dEcBJULwAoBz2S9ofEU+2xg9rrogdIyJGI6IWEbXu7u6kAQGcHMULAEogIl6UtM/2Fa1Dt0j6m4yRUAC2k+h8ldwBAACnbEjSQ7bfIek5Sb+dOQ8WWUQsOEb5UbwAoCQiYrukWu4cAM4cHzUCAAAkQvECAKBNnHvuuQuOUX6FFS/bD9g+aHvncd77jO2wfVFR8wMAUDavv/76gmOUX5H3eD0o6Y8kbTr6oO3LJPVJer7AuQEAkCRt3LhRk5OTuWOcsZGRkdwRTklvb6+GhoZyx2h7ha14RcTjkg4d563/IOmzkviqBgAAWFKSfqvR9sclTUXEj062N4ntQUmDkrR69eoE6QAAnahMqzCf/vSntW3btrfH1157rb761a9mTITFluzmetsrJP2epC+eyvnsvgwAWGq+8IUvLDhG+aX8VuPPSnqvpB/Z3iPpUklP2b44YQYAANpWtVrVihUrJM2tdlWr1cyJsNiSfdQYETskveetcat81SLi5VQZAABodz09Pdq7dy+rXR2qyO0kNkt6QtIVtvfbvquouQAA6BTnnHOOent7We3qUIWteEXEHSd5v6eouQEAANoRO9cDAAAkQvECAABIhOIFAACQCMULAAAgEYoXAABAIhQvAACARCheAAAAiVC8AAAAEqF4AQAAJELxAgAASITiBQAAkAjFCwAAIBGK
FwAAQCIULwAAgEQoXgAAAIlQvAAAABKp5A4AADg1tvdIelXSjKRmRNTyJgJwuiheAFAuvxIRL+cOAeDM8FEjAABAIhQvACiPkPSo7W22B493gu1B2xO2J6anpxPHA3AyFC8AKI8bIuIDkn5d0u/avnH+CRExGhG1iKh1d3enTwhgQRQvACiJiDjQ+n1Q0rclXZc3EYDTRfECgBKwfZ7td731WtKvStqZNxWA01VY8bL9gO2Dtncedezf295t+69tf9v2u4uaHwA6zEpJP7D9I0k/lPRfI2IscyYAp6nIFa8HJfXPOzYu6aqI+EVJ/0vS3QXODwAdIyKei4irWz+/EBF/kDsTgNNXWPGKiMclHZp37NGIaLaGfyXp0qLmBwAAaDc57/H6HUnfPdGbfCUaAAB0mizFy/bvSWpKeuhE5/CVaAAA0GmSPzLI9oCkWyXdEhGRen4AAIBckhYv2/2SPifppog4nHJuAACA3IrcTmKzpCckXWF7v+27JP2RpHdJGre93fZ/LGp+AACAdlPktxrviIhLIuKciLg0Iu6PiN6IuCwirmn9/Iui5kd7aDQaGh4eVqPRyB0FAIDs2LkeharX69qxY4c2bdqUOwoAANmdtHjZ/le2L0gRBp2l0WhobGxMEaGxsTFWvQAAS96prHhdLOl/2v6G7X7bLjoUOkO9Xtfs7KwkaWZmhlUvoMX2Y7Y/Ou/YaK48ANI5afGKiH8r6eck3S/pE5Ketf0l2z9bcDaU3NatW9Vszj2ooNlsanx8PHMioG28V9LnbN9z1LFarjAA0jmle7xa+2292PppSrpA0sO2v1JgNpTcmjVrVKnM7VhSqVTU19eXORHQNn4s6RZJK23/F9s/nTsQgDRO5R6vYdvbJH1F0v+Q9L6I+KSkayX9k4LzocQGBga0bNncH7Guri6tXbs2cyKgbTgimhHxLyV9S9IPJL0ncyYACZzKitdFkv5xRPxaRHwzIo5IUkTMam4HeuC4qtWq+vv7ZVv9/f2qVqu5IwHt4u09DCPiQc3dxvForjAA0jnpzvUR8cUF3tu1uHHQaQYGBrRnzx5Wu4CjRMR/mjfeJul3MsUBkFDyZzViaalWq9qwYUPuGAAAtAU2UAUAAEiE4gUAAJAIxQsAACARihcAAEAiFC8UqtFoaHh4mOc0AgAgihcKVq/XtWPHDp7TCACAKF4oUKPR0NjYmCJCY2NjrHoBAJY8ihcKU6/XNTs7K0mamZlh1QsAsORRvFCYrVu3qtlsSpKazabGx8czJwIAIC+KFwqzZs0aVSpzD0eoVCrq6+vLnAgAgLwoXijMwMCAbEuSli1bxvMagUVgu8v207a/kzsLgNPHsxpRmGq1qosvvlj79u3TypUrVa1Wc0cCOsGIpF2Szs8x+caNGzU5OZlj6iXjrf++IyMjmZN0vt7eXg0NDSWdk+KFwjQaDU1NTUmSpqam1Gg0KF/AWbB9qaTfkPQHkv51jgyTk5PavnOXZlZcmGP6JWHZmyFJ2vbcS5mTdLauw4eyzEvxQmFGR0ff/lbj7OysRkdHdffdd2dOBZTafZI+K+ldJzrB9qCkQUlavXp1ISFmVlyo1678aCH/30Aqy3dvyTJvYfd42X7A9kHbO486dqHtcdvPtn5fUNT8yO+xxx5bcAzg1Nm+VdLBiNi20HkRMRoRtYiodXd3J0oH4FQVeXP9g5L65x37vKTHIuLnJD3WGqNDRcSCYwCn5QZJH7e9R9LXJd1s+0/zRgJwugorXhHxuKT5H6DeJqneel2X9JtFzY/8PvjBDx4zvu666zIlAcovIu6OiEsjokfS7ZL+IiLuzBwLwGlKvZ3Eyoh4QZJav99zohNtD9qesD0xPT2dLCAWz/PPP3/MeO/evZmSAADQHtp2Hy/uUyi/AwcOLDgGcGYi4nsRcWvuHABOX+ri9ZLtSySp9ftg4vkBAACySV28/lzSQOv1gKQ/Szw/Eurq6lpwDADAUlPkdhKbJT0h6Qrb+23f
JenLkvpsPyuprzVGh5p/Mz031wMAlrrCNlCNiDtO8NYtRc2J9rJv375jxvv378+UBACA9tC2N9ej/OYXrflFDACApYbihcJwjxcAAMeieKEwMzMzC44BAFhqKF4AAACJULwAAAASKexbjVh8Gzdu1OTkZO4YZ2VkZCR3hJPq7e3V0NBQ7hgAgA5E8UJhbCsijhkDKLepqSl1Hf6Jlu/ekjsKcFa6Djc0NdVMPi/Fq0TKtgozMTGhz3zmM2+P7733Xl177bUZEwEAkBfFC4Wp1Wpvr3q9853vpHQBHWDVqlV68Y2KXrvyo7mjAGdl+e4tWrVqZfJ5ubkeherp6ZEkfelLX8obBACANkDxQqHOP/98XX311ax2AQAgihcAAEAyFC8AAIBEKF4AAACJULwAAAASoXgBAAAkQvECAABIhOIFAACQCMULAAAgEYoXAABAIhQvAACARCheAAAAiWQpXrbX2X7G9k7bm22fmyMHAJSF7XNt/9D2j1rXz9/PnQnA6UtevGyvkjQsqRYRV0nqknR76hwAUDJvSLo5Iq6WdI2kftvXZ84E4DRVMs673PYRSSskHciUAwBKISJC0v9tDc9p/US+RADORPIVr4iYknSvpOclvSDpJxHxaOocAFA2trtsb5d0UNJ4RDyZOxOA05Pjo8YLJN0m6b2SfkbSebbvPM55g7YnbE9MT0+njgkAbSciZiLiGkmXSrrO9lXzz+HaCbS3HDfXr5H0dxExHRFHJD0i6cPzT4qI0YioRUStu7s7eUgAaFcR8WNJ35PUf5z3uHYCbSzHPV7PS7re9gpJr0m6RdJEhhwAUBq2uyUdiYgf216uuX/E/rscWboOH9Ly3VtyTL0kLHv9FUnS7LnnZ07S2boOH5K0Mvm8yYtXRDxp+2FJT0lqSnpa0mjqHABQMpdIqtvu0tynFd+IiO+kDtHb25t6yiVncvJVSVLv5elLwdKyMsuf5yzfaoyIeyTdk2NuACijiPhrSe/PnWNoaCh3hI43MjIiSVq/fn3mJCgCO9cDAAAkQvECAABIhOIFAACQCMULAAAgEYoXAABAIhQvAACARCheAAAAiVC8AAAAEqF4AQAAJELxAgAASITiBQAAkAjFCwAAIBGKFwAAQCIULwAAgEQoXgAAAIlQvAAAABKheAEAACRSyR0gl40bN2pycjJ3jI731n/jkZGRzEk6W29vr4aGhnLHAACcxJItXpOTk9q+c5dmVlyYO0pHW/ZmSJK2PfdS5iSdq+vwodwRAACnaMkWL0maWXGhXrvyo7ljAGdl+e4tuSMAAE4R93gBAAAkQvECAABIhOIFAACQSJbiZfvdth+2vdv2Ltu/lCMHAABASrlurl8vaSwi/qntd0hakSkHAABAMsmLl+3zJd0o6ROSFBFvSnozdQ4AAIDUcnzUeLmkaUl/Yvtp21+zfd78k2wP2p6wPTE9PZ0+JQAAwCLLUbwqkj4g6Y8j4v2S/l7S5+efFBGjEVGLiFp3d3fqjADQVmxfZvsvW/fFPmObx0EAJZSjeO2XtD8inmyNH9ZcEQMAnFhT0qcj4h9Kul7S79r+R5kzAThNyYtXRLwoaZ/tK1qHbpH0N6lzAECZRMQLEfFU6/WrknZJWpU3FYDTletbjUOSHmp9o/E5Sb+dOsDU1JS6Dv+Ex62g9LoONzQ11cwdAwnZ7pH0fklPHue9QUmDkrR69eqkuQCcXJbiFRHbJdVyzA0AZWb7pyR9S9KnIuKV+e9HxKikUUmq1WqROB6Ak1iyD8letWqVXnyjwkOyUXrLd2/RqlUrc8dAArbP0VzpeigiHsmdB8Dp45FBAFACti3pfkm7IuIPc+cBcGYoXgBQDjdI+i1JN9ve3vphyR4omSX7USMAlElE/ECSc+cAcHZY8QIAAEiE4gUAAJAIxQsAACARihcAAEAiFC8AAIBEKF4AAACJULwAAAASoXgBAAAkQvECAABIZEnvXN91+JCW796SO0ZHW/b6K5Kk2XPP
z5ykc3UdPiSJh2QDQBks2eLV29ubO8KSMDn5qiSp93KKQXFW8ucZAEpiyRavoaGh3BGWhJGREUnS+vXrMycBACA/7vECAABIhOIFAACQyJL9qBEAsDRs3LhRk5OTuWOcsreyvnWrRln09vZyG88poHgBANBGli9fnjsCCkTxAgB0NFZh0E64xwsAACARihcAAEAi2YqX7S7bT9v+Tq4MAAAAKeVc8RqRtCvj/AAAAEllKV62L5X0G5K+lmN+AACAHHKteN0n6bOSZk90gu1B2xO2J6anp9MlAwAAKEjy4mX7VkkHI2LbQudFxGhE1CKi1t3dnSgdAAB5NRoNDQ8Pq9Fo5I6CAuRY8bpB0sdt75H0dUk32/7TDDkAAGg79XpdO3bs0KZNm3JHQQGSF6+IuDsiLo2IHkm3S/qLiLgzdQ4AANpNo9HQ2NiYIkJjY2OsenUgdq4vkbI9b0wq5zPHeN4Y2pHtByS9davGVbnzoBj1el2zs3O3P8/MzGjTpk1at25d5lRYTFk3UI2I70XErTkzoFjLly/nuWPA4nhQUn/uECjW1q1b1Ww2JUnNZlPj4+OZE2GxseJVIqzCAEtXRDxuuyd3DhRrzZo12rJli5rNpiqVivr6+nJHwiLjkUEA0EHYiqfcBgYGtGzZ3F/NXV1dWrt2beZEWGwULwDoIGzFU27ValX9/f2yrf7+flWr1dyRsMj4qBEAgDYyMDCgPXv2sNrVoSheAAC0kWq1qg0bNuSOgYLwUSMAlIDtzZKekHSF7f2278qdCcDpY8ULAEogIu7InQHA2WPFCwAAIBGKFwAAQCIULwAAgEQoXgAAAIk4InJnOCnb05L25s6BM3aRpJdzh8AZ+QcRwS6cJcW1s9S4bpbbCa+dpSheKDfbExFRy50DAMqC62bn4qNGAACARCheAAAAiVC8kMJo7gAAUDJcNzsU93gBAAAkwooXAABAIhQvAACARCheAAAAiVC8AAAAEqF4AQAAJPL/AKBjWXmewTQ9AAAAAElFTkSuQmCC\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
"source": [
"#Run this code\n",
"fig, ax = plt.subplots(1,2, figsize=(10, 5))\n",
@@ -567,11 +2404,11 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
- "#your thoughts here"
+ "# These values seem to fall within a more reasonable range of the overall data distribution."
]
},
{
@@ -583,11 +2420,11 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
- "#your code here"
+ "# index=False: don't persist the row labels, otherwise they come back as an\n",
+ "# 'Unnamed: 0' column when the file is read again.\n",
+ "diamonds.to_csv('diamonds_clean.csv', index=False)"
]
}
],
@@ -607,7 +2444,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.6.8"
+ "version": "3.7.4"
}
},
"nbformat": 4,