diff --git a/1.-Data Cleaning.ipynb b/1.-Data Cleaning.ipynb index 1a927b9..3d25f73 100644 --- a/1.-Data Cleaning.ipynb +++ b/1.-Data Cleaning.ipynb @@ -71,7 +71,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -90,11 +90,15 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ - "#your code here" + "path = 'C:/Users/Zaca/Documents/Datasets/diamonds.csv'\n", + "diamonds = pd.read_csv(path)\n", + "\n", + "# dropping the index column\n", + "diamonds.drop('Unnamed: 0', axis= 1, inplace=True)" ] }, { @@ -109,11 +113,129 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#your code here" + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
caratcutcolorclaritydepthtablepricexyz
00.23IdealESI261.555.03263.953.982.43
10.21PremiumESI159.861.03263.893.842.31
20.23GoodEVS156.965.03274.054.072.31
30.29PremiumIVS262.458.03344.204.232.63
40.31GoodJSI263.358.03354.344.352.75
\n", + "
" + ], + "text/plain": [ + " carat cut color clarity depth table price x y z\n", + "0 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43\n", + "1 0.21 Premium E SI1 59.8 61.0 326 3.89 3.84 2.31\n", + "2 0.23 Good E VS1 56.9 65.0 327 4.05 4.07 2.31\n", + "3 0.29 Premium I VS2 62.4 58.0 334 4.20 4.23 2.63\n", + "4 0.31 Good J SI2 63.3 58.0 335 4.34 4.35 2.75" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# exploring data\n", + "diamonds.head()" ] }, { @@ -127,11 +249,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ - "#your code here" + "# The numbers we see in initial exploration fall within the ranges defined in the codebook. \n", + "# There are a few labels I don't really understand because I don't know much about diamonds." ] }, { @@ -145,11 +268,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "(53940, 10)" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "#your code here" + "diamonds.shape" ] }, { @@ -163,11 +297,218 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#your code here" + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
caratcutcolorclaritydepthtablepricexyz
00.23IdealE161.555.03263.953.982.43
10.21PremiumE259.861.03263.893.842.31
20.23GoodE456.965.03274.054.072.31
30.29PremiumI362.458.03344.204.232.63
40.31GoodJ163.358.03354.344.352.75
.................................
539350.72IdealD260.857.027575.755.763.50
539360.72GoodD263.155.027575.695.753.61
539370.70Very GoodD262.860.027575.665.683.56
539380.86PremiumH161.058.027576.156.123.74
539390.75IdealD162.255.027575.835.873.64
\n", + "

53940 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " carat cut color clarity depth table price x y z\n", + "0 0.23 Ideal E 1 61.5 55.0 326 3.95 3.98 2.43\n", + "1 0.21 Premium E 2 59.8 61.0 326 3.89 3.84 2.31\n", + "2 0.23 Good E 4 56.9 65.0 327 4.05 4.07 2.31\n", + "3 0.29 Premium I 3 62.4 58.0 334 4.20 4.23 2.63\n", + "4 0.31 Good J 1 63.3 58.0 335 4.34 4.35 2.75\n", + "... ... ... ... ... ... ... ... ... ... ...\n", + "53935 0.72 Ideal D 2 60.8 57.0 2757 5.75 5.76 3.50\n", + "53936 0.72 Good D 2 63.1 55.0 2757 5.69 5.75 3.61\n", + "53937 0.70 Very Good D 2 62.8 60.0 2757 5.66 5.68 3.56\n", + "53938 0.86 Premium H 1 61.0 58.0 2757 6.15 6.12 3.74\n", + "53939 0.75 Ideal D 1 62.2 55.0 2757 5.83 5.87 3.64\n", + "\n", + "[53940 rows x 10 columns]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# using a dictionary to replace strings by values\n", + "clarity_dict = {'I1': 0, 'SI2': 1, 'SI1': 2, 'VS2': 3, 'VS1': 4, 'VVS2': 5, 'VVS1': 6, 'IF': 7}\n", + "diamonds['clarity'] = diamonds['clarity'].map(clarity_dict)\n", + "diamonds" ] }, { @@ -181,11 +522,218 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#your code here" + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
caratcutcolorclaritydepthtablepricexyz
00.23Ideal5161.555.03263.953.982.43
10.21Premium5259.861.03263.893.842.31
20.23Good5456.965.03274.054.072.31
30.29Premium1362.458.03344.204.232.63
40.31Good0163.358.03354.344.352.75
.................................
539350.72Ideal6260.857.027575.755.763.50
539360.72Good6263.155.027575.695.753.61
539370.70Very Good6262.860.027575.665.683.56
539380.86Premium2161.058.027576.156.123.74
539390.75Ideal6162.255.027575.835.873.64
\n", + "

53940 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " carat cut color clarity depth table price x y z\n", + "0 0.23 Ideal 5 1 61.5 55.0 326 3.95 3.98 2.43\n", + "1 0.21 Premium 5 2 59.8 61.0 326 3.89 3.84 2.31\n", + "2 0.23 Good 5 4 56.9 65.0 327 4.05 4.07 2.31\n", + "3 0.29 Premium 1 3 62.4 58.0 334 4.20 4.23 2.63\n", + "4 0.31 Good 0 1 63.3 58.0 335 4.34 4.35 2.75\n", + "... ... ... ... ... ... ... ... ... ... ...\n", + "53935 0.72 Ideal 6 2 60.8 57.0 2757 5.75 5.76 3.50\n", + "53936 0.72 Good 6 2 63.1 55.0 2757 5.69 5.75 3.61\n", + "53937 0.70 Very Good 6 2 62.8 60.0 2757 5.66 5.68 3.56\n", + "53938 0.86 Premium 2 1 61.0 58.0 2757 6.15 6.12 3.74\n", + "53939 0.75 Ideal 6 1 62.2 55.0 2757 5.83 5.87 3.64\n", + "\n", + "[53940 rows x 10 columns]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# repeating the process above but for the color column\n", + "color_dict = {'D': 6, 'E': 5, 'F': 4, 'G': 3, 'H': 2, 'I': 1, 'J': 0}\n", + "diamonds['color'] = diamonds['color'].map(color_dict)\n", + "diamonds" ] }, { @@ -199,11 +747,40 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 53940 entries, 0 to 53939\n", + "Data columns (total 10 columns):\n", + "carat 53940 non-null float64\n", + "cut 53940 non-null object\n", + "color 53940 non-null int64\n", + "clarity 53940 non-null int64\n", + "depth 53940 non-null float64\n", + "table 53940 non-null float64\n", + "price 53940 non-null int64\n", + "x 53940 non-null float64\n", + "y 53940 non-null float64\n", + "z 53940 non-null float64\n", + "dtypes: float64(6), int64(3), object(1)\n", + "memory usage: 4.1+ MB\n" + ] + } + ], + "source": [ + "diamonds.info()" + ] + }, + { + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "#your code here" + "**With the info method we confirm that there are no null values, and we can see 
the same number of rows and columns**" ] }, { @@ -221,11 +798,34 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#your code here" + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "carat float64\n", + "cut object\n", + "color int64\n", + "clarity int64\n", + "depth float64\n", + "table float64\n", + "price int64\n", + "x float64\n", + "y float64\n", + "z float64\n", + "dtype: object" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "diamonds.dtypes\n", + "# So far everything appears to be ok. Essentially everything is a float (as it should be), \n", + "# except price (an int), color and clarity (which we mapped to ints earlier) and cut, which holds string values." ] }, { @@ -241,11 +841,173 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#your code and comments here" + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
caratcolorclaritydepthtablepricexyz
count53940.00000053940.00000053940.00000053940.00000053940.00000053940.00000053940.00000053940.00000053940.000000
mean0.7979403.4058033.05102061.74940557.4571843932.7997225.7311575.7345263.538734
std0.4740111.7011051.6471361.4326212.2344913989.4397381.1217611.1421350.705699
min0.2000000.0000000.00000043.00000043.000000326.0000000.0000000.0000000.000000
25%0.4000002.0000002.00000061.00000056.000000950.0000004.7100004.7200002.910000
50%0.7000003.0000003.00000061.80000057.0000002401.0000005.7000005.7100003.530000
75%1.0400005.0000004.00000062.50000059.0000005324.2500006.5400006.5400004.040000
max5.0100006.0000007.00000079.00000095.00000018823.00000010.74000058.90000031.800000
\n", + "
" + ], + "text/plain": [ + " carat color clarity depth table \\\n", + "count 53940.000000 53940.000000 53940.000000 53940.000000 53940.000000 \n", + "mean 0.797940 3.405803 3.051020 61.749405 57.457184 \n", + "std 0.474011 1.701105 1.647136 1.432621 2.234491 \n", + "min 0.200000 0.000000 0.000000 43.000000 43.000000 \n", + "25% 0.400000 2.000000 2.000000 61.000000 56.000000 \n", + "50% 0.700000 3.000000 3.000000 61.800000 57.000000 \n", + "75% 1.040000 5.000000 4.000000 62.500000 59.000000 \n", + "max 5.010000 6.000000 7.000000 79.000000 95.000000 \n", + "\n", + " price x y z \n", + "count 53940.000000 53940.000000 53940.000000 53940.000000 \n", + "mean 3932.799722 5.731157 5.734526 3.538734 \n", + "std 3989.439738 1.121761 1.142135 0.705699 \n", + "min 326.000000 0.000000 0.000000 0.000000 \n", + "25% 950.000000 4.710000 4.720000 2.910000 \n", + "50% 2401.000000 5.700000 5.710000 3.530000 \n", + "75% 5324.250000 6.540000 6.540000 4.040000 \n", + "max 18823.000000 10.740000 58.900000 31.800000 " + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "diamonds.describe()\n", + "\n", + "# the describe methods shows us the summary statistics for every numeric column in our dataset." ] }, { @@ -261,11 +1023,339 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#your code here" + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
caratcutcolorclaritydepthtablepricexyz
22071.00Premium3159.159.031426.556.480.0
23141.01Premium2058.159.031676.666.600.0
47911.10Premium3163.059.036966.506.470.0
54711.01Premium4159.258.038376.506.470.0
101671.50Good3064.061.047317.157.040.0
111821.07Ideal4161.656.049540.006.620.0
119631.00Very Good2363.353.051390.000.000.0
136011.15Ideal3359.256.055646.886.830.0
159511.14Fair3457.567.063810.000.000.0
243942.18Premium2159.461.0126318.498.450.0
245201.56Ideal3362.254.0128000.000.000.0
261232.25Premium1261.358.0153978.528.420.0
262431.20Premium6662.159.0156860.000.000.0
271122.20Premium2261.259.0172658.428.370.0
274292.25Premium2162.859.0180340.000.000.0
275032.02Premium2362.753.0182078.027.950.0
277392.80Good3163.858.0187888.908.850.0
495560.71Good4164.160.021300.000.000.0
495570.71Good4164.160.021300.000.000.0
515061.12Premium3060.459.023836.716.670.0
\n", + "
" + ], + "text/plain": [ + " carat cut color clarity depth table price x y z\n", + "2207 1.00 Premium 3 1 59.1 59.0 3142 6.55 6.48 0.0\n", + "2314 1.01 Premium 2 0 58.1 59.0 3167 6.66 6.60 0.0\n", + "4791 1.10 Premium 3 1 63.0 59.0 3696 6.50 6.47 0.0\n", + "5471 1.01 Premium 4 1 59.2 58.0 3837 6.50 6.47 0.0\n", + "10167 1.50 Good 3 0 64.0 61.0 4731 7.15 7.04 0.0\n", + "11182 1.07 Ideal 4 1 61.6 56.0 4954 0.00 6.62 0.0\n", + "11963 1.00 Very Good 2 3 63.3 53.0 5139 0.00 0.00 0.0\n", + "13601 1.15 Ideal 3 3 59.2 56.0 5564 6.88 6.83 0.0\n", + "15951 1.14 Fair 3 4 57.5 67.0 6381 0.00 0.00 0.0\n", + "24394 2.18 Premium 2 1 59.4 61.0 12631 8.49 8.45 0.0\n", + "24520 1.56 Ideal 3 3 62.2 54.0 12800 0.00 0.00 0.0\n", + "26123 2.25 Premium 1 2 61.3 58.0 15397 8.52 8.42 0.0\n", + "26243 1.20 Premium 6 6 62.1 59.0 15686 0.00 0.00 0.0\n", + "27112 2.20 Premium 2 2 61.2 59.0 17265 8.42 8.37 0.0\n", + "27429 2.25 Premium 2 1 62.8 59.0 18034 0.00 0.00 0.0\n", + "27503 2.02 Premium 2 3 62.7 53.0 18207 8.02 7.95 0.0\n", + "27739 2.80 Good 3 1 63.8 58.0 18788 8.90 8.85 0.0\n", + "49556 0.71 Good 4 1 64.1 60.0 2130 0.00 0.00 0.0\n", + "49557 0.71 Good 4 1 64.1 60.0 2130 0.00 0.00 0.0\n", + "51506 1.12 Premium 3 0 60.4 59.0 2383 6.71 6.67 0.0" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# checking the selection where any of x, y or z are zero.\n", + "diamonds[(diamonds['x'] == 0) | (diamonds['y'] == 0) | (diamonds['z'] == 0)]" ] }, { @@ -284,11 +1374,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ - "#your code here" + "def calculate_z(x, y, d):\n", + " \"\"\"\n", + " Calculates the depth (z) of a diamond.\n", + " Input: x, y and d, the depth given as a percentage.\n", + " Output: the diamond's depth, i.e. d/100 times the mean of x and y.\n", + " \"\"\"\n", + " d = d/100\n", + " return (d*x + d*y)/2" ] }, { @@ -300,11 +1397,12 @@ }, { "cell_type": "code", - "execution_count": null, +
"execution_count": 13, "metadata": {}, "outputs": [], "source": [ - "#your code here" + "# using apply only on the rows which have a value zero for z\n", + "diamonds.loc[(diamonds['z'] == 0) & (diamonds['x'] != 0) & (diamonds['y'] != 0), 'z'] = diamonds[(diamonds['z'] == 0) & (diamonds['x'] != 0) & (diamonds['y'] != 0)].apply(lambda row: calculate_z(row.x, row.y, row.depth), axis=1)" ] }, { @@ -320,11 +1418,14 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ - "#your code here" + "# replacing the zeros we cannot calculate by NaN\n", + "diamonds.x.replace(0, np.nan, inplace= True)\n", + "diamonds.y.replace(0, np.nan, inplace= True)\n", + "diamonds.z.replace(0, np.nan, inplace= True)" ] }, { @@ -336,11 +1437,171 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#your code here" + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
caratcolorclaritydepthtablepricexyz
count53940.00000053940.00000053940.00000053940.00000053940.00000053940.00000053932.00000053933.00000053932.000000
mean0.7979403.4058033.05102061.74940557.4571843932.7997225.7320075.7352703.540265
std0.4740111.7011051.6471361.4326212.2344913989.4397381.1196701.1403390.702667
min0.2000000.0000000.00000043.00000043.000000326.0000003.7300003.6800001.070000
25%0.4000002.0000002.00000061.00000056.000000950.0000004.7100004.7200002.910000
50%0.7000003.0000003.00000061.80000057.0000002401.0000005.7000005.7100003.530000
75%1.0400005.0000004.00000062.50000059.0000005324.2500006.5400006.5400004.040000
max5.0100006.0000007.00000079.00000095.00000018823.00000010.74000058.90000031.800000
\n", + "
" + ], + "text/plain": [ + " carat color clarity depth table \\\n", + "count 53940.000000 53940.000000 53940.000000 53940.000000 53940.000000 \n", + "mean 0.797940 3.405803 3.051020 61.749405 57.457184 \n", + "std 0.474011 1.701105 1.647136 1.432621 2.234491 \n", + "min 0.200000 0.000000 0.000000 43.000000 43.000000 \n", + "25% 0.400000 2.000000 2.000000 61.000000 56.000000 \n", + "50% 0.700000 3.000000 3.000000 61.800000 57.000000 \n", + "75% 1.040000 5.000000 4.000000 62.500000 59.000000 \n", + "max 5.010000 6.000000 7.000000 79.000000 95.000000 \n", + "\n", + " price x y z \n", + "count 53940.000000 53932.000000 53933.000000 53932.000000 \n", + "mean 3932.799722 5.732007 5.735270 3.540265 \n", + "std 3989.439738 1.119670 1.140339 0.702667 \n", + "min 326.000000 3.730000 3.680000 1.070000 \n", + "25% 950.000000 4.710000 4.720000 2.910000 \n", + "50% 2401.000000 5.700000 5.710000 3.530000 \n", + "75% 5324.250000 6.540000 6.540000 4.040000 \n", + "max 18823.000000 10.740000 58.900000 31.800000 " + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "diamonds.describe()" ] }, { @@ -361,11 +1622,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "x (%): 0.00014831294030404152\n", + "y (%): 0.00012977382276603634\n", + "z (%): 0.00014831294030404152\n" + ] + } + ], "source": [ - "#your code here" + "for col in diamonds.columns:\n", + " if diamonds[col].isnull().sum():\n", + " print(col + ' (%): ' + str(sum(diamonds[col].isnull())/len(diamonds)))\n", + " \n", + "# these values are such a small percentage of our dataset that we can just drop them\n", + "diamonds.dropna(inplace= True)" ] }, { @@ -380,20 +1656,183 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#your code here" + "execution_count": 17, + 
"metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
caratcolorclaritydepthtablepricexyz
count53932.00000053932.00000053932.00000053932.00000053932.00000053932.00000053932.00000053932.00000053932.000000
mean0.7978793.4057893.05110161.74933657.4570293932.1360795.7320075.7352543.540265
std0.4739861.7011651.6471091.4325142.2340643988.7348351.1196701.1403430.702667
min0.2000000.0000000.00000043.00000043.000000326.0000003.7300003.6800001.070000
25%0.4000002.0000002.00000061.00000056.000000949.7500004.7100004.7200002.910000
50%0.7000003.0000003.00000061.80000057.0000002401.0000005.7000005.7100003.530000
75%1.0400005.0000004.00000062.50000059.0000005324.0000006.5400006.5400004.040000
max5.0100006.0000007.00000079.00000095.00000018823.00000010.74000058.90000031.800000
\n", + "
" + ], + "text/plain": [ + " carat color clarity depth table \\\n", + "count 53932.000000 53932.000000 53932.000000 53932.000000 53932.000000 \n", + "mean 0.797879 3.405789 3.051101 61.749336 57.457029 \n", + "std 0.473986 1.701165 1.647109 1.432514 2.234064 \n", + "min 0.200000 0.000000 0.000000 43.000000 43.000000 \n", + "25% 0.400000 2.000000 2.000000 61.000000 56.000000 \n", + "50% 0.700000 3.000000 3.000000 61.800000 57.000000 \n", + "75% 1.040000 5.000000 4.000000 62.500000 59.000000 \n", + "max 5.010000 6.000000 7.000000 79.000000 95.000000 \n", + "\n", + " price x y z \n", + "count 53932.000000 53932.000000 53932.000000 53932.000000 \n", + "mean 3932.136079 5.732007 5.735254 3.540265 \n", + "std 3988.734835 1.119670 1.140343 0.702667 \n", + "min 326.000000 3.730000 3.680000 1.070000 \n", + "25% 949.750000 4.710000 4.720000 2.910000 \n", + "50% 2401.000000 5.700000 5.710000 3.530000 \n", + "75% 5324.000000 6.540000 6.540000 4.040000 \n", + "max 18823.000000 10.740000 58.900000 31.800000 " + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "diamonds.describe()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ - "#your comments here" + "# The values haven't changed much since the last .describe() but now we don't have any min = 0\n", + "# for measurements that shouldn't be zero! \n", + "# There also seem to be some pretty high max values in the price, y and z columns. They are much higher than the mean,\n", + "# which could mean they are outliers." ] }, { @@ -407,11 +1846,14 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ - "#your code here" + "# taking the top 25% of our data based on price.\n", + "outliers = diamonds[diamonds['price'] > diamonds['price'].quantile(.75)]\n", + "\n", + "# This selected 13485 data points."
] }, { @@ -423,11 +1865,26 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#Run this code\n", + "execution_count": 20, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAl4AAAEhCAYAAACuva4PAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAXSklEQVR4nO3df4xdZ33n8ffXM2FxoGzi8cRrOUmdEmsLEo1TjaKsAlXi2O00W0hawaoRwlMR4Ur8cKCoJa1K0kW7FaxaUowqdl2S5VqiQBpAMcgMGXsDiH+yjMGNQ02xGxnixNjDJSlJ84PMzHf/mBPWMZ5kxp7znHOv3y9pdO859xzfTyLr+HOec+Y8kZlIkiSpfsuaDiBJknS2sHhJkiQVYvGSJEkqxOIlSZJUiMVLkiSpEIuXJElSIYNNB1iIlStX5tq1a5uOIZ119u7d++PMHG46h06Px06pGS927OyJ4rV27VomJyebjiGddSLiB01n0Onz2Ck148WOnV5qlCRJKsTiJUmSVEitxSsizouIuyPiexFxICL+U0SsiIiJiDhYvZ5fZwZJkqS2qHvE62PAeGb+KnAZcAC4BdiTmeuAPdWyJElS36uteEXEq4DfAO4AyMyfZebjwPVAp9qsA9xQVwZJkqQ2qXPE61eAKeB/R8R3IuKTEfEKYFVmHgWoXi841c4RsSUiJiNicmpqqsaYkiRJZdRZvAaBXwc+kZmXA//GIi4rZub2zBzJzJHhYR8j1Iu63S5bt26l2+02HUWSeobHzv5WZ/E6AhzJzPur5buZK2LHImI1QPV6vMYMalCn02H//v3s2LGj6SiS1DM8dva32opXZv4IeDgi/mO16lrgn4CdwFi1bgy4p64Mak6322V8fJzMZHx83DM3SVoAj539r+7fanwP8OmIeABYD/wl8GFgU0QcBDZVy+oznU6H2dlZAGZmZjxzk6QF8NjZ/2otXpm5r7pP69cy84bMfCwzu5l5bWauq15/UmcGNWP37t1MT08DMD09zcTERMOJJKn9PHb2P59cr1ps3LiRwcG5qUAHBwfZtGlTw4kkqf08dvY/i5dqMTY2xrJlc3+9BgYG2Lx5c8OJJKn9PHb2P4uXajE0NMTo6CgRwejoKENDQ01HkqTW89jZ/wabDqD+NTY2xuHDhz1jk6RF8NjZ3yxeqs3Q0BDbtm1rOoYk9RSPnf3NS42SJEmFWLwkSZIKsXhJkiQVYvGSJEkqxOIlSZJUiMVLklokIl4eEf83Iv4xIr4bEf+1Wn9JRNwfEQcj4nMR8bKms0paPIuXJLXLs8CGzLwMWA+MRsSVwEeA2zNzHfAYcFODGSWdJouXJLVIznmyWjyn+klgA3B3tb4D3NBAPElnyOIlSS0TEQMRsQ84DkwA/wI8npnT1SZHgDXz7LslIiYjYnJqaqpMYEkLZvGSpJbJzJnMXA9cCFwBvOZUm82z7/bMHMnMkeHh4TpjSjoNFi9JaqnMfBz4GnAlcF5EPD/N24XAo03lknT6LF6S1CIRMRwR51XvlwMbgQPAfcCbq83GgHuaSSjpTDhJtiS1y2qgExEDzJ0c35WZX46IfwI+GxH/DfgOcEeTISWdHouXJLVIZj4AXH6K9Q8xd7+XpB7mpUZJkqRCLF6SJEmFWLwkSZIKsXhJkiQVYvGSJEkqxOIlSZJUiMVLkiSpEIuXJElSIRYvSZKkQixekiRJhVi8JE
mSCrF4SZIkFWLxkiRJKsTiJUmSVIjFS5IkqRCLlyRJUiGDdf7hEXEYeAKYAaYzcyQiVgCfA9YCh4H/kpmP1ZlDkiSpDUqMeF2Tmeszc6RavgXYk5nrgD3VsiRJUt9r4lLj9UCnet8BbmgggyRJUnF1F68E7o2IvRGxpVq3KjOPAlSvF9ScQZIkqRVqvccLuCozH42IC4CJiPjeQnesitoWgIsvvriufJIkScXUOuKVmY9Wr8eBLwJXAMciYjVA9Xp8nn23Z+ZIZo4MDw/XGVOSJKmI2opXRLwiIn7p+ffAbwIPAjuBsWqzMeCeujJIkiS1SZ2XGlcBX4yI57/n7zNzPCK+BdwVETcBPwTeUmMGSZKk1qiteGXmQ8Blp1jfBa6t63slSZLayifXS5IkFWLxkiRJKsTiJUmSVIjFS5IkqRCLlyRJUiEWL0mSpEIsXpLUIhFxUUTcFxEHIuK7EXFztf4vIuKRiNhX/VzXdFZJi1f3XI2SpMWZBt6fmd+uZv/YGxET1We3Z+ZfNZhN0hmyeElSi2TmUeBo9f6JiDgArGk2laSl4qVGSWqpiFgLXA7cX616d0Q8EBF3RsT5jQWTdNosXpLUQhHxSuDzwHsz86fAJ4BXA+uZGxH763n22xIRkxExOTU1VSyvpIWxeElSy0TEOcyVrk9n5hcAMvNYZs5k5izwd8AVp9o3M7dn5khmjgwPD5cLLWlBLF6S1CIREcAdwIHM/OgJ61efsNnvAg+WzibpzHlzvSS1y1XA24D9EbGvWvdnwI0RsR5I4DDwh83Ek3QmLF6S1CKZ+U0gTvHRrtJZJC09LzVKkiQVYvGSJEkqxOIlSZJUiMVLkiSpEIuXJElSIRYvSZKkQixekiRJhVi8JEmSCrF4SZIkFWLxkiRJKsTiJUmSVIjFS5IkqRCLlyRJUiEWL0mSpEIsXpIkSYVYvCRJkgqxeEmSJBVi8ZIkSSrE4iVJklSIxUuSJKkQi5ckSVIhtReviBiIiO9ExJer5Usi4v6IOBgRn4uIl9WdQZIkqQ1KjHjdDBw4YfkjwO2ZuQ54DLipQAZJkqTG1Vq8IuJC4D8Dn6yWA9gA3F1t0gFuqDODJElSW9Q94vU3wJ8As9XyEPB4Zk5Xy0eANafaMSK2RMRkRExOTU3VHFOSJKl+tRWviPgd4Hhm7j1x9Sk2zVPtn5nbM3MkM0eGh4dryShJklTSYI1/9lXAmyLiOuDlwKuYGwE7LyIGq1GvC4FHa8wgSZLUGrWNeGXmn2bmhZm5Fvh94P9k5luB+4A3V5uNAffUlUGSJKlNmniO1weAP4qIQ8zd83VHAxkkSZKKq/NS489l5teAr1XvHwKuKPG9kiRJbeKT6yVJkgqxeElSi0TERRFxX0QciIjvRsTN1foVETFRzfoxERHnN51V0uJZvCSpXaaB92fma4ArgXdFxGuBW4A91awfe6plST3G4iVJLZKZRzPz29X7J5ibcm0NcD1zs32As35IPcviJUktFRFrgcuB+4FVmXkU5soZcEFzySSdLouXJLVQRLwS+Dzw3sz86SL2c7o1qcUsXpLUMhFxDnOl69OZ+YVq9bGIWF19vho4fqp9nW5NajeLlyS1SEQEcw+WPpCZHz3ho53MzfYBzvoh9awiD1CVJC3YVcDbgP0Rsa9a92fAh4G7IuIm4IfAWxrKJ+kMWLwkqUUy85tAzPPxtSWzSFp6XmqUJEkqxOIlSZJUiMVLkiSpEIuXJElSIRYvSZKkQixekiRJhVi8JEmSCrF4SZIkFWLxkiRJKsTiJUmSVIjFS5IkqRCLlyRJUiEWL0mSpEIsXpIkSYVYvCRJkgqxeEmSJBVi8ZIkSSrE4qXadLtdtm7dSrfbbTqKJEmtYPFSbTqdDvv372fHjh1NR5EkqRUsXqpFt9tlfHyczGR8fNxRL0mSsHipJp1Oh9nZWQBmZmYc9ZIkCYuXarJ7926mp6cBmJ6eZmJiouFEkiQ17yWLV0S8OyLOLxFG/W
Pjxo0MDg4CMDg4yKZNmxpOJElS8xYy4vUfgG9FxF0RMRoRUXco9b6xsTGWLZv76zUwMMDmzZsbTiSVFRF7IuK6k9ZtbyqPpHZ4yeKVmX8OrAPuAP4AOBgRfxkRr645m3rY0NAQo6OjRASjo6MMDQ01HUkq7RLgAxFx2wnrRpoKI6kdFnSPV2Ym8KPqZxo4H7g7Iv5HjdnU48bGxnjd617naJfOVo8D1wKrIuJLEfHvmw4kqXmDL7VBRGwFxoAfA58E/jgzn4uIZcBB4E/m2e/lwDeAf1d9z92ZeVtEXAJ8FlgBfBt4W2b+bCn+Y9QuQ0NDbNu2rekYUlMiM6eBd0bEHwDfZO6kVdJZbCEjXiuB38vM38rMf8jM5wAycxb4nRfZ71lgQ2ZeBqwHRiPiSuAjwO2ZuQ54DLjpjP4LJKmd/ufzbzLzU8zdqnFvU2EktcNC7vG6NTN/MM9nB15kv8zMJ6vFc6qfBDYAd1frO8ANi0osST0gM//XSct7M/PtTeWR1A61PscrIgYiYh9wHJgA/gV4vBp+BzgCrJln3y0RMRkRk1NTU3XGlCRJKqLW4pWZM5m5HrgQuAJ4zak2m2ff7Zk5kpkjw8PDdcaUJEkqosiT6zPzceBrwJXAeRHx/E39FwKPlsggSZLUtNqKV0QMR8R51fvlwEbgAHAf8OZqszHgnroySFKviYg7I+J4RDx4wrq/iIhHImJf9XPdi/0ZktqrzhGv1cB9EfEA8C1gIjO/DHwA+KOIOAQMMfdgVknSnE8Bo6dYf3tmrq9+dhXOJGmJvORzvE5XZj4AXH6K9Q8xd7+XJOkkmfmNiFjbdA5J9Shyj5ck6Yy9OyIeqC5FzvsgVn8jXGo3i5cktd8ngFcz9zDqo8Bfz7ehvxEutZvFS5JaLjOPVY/nmQX+Dm/XkHqWxUuSWi4iVp+w+LvAg/NtK6ndaru5XpK0eBHxGeBqYGVEHAFuA66OiPXMPXD6MPCHjQWUdEYsXpLUIpl54ylW+9gdqU94qVGSJKkQi5ckSVIhFi9JkqRCLF6SJEmFWLwkSZIKsXhJkiQVYvGSJEkqxOIlSZJUiMVLkiSpEIuXJElSIRYvSZKkQixekiRJhVi8JEmSCrF4SZIkFWLxkiRJKsTiJUmSVIjFS5IkqRCLlyRJUiEWL0mSpEIsXpIkSYVYvCRJkgqxeEmSJBVi8ZIkSSrE4iVJklSIxUuSJKkQi5ckSVIhFi9JkqRCLF6SJEmFWLwkSZIKsXhJkiQVUlvxioiLIuK+iDgQEd+NiJur9SsiYiIiDlav59eVQZIkqU3qHPGaBt6fma8BrgTeFRGvBW4B9mTmOmBPtSxJAiLizog4HhEPnrDOE1apT9RWvDLzaGZ+u3r/BHAAWANcD3SqzTrADXVlkKQe9Clg9KR1nrBKfaLIPV4RsRa4HLgfWJWZR2GunAEXzLPPloiYjIjJqampEjElqXGZ+Q3gJyet9oRV6hO1F6+IeCXweeC9mfnThe6XmdszcyQzR4aHh+sLKEntt6ATVvCkVWq7WotXRJzDXOn6dGZ+oVp9LCJWV5+vBo7XmUGSziaetErtVudvNQZwB3AgMz96wkc7gbHq/RhwT10ZJKlPeMIq9Yk6R7yuAt4GbIiIfdXPdcCHgU0RcRDYVC1LkubnCavUJwbr+oMz85tAzPPxtXV9ryT1soj4DHA1sDIijgC3MXeCeldE3AT8EHhLcwklnYnaipckafEy88Z5PvKEVeoDThkkSZJUiMVLkiSpEIuXJElSIRYvSZKkQixekiRJhVi8JEmSCrF4SZLUIt1ul61bt9LtdpuOohpYvCRJapFOp8P+/fvZsWNH01FUA4uXJEkt0e12GR8fJzMZHx931KsPWbwkSWqJTqfD7OwsADMzM4569SGLlyRJLbF7926mp6cBmJ6eZmJiouFEWmoWL0mSWmLjxo0MDs5Nozw4OMimTZsaTqSlZvGSJKklxsbGWLZs7p/mgYEBNm/e3HAiLT
WLlyRJLTE0NMTo6CgRwejoKENDQ01H0hKzeEmS1CJvetObOPfcc3njG9/YdBTVwOIlSVKL7Ny5k6eeeoovfelLTUdRDSxekiS1hM/x6n8WL0mSWqLT6TAzMwPMPU7C53j1H4uXJEktsXv37p8Xr5mZGZ/j1YcsXpIktcTrX//6Fyy/4Q1vaCiJ6mLxkiSpJSKi6QiqmcVLkqSW+PrXv/6iy+p9Fi9JklrKEbD+Y/GSJKklnnnmmRcsP/300w0lUV0sXpIkSYVYvCRJkgqxeEmSJBUy2HQASZLq9PGPf5xDhw41HeO03XzzzU1HWJBLL72U97znPU3HaD1HvCRJkgpxxEuS1Nd6aRTm6quv/oV1H/vYx8oHUW0c8ZIkqSXe+ta3vmB58+bNDSVRXSxeqk2322Xr1q10u92mo0h9ISIOR8T+iNgXEZNN59HSe8c73vGC5be//e0NJVFdLF6qTafTYf/+/ezYsaPpKFI/uSYz12fmSNNBVI+VK1cCjnb1K4uXatHtdvnKV75CZrJr1y5HvSRpgdasWcNll13maFefqq14RcSdEXE8Ih48Yd2KiJiIiIPV6/l1fb+a1el0eO655wB47rnnHPWSlkYC90bE3ojYcqoNImJLRExGxOTU1FTheJJeSp0jXp8CRk9adwuwJzPXAXuqZfWhe++99wXLX/3qVxtKIvWVqzLz14HfBt4VEb9x8gaZuT0zRzJzZHh4uHxCSS+qtuKVmd8AfnLS6uuBTvW+A9xQ1/erXSKi6QhSz8vMR6vX48AXgSuaTSRpsUrf47UqM48CVK8XFP5+FfLMM8+8YPnpp59uKInUHyLiFRHxS8+/B34TePDF95LUNq19gGp1/8IWgIsvvrjhNJLUuFXAF6vR40Hg7zNzvNlIkhardPE6FhGrM/NoRKwGjs+3YWZuB7YDjIyMZKmAktRGmfkQcFnTOSSdmdLFaycwBny4er2n8Pf3NCd6LcOJXiVJdamteEXEZ4CrgZURcQS4jbnCdVdE3AT8EHhLXd8vSVp6vX4C2Aue///bKyervayJE+3aildm3jjPR9fW9Z39rpdGYa655hoy//8V4ohwolepDxw6dIh9Dx5g5twVTUfpW8t+Nnfs3PvQsYaT9LeBp05+8EIZrb25Xr3tgx/8IB/60Id+vnzrrbc2mEbSUpo5dwVP/+p1TceQzsjy7+1q5HudMki12LBhw8/fRwTXXHNNg2kkSWoHR7xUm4suuoiHH37Y0S6pjzzyyCMMPPWvjY0WSEtl4KkujzwyXfx7LV6qzYoVK1ixYoWjXZIkVSxekqQFW7NmDT96dtB7vNTzln9vF2vWrCr+vd7jJUmSVIjFS5IkqRAvNUqSFmXgqZ94c32Nlj3zUwBmX/6qhpP0t7nneJW/1GjxkiQt2KWXXtp0hL536NATAFz6K+VLwdllVSN/n8/a4uW0F/Vz2otynF9Spfj3rH7PHzOd7aM/nbXFy2kv6ue0F2U0Ne2FJGnxztriBU57of7gvTaS1Dv8rUZJkqRCztoRL6e9UL9oatoLSdLiOeIlSZJUyFk74uW0F+oXTU17IUlaPEe8JEmSCjlrR7zApy/Xzacvl9HU05clSYt31hYvn75cP5++XEozT1+WJC3eWVu8fPpy/Xz6sqQ26LWZSr7//e/z7LPP8s53vpNzzjmn6TgL5gwaC+M9XpIktcjs7Cyzs7McO+asH/3orB3xkiSdHXppFKbb7XLjjTcC8OSTT3LrrbcyNDTUcCotJYtXD+m14fJenSTb4XJJTel0OszOzgIwMzPDjh07eN/73tdwKi0lLzWqNsuXL2f58uVNx5CknrF7926mp+dmopienmZiYqLhRFpqjnj1EEdhJKm/bdy4kV27djE9Pc3g4CCbNm1qOpKWmCNekiS1xNjYGMuWzf3TPDAwwObNmxtOpKVm8ZIkqSWGhoYYHR0lIhgdHfXG+j7kpUZJklpkbGyMw4cPO9rVpxzxkqQeERGjEfHPEX
EoIm5pOo/qMTQ0xLZt2xzt6lMWL0nqARExAPwt8NvAa4EbI+K1zaaStFgWL0nqDVcAhzLzocz8GfBZ4PqGM0laJIuXJPWGNcDDJywfqdZJ6iEWL0nqDXGKdfkLG0VsiYjJiJicmpoqEEvSYli8JKk3HAEuOmH5QuDRkzfKzO2ZOZKZI8PDw8XCSVoYi5ck9YZvAesi4pKIeBnw+8DOhjNJWqTI/IWR6taJiCngB03n0GlZCfy46RA6bb+cmQ6btEREXAf8DTAA3JmZ//0ltvfY2bs8dva2eY+dPVG81LsiYjIzR5rOIUm9xGNn//JSoyRJUiEWL0mSpEIsXqrb9qYDSFIP8tjZp7zHS5IkqRBHvCRJkgqxeEmSJBVi8ZIkSSrE4iVJklSIxUuSJKmQ/wcvAo8vg+faGAAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# Run this code\n", "fig, ax = plt.subplots(1,2, figsize=(10, 5))\n", "sns.boxplot(y=diamonds.y, ax=ax[0])\n", "sns.boxplot(y=diamonds.z, ax=ax[1])\n", @@ -443,11 +1900,73 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#your code here" + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
caratcutcolorclaritydepthtablepricexyz
484100.51Very Good5461.854.719705.125.1531.8
\n", + "
" + ], + "text/plain": [ + " carat cut color clarity depth table price x y z\n", + "48410 0.51 Very Good 5 4 61.8 54.7 1970 5.12 5.15 31.8" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# filtering for z > 10\n", + "diamonds.loc[diamonds['z'] > 10]" ] }, { @@ -461,11 +1980,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "metadata": {}, "outputs": [], "source": [ - "#your code here" + "# re-calculate z for the outlier\n", + "diamonds.loc[diamonds['z'] > 10, 'z'] = diamonds.loc[diamonds['z'] > 10].apply(lambda row: calculate_z(row.x, row.y, row.depth), axis= 1)" ] }, { @@ -477,11 +1997,63 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "diamonds.loc[48410]" + "execution_count": 23, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
caratcutcolorclaritydepthtablepricexyz
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [carat, cut, color, clarity, depth, table, price, x, y, z]\n", + "Index: []" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "diamonds.loc[diamonds['z'] > 10]\n", + "# aaand it's gone." ] }, { @@ -495,11 +2067,129 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#your code here" + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
caratcutcolorclaritydepthtablepricexyz
240672.00Premium2158.957.0122108.0958.908.06
259984.01Premium1061.061.01522310.1410.106.17
274155.01Fair0065.559.01801810.7410.546.98
276304.50Fair0065.858.01853110.2310.166.72
491890.51Ideal5461.855.020755.1531.805.12
\n", + "
" + ], + "text/plain": [ + " carat cut color clarity depth table price x y z\n", + "24067 2.00 Premium 2 1 58.9 57.0 12210 8.09 58.90 8.06\n", + "25998 4.01 Premium 1 0 61.0 61.0 15223 10.14 10.10 6.17\n", + "27415 5.01 Fair 0 0 65.5 59.0 18018 10.74 10.54 6.98\n", + "27630 4.50 Fair 0 0 65.8 58.0 18531 10.23 10.16 6.72\n", + "49189 0.51 Ideal 5 4 61.8 55.0 2075 5.15 31.80 5.12" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# same for y > 10\n", + "diamonds.loc[diamonds['y'] > 10]" ] }, { @@ -513,11 +2203,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "metadata": {}, "outputs": [], "source": [ - "#your code here" + "def calculate_y(x, z, d):\n", + " \"\"\"\n", + " Calculates diamonds width based on length and depth.\n", + " Input: diamonds length, depth and percentage depth.\n", + " Output: diamonds width.\n", + " \"\"\"\n", + " d = d/100\n", + " return ((2*z) - (d*x))/d\n", + "\n", + "# recalculating y for the outlier rows\n", + "diamonds.loc[(diamonds['y'] > 10), 'y'] = diamonds.loc[diamonds['y'] > 10].apply(lambda row: calculate_y(row.x, row.z, row.depth), axis= 1)" ] }, { @@ -531,11 +2231,135 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#your thoughts here" + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
caratcutcolorclaritydepthtablepricexyz
240672.00Premium2158.957.0122108.0919.2784218.06
259984.01Premium1061.061.01522310.1410.0895086.17
274155.01Fair0065.559.01801810.7410.5729776.98
276304.50Fair0065.858.01853110.2310.1955326.72
491890.51Ideal5461.855.020755.1511.4195795.12
\n", + "
" + ], + "text/plain": [ + " carat cut color clarity depth table price x y \\\n", + "24067 2.00 Premium 2 1 58.9 57.0 12210 8.09 19.278421 \n", + "25998 4.01 Premium 1 0 61.0 61.0 15223 10.14 10.089508 \n", + "27415 5.01 Fair 0 0 65.5 59.0 18018 10.74 10.572977 \n", + "27630 4.50 Fair 0 0 65.8 58.0 18531 10.23 10.195532 \n", + "49189 0.51 Ideal 5 4 61.8 55.0 2075 5.15 11.419579 \n", + "\n", + " z \n", + "24067 8.06 \n", + "25998 6.17 \n", + "27415 6.98 \n", + "27630 6.72 \n", + "49189 5.12 " + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "diamonds.loc[diamonds['y'] > 10]" ] }, { @@ -547,9 +2371,22 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAl4AAAEkCAYAAAD+cD+8AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAZxUlEQVR4nO3df5Bd5X3f8c9He7GRiInhshZEoK7JJtAGB2yuMTFjSECbbBxsMv0xAzNE64TpTt10d63atY3TMZM/4nFdnCIpM2l3DOFqwshjY9ykrrxlReJidwjuCuRIRErZEglpBWi5qg2N+KG7++0fe2GkrbT6ted57rn7fs3s7H3OPfbzGUZz9NFzz32OI0IAAAAo3rLcAQAAAJYKihcAAEAiFC8AAIBEKF4AAACJULwAAAASoXgBAAAkUljxsn2Z7b+0vcv2M7ZHWscvtD1u+9nW7wuKygAAANBOXNQ+XrYvkXRJRDxl+12Stkn6TUmfkHQoIr5s+/OSLoiIzxUSAgAAoI0UtuIVES9ExFOt169K2iVplaTbJNVbp9U1V8YAAAA6XmErXsdMYvdIelzSVZKej4h3H/Xe/4mI/+/jRtuDkgYl6bzzzrv2yiuvLDwngGNt27bt5Yjozp0DZ+aiiy6Knp6e3DGAJWeha2el6Mlt/5Skb0n6VES8YvuU/ncRMSppVJJqtVpMTEwUFxLAcdnemzsDzlxPT4+4dgLpLXTtLPRbjbbP0VzpeigiHmkdfql1/9db94EdLDIDAABAuyjyW42WdL+kXRHxh0e99eeSBlqvByT9WVEZAAAA2kmRHzXeIOm3JO2wvb117AuSvizpG7bvkvS8pH9WYAYAAIC2UVjxiogfSDrRDV23FDUvAABAu2LnegAAgEQoXgAAAIlQvFCoRqOh4eFhNRqN3FEAoBS4bnY2ihcKVa/XtWPHDm3atCl3FAAoBa6bnY3ihcI0Gg2NjY0pIjQ2Nsa/3gDgJLhudj6KFwpTr9c1OzsrSZqZmeFfb8BZsr3O9jO2d9rebPvc3JmwuLhudj6KFwqzdetWNZtNSVKz2dT4+HjmREB52V4laVhSLSKuktQl6fa8qbDYuG52PooXCrNmzRpVKnNbxVUqFfX19WVOBJ
ReRdJy2xVJKyQdyJwHi4zrZuejeKEwAwMDWrZs7o9YV1eX1q5dmzkRUF4RMSXpXs098eMFST+JiEfnn2d70PaE7Ynp6enUMXGWuG52PooXClOtVtXf3y/b6u/vV7VazR0JKC3bF0i6TdJ7Jf2MpPNs3zn/vIgYjYhaRNS6u7tTx8RZ4rrZ+SheKNTAwIDe97738a824OytkfR3ETEdEUckPSLpw5kzoQBcNztbkQ/JBlStVrVhw4bcMYBO8Lyk622vkPSa5p55O5E3EorAdbOzseIFACUQEU9KeljSU5J2aO76PZo1FIDTxooXAJRERNwj6Z7cOQCcOVa8AAAAEqF4AQAAJELxAgAASITiBQAAkAjFCwAAIBGKFwAAQCIULwAAgEQoXgAAAIlQvAAAABIprHjZfsD2Qds7jzp2je2/sr3d9oTt64qaHwAAoN0UueL1oKT+ece+Iun3I+IaSV9sjQEAAJaEwopXRDwu6dD8w5LOb73+aUkHipofAACg3aR+SPanJP032/dqrvR9+EQn2h6UNChJq1evTpMOAACgQKlvrv+kpHURcZmkdZLuP9GJETEaEbWIqHV3dycLCAAAUJTUxWtA0iOt19+UxM31AABgyUhdvA5Iuqn1+mZJzyaeHwAAIJvC7vGyvVnSL0u6yPZ+SfdI+ueS1tuuSHpdrXu4AAAAloLCildE3HGCt64tak4AAIB2xs71AAAAiVC8AAAAEqF4AQAAJELxAgAASITiBQAAkAjFCwAAIBGKFwAAQCIULwAAgEQoXgBQAravsL39qJ9XbH8qdy4svkajoeHhYTUajdxRUACKFwCUQET8bURcExHXaO4JIIclfTtzLBSgXq9rx44d2rRpU+4oKADFCwDK5xZJ/zsi9uYOgsXVaDQ0NjamiNDY2BirXh2I4gUA5XO7pM3He8P2oO0J2xPT09OJY+Fs1et1zc7OSpJmZmZY9epAFC8AKBHb75D0cUnfPN77ETEaEbWIqHV3d6cNh7O2detWNZtNSVKz2dT4+HjmRFhsFC8AKJdfl/RURLyUOwgW35o1a1SpVCRJlUpFfX19mRNhsVG8AKBc7tAJPmZE+Q0MDGjZsrm/mru6urR27drMibDYKF4AUBK2V0jqk/RI7iwoRrVaVX9/v2yrv79f1Wo1dyQsskruAACAUxMRhyXxN3GHGxgY0J49e1jt6lCseAEAACRC8QIAoI2wgWpno3gBANAm2EC181G8AABoE2yg2vkoXgAAtAk2UO18FC8AANoEG6h2vsKKl+0HbB+0vXPe8SHbf2v7GdtfKWp+AADKhg1UO1+RK14PSuo/+oDtX5F0m6RfjIhfkHRvgfMDAFAqbKDa+QrbQDUiHrfdM+/wJyV9OSLeaJ1zsKj5AQAoIzZQ7Wyp7/H6eUkfsf2k7f9u+4MnOtH2oO0J2xPT09MJIwIAkE+1WtWGDRtY7epQqYtXRdIFkq6X9G8kfcO2j3diRIxGRC0iat3d3SkzAgCQTaPR0PDwMHt4dajUxWu/pEdizg8lzUq6KHEGAADaFjvXd7bUxes/S7pZkmz/vKR3SHo5cQYAANrS0TvXf/e732XVqwMVuZ3EZklPSLrC9n7bd0l6QNLlrS0mvi5pICKiqAwAAJRJvV7XkSNHJElHjhxh1asDFfmtxjtO8NadRc0JAECZjY+P6631iIjQo48+qnXr1mVOhcXEzvUAALSJlStXLjhG+VG8AABoEy+99NKCY5QfxQsAgDbxkY985JjxjTfemCkJikLxAgCgTbz55pvHjN94441MSVAUihcAAG3i+9///oJjlB/FCwCANjE7O7vgGOVH8QIAoE3Mf4reCZ6qhxKjeAEA0CY+9KEPLThG+VG8AABoE3v27DlmvHfv3jxBUBiKFwAAbeKFF144ZnzgwIFMSVAUihcAAG2Ce7w6H8ULAErC9rttP2x7t+1dtn8pdyYsrptuumnBMcqP4gUA5bFe0lhEXCnpakm7MufBIrv11l
uPGX/sYx/LlARFoXgBQAnYPl/SjZLul6SIeDMifpw3FRbb+vXrjxnfd999mZKgKBQvACiHyyVNS/oT20/b/prt83KHwuLat2/fgmOUH8ULAMqhIukDkv44It4v6e8lfX7+SbYHbU/Ynpienk6dEcBJULwAoBz2S9ofEU+2xg9rrogdIyJGI6IWEbXu7u6kAQGcHMULAEogIl6UtM/2Fa1Dt0j6m4yRUAC2k+h8ldwBAACnbEjSQ7bfIek5Sb+dOQ8WWUQsOEb5UbwAoCQiYrukWu4cAM4cHzUCAAAkQvECAKBNnHvuuQuOUX6FFS/bD9g+aHvncd77jO2wfVFR8wMAUDavv/76gmOUX5H3eD0o6Y8kbTr6oO3LJPVJer7AuQEAkCRt3LhRk5OTuWOcsZGRkdwRTklvb6+GhoZyx2h7ha14RcTjkg4d563/IOmzkviqBgAAWFKSfqvR9sclTUXEj062N4ntQUmDkrR69eoE6QAAnahMqzCf/vSntW3btrfH1157rb761a9mTITFluzmetsrJP2epC+eyvnsvgwAWGq+8IUvLDhG+aX8VuPPSnqvpB/Z3iPpUklP2b44YQYAANpWtVrVihUrJM2tdlWr1cyJsNiSfdQYETskveetcat81SLi5VQZAABodz09Pdq7dy+rXR2qyO0kNkt6QtIVtvfbvquouQAA6BTnnHOOent7We3qUIWteEXEHSd5v6eouQEAANoRO9cDAAAkQvECAABIhOIFAACQCMULAAAgEYoXAABAIhQvAACARCheAAAAiVC8AAAAEqF4AQAAJELxAgAASITiBQAAkAjFCwAAIBGKFwAAQCIULwAAgEQoXgAAAIlQvAAAABKp5A4AADg1tvdIelXSjKRmRNTyJgJwuiheAFAuvxIRL+cOAeDM8FEjAABAIhQvACiPkPSo7W22B493gu1B2xO2J6anpxPHA3AyFC8AKI8bIuIDkn5d0u/avnH+CRExGhG1iKh1d3enTwhgQRQvACiJiDjQ+n1Q0rclXZc3EYDTRfECgBKwfZ7td731WtKvStqZNxWA01VY8bL9gO2Dtncedezf295t+69tf9v2u4uaHwA6zEpJP7D9I0k/lPRfI2IscyYAp6nIFa8HJfXPOzYu6aqI+EVJ/0vS3QXODwAdIyKei4irWz+/EBF/kDsTgNNXWPGKiMclHZp37NGIaLaGfyXp0qLmBwAAaDc57/H6HUnfPdGbfCUaAAB0mizFy/bvSWpKeuhE5/CVaAAA0GmSPzLI9oCkWyXdEhGRen4AAIBckhYv2/2SPifppog4nHJuAACA3IrcTmKzpCckXWF7v+27JP2RpHdJGre93fZ/LGp+AACAdlPktxrviIhLIuKciLg0Iu6PiN6IuCwirmn9/Iui5kd7aDQaGh4eVqPRyB0FAIDs2LkeharX69qxY4c2bdqUOwoAANmdtHjZ/le2L0gRBp2l0WhobGxMEaGxsTFWvQAAS96prHhdLOl/2v6G7X7bLjoUOkO9Xtfs7KwkaWZmhlUvoMX2Y7Y/Ou/YaK48ANI5afGKiH8r6eck3S/pE5Ketf0l2z9bcDaU3NatW9Vszj2ooNlsanx8PHMioG28V9LnbN9z1LFarjAA0jmle7xa+2292PppSrpA0sO2v1JgNpTcmjVrVKnM7VhSqVTU19eXORHQNn4s6RZJK23/F9s/nTsQgDRO5R6vYdvbJH1F0v+Q9L6I+KSkayX9k4LzocQGBga0bNncH7Guri6tXbs2cyKgbTgimhHxLyV9S9IPJL0ncyYACZzKitdFkv5xRPxaRHwzIo5IUkTMam4HeuC4qtWq+vv7ZVv9/f2qVqu5IwHt4u09DCPiQc3dxvForjAA0jnpzvUR8cUF3tu1uHHQaQYGBrRnzx5Wu4CjRMR/mjfeJul3MsUBkFDyZzViaalWq9qwYUPuGAAAtAU2UAUAAEiE4gUAAJAIxQsAACARihcAAEAiFC8UqtFoaHh4mO
c0AgAgihcKVq/XtWPHDp7TCACAKF4oUKPR0NjYmCJCY2NjrHoBAJY8ihcKU6/XNTs7K0mamZlh1QsAsORRvFCYrVu3qtlsSpKazabGx8czJwIAIC+KFwqzZs0aVSpzD0eoVCrq6+vLnAgAgLwoXijMwMCAbEuSli1bxvMagUVgu8v207a/kzsLgNPHsxpRmGq1qosvvlj79u3TypUrVa1Wc0cCOsGIpF2Szs8x+caNGzU5OZlj6iXjrf++IyMjmZN0vt7eXg0NDSWdk+KFwjQaDU1NTUmSpqam1Gg0KF/AWbB9qaTfkPQHkv51jgyTk5PavnOXZlZcmGP6JWHZmyFJ2vbcS5mTdLauw4eyzEvxQmFGR0ff/lbj7OysRkdHdffdd2dOBZTafZI+K+ldJzrB9qCkQUlavXp1ISFmVlyo1678aCH/30Aqy3dvyTJvYfd42X7A9kHbO486dqHtcdvPtn5fUNT8yO+xxx5bcAzg1Nm+VdLBiNi20HkRMRoRtYiodXd3J0oH4FQVeXP9g5L65x37vKTHIuLnJD3WGqNDRcSCYwCn5QZJH7e9R9LXJd1s+0/zRgJwugorXhHxuKT5H6DeJqneel2X9JtFzY/8PvjBDx4zvu666zIlAcovIu6OiEsjokfS7ZL+IiLuzBwLwGlKvZ3Eyoh4QZJav99zohNtD9qesD0xPT2dLCAWz/PPP3/MeO/evZmSAADQHtp2Hy/uUyi/AwcOLDgGcGYi4nsRcWvuHABOX+ri9ZLtSySp9ftg4vkBAACySV28/lzSQOv1gKQ/Szw/Eurq6lpwDADAUlPkdhKbJT0h6Qrb+23fJenLkvpsPyuprzVGh5p/Mz031wMAlrrCNlCNiDtO8NYtRc2J9rJv375jxvv378+UBACA9tC2N9ej/OYXrflFDACApYbihcJwjxcAAMeieKEwMzMzC44BAFhqKF4AAACJULwAAAASKexbjVh8Gzdu1OTkZO4YZ2VkZCR3hJPq7e3V0NBQ7hgAgA5E8UJhbCsijhkDKLepqSl1Hf6Jlu/ekjsKcFa6Djc0NdVMPi/Fq0TKtgozMTGhz3zmM2+P7733Xl177bUZEwEAkBfFC4Wp1Wpvr3q9853vpHQBHWDVqlV68Y2KXrvyo7mjAGdl+e4tWrVqZfJ5ubkeherp6ZEkfelLX8obBACANkDxQqHOP/98XX311ax2AQAgihcAAEAyFC8AAIBEKF4AAACJULwAAAASoXgBAAAkQvECAABIhOIFAACQCMULAAAgEYoXAABAIhQvAACARCheAAAAiWQpXrbX2X7G9k7bm22fmyMHAJSF7XNt/9D2j1rXz9/PnQnA6UtevGyvkjQsqRYRV0nqknR76hwAUDJvSLo5Iq6WdI2kftvXZ84E4DRVMs673PYRSSskHciUAwBKISJC0v9tDc9p/US+RADORPIVr4iYknSvpOclvSDpJxHxaOocAFA2trtsb5d0UNJ4RDyZOxOA05Pjo8YLJN0m6b2SfkbSebbvPM55g7YnbE9MT0+njgkAbSciZiLiGkmXSrrO9lXzz+HaCbS3HDfXr5H0dxExHRFHJD0i6cPzT4qI0YioRUStu7s7eUgAaFcR8WNJ35PUf5z3uHYCbSzHPV7PS7re9gpJr0m6RdJEhhwAUBq2uyUdiYgf216uuX/E/rscWboOH9Ly3VtyTL0kLHv9FUnS7LnnZ07S2boOH5K0Mvm8yYtXRDxp+2FJT0lqSnpa0mjqHABQMpdIqtvu0tynFd+IiO+kDtHb25t6yiVncvJVSVLv5elLwdKyMsuf5yzfaoyIeyTdk2NuACijiPhrSe/PnWNoaCh3hI43MjIiSVq/fn3mJCgCO9cDAAAkQvECAABIhOIFAACQCMULAAAgEYoXAABAIhQvAACARCheAAAAiVC8AAAAEqF4AQAAJELxAgAASITiBQAAkAjFCwAAIBGKFwAAQCIULwAAgEQoXgAAAIlQvAAAABKheAEAAC
RSyR0gl40bN2pycjJ3jI731n/jkZGRzEk6W29vr4aGhnLHAACcxJItXpOTk9q+c5dmVlyYO0pHW/ZmSJK2PfdS5iSdq+vwodwRAACnaMkWL0maWXGhXrvyo7ljAGdl+e4tuSMAAE4R93gBAAAkQvECAABIhOIFAACQSJbiZfvdth+2vdv2Ltu/lCMHAABASrlurl8vaSwi/qntd0hakSkHAABAMsmLl+3zJd0o6ROSFBFvSnozdQ4AAIDUcnzUeLmkaUl/Yvtp21+zfd78k2wP2p6wPTE9PZ0+JQAAwCLLUbwqkj4g6Y8j4v2S/l7S5+efFBGjEVGLiFp3d3fqjADQVmxfZvsvW/fFPmObx0EAJZSjeO2XtD8inmyNH9ZcEQMAnFhT0qcj4h9Kul7S79r+R5kzAThNyYtXRLwoaZ/tK1qHbpH0N6lzAECZRMQLEfFU6/WrknZJWpU3FYDTletbjUOSHmp9o/E5Sb+dOsDU1JS6Dv+Ex62g9LoONzQ11cwdAwnZ7pH0fklPHue9QUmDkrR69eqkuQCcXJbiFRHbJdVyzA0AZWb7pyR9S9KnIuKV+e9HxKikUUmq1WqROB6Ak1iyD8letWqVXnyjwkOyUXrLd2/RqlUrc8dAArbP0VzpeigiHsmdB8Dp45FBAFACti3pfkm7IuIPc+cBcGYoXgBQDjdI+i1JN9ve3vphyR4omSX7USMAlElE/ECSc+cAcHZY8QIAAEiE4gUAAJAIxQsAACARihcAAEAiFC8AAIBEKF4AAACJULwAAAASoXgBAAAkQvECAABIZEnvXN91+JCW796SO0ZHW/b6K5Kk2XPPz5ykc3UdPiSJh2QDQBks2eLV29ubO8KSMDn5qiSp93KKQXFW8ucZAEpiyRavoaGh3BGWhJGREUnS+vXrMycBACA/7vECAABIhOIFAACQyJL9qBEAsDRs3LhRk5OTuWOcsreyvnWrRln09vZyG88poHgBANBGli9fnjsCCkTxAgB0NFZh0E64xwsAACARihcAAEAi2YqX7S7bT9v+Tq4MAAAAKeVc8RqRtCvj/AAAAEllKV62L5X0G5K+lmN+AACAHHKteN0n6bOSZk90gu1B2xO2J6anp9MlAwAAKEjy4mX7VkkHI2LbQudFxGhE1CKi1t3dnSgdAAB5NRoNDQ8Pq9Fo5I6CAuRY8bpB0sdt75H0dUk32/7TDDkAAGg79XpdO3bs0KZNm3JHQQGSF6+IuDsiLo2IHkm3S/qLiLgzdQ4AANpNo9HQ2NiYIkJjY2OsenUgdq4vkbI9b0wq5zPHeN4Y2pHtByS9davGVbnzoBj1el2zs3O3P8/MzGjTpk1at25d5lRYTFk3UI2I70XErTkzoFjLly/nuWPA4nhQUn/uECjW1q1b1Ww2JUnNZlPj4+OZE2GxseJVIqzCAEtXRDxuuyd3DhRrzZo12rJli5rNpiqVivr6+nJHwiLjkUEA0EHYiqfcBgYGtGzZ3F/NXV1dWrt2beZEWGwULwDoIGzFU27ValX9/f2yrf7+flWr1dyRsMj4qBEAgDYyMDCgPXv2sNrVoSheAAC0kWq1qg0bNuSOgYLwUSMAlIDtzZKekHSF7f2278qdCcDpY8ULAEogIu7InQHA2WPFCwAAIBGKFwAAQCIULwAAgEQoXgAAAIk4InJnOCnb05L25s6BM3aRpJdzh8AZ+QcRwS6cJcW1s9S4bpbbCa+dpSheKDfbExFRy50DAMqC62bn4qNGAACARCheAAAAiVC8kMJo7gAAUDJcNzsU93gBAAAkwooXAABAIhQvAACARCheAAAAiVC8AAAAEqF4AQAAJPL/AKBjWXmewTQ9AAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], "source": [ "#Run this code\n", "fig, ax = plt.subplots(1,2, figsize=(10, 5))\n", @@ -567,11 +2404,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 28, "metadata": {}, "outputs": [], "source": [ - "#your thoughts here" + "# These values seem to fall more within reasonable range of the overall data distribution." ] }, { @@ -583,11 +2420,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 29, "metadata": {}, "outputs": [], "source": [ - "#your code here" + "diamonds.to_csv('diamonds_clean.csv')" ] } ], @@ -607,7 +2444,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.8" + "version": "3.7.4" } }, "nbformat": 4,