diff --git a/05_lab_imbalance.ipynb b/05_lab_imbalance.ipynb new file mode 100644 index 0000000..e407f06 --- /dev/null +++ b/05_lab_imbalance.ipynb @@ -0,0 +1,1482 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "wm5qqtoK7_Uj" + }, + "source": [ + "# Imbalanced Classes\n", + "## In this lab, we are going to explore a case of imbalanced classes.\n", + "\n", + "\n", + "Like we discussed in class, when we have noisy data, if we are not careful, we can end up fitting our model to the noise in the data and not the 'signal'-- the factors that actually determine the outcome. This is called overfitting, and results in good results in training, and in bad results when the model is applied to real data. Similarly, we could have a model that is too simplistic to accurately model the signal. This produces a model that doesn't work well (ever).\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LD51-ZWi7_Uo" + }, + "source": [ + "### Note: before doing the first commit, make sure you don't include the large csv file, either by adding it to .gitignore, or by deleting it." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "cHCNDp7X7_Up" + }, + "source": [ + "### First, download the data from: https://www.kaggle.com/datasets/ealaxi/paysim1. Import the dataset and provide some descriptive statistics and plots. What do you think will be the important features in determining the outcome?\n", + "### Note: don't use the entire dataset, use a sample instead, with n=100000 elements, so your computer doesn't freeze." 
+ ] + }, + { + "cell_type": "code", + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns" + ], + "metadata": { + "id": "KtupXbbW8o-T" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "THpAQQcP7_Uq", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "outputId": "74416ed7-5ef3-4d8d-f094-7a3e001c3f61" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " step type amount nameOrig oldbalanceOrg newbalanceOrig \\\n", + "251624 14 PAYMENT 3229.29 C294223571 0.0 0.0 \n", + "385062 17 PAYMENT 19036.88 C2107114068 5639.0 0.0 \n", + "391504 17 PAYMENT 11626.88 C177162832 0.0 0.0 \n", + "119204 11 PAYMENT 15820.53 C41067616 0.0 0.0 \n", + "129295 11 CASH_OUT 141208.82 C1190920182 62486.0 0.0 \n", + "\n", + " nameDest oldbalanceDest newbalanceDest isFraud isFlaggedFraud \n", + "251624 M2113985372 0.00 0.00 0.0 0.0 \n", + "385062 M384092134 0.00 0.00 0.0 0.0 \n", + "391504 M476808705 0.00 0.00 0.0 0.0 \n", + "119204 M1928968864 0.00 0.00 0.0 0.0 \n", + "129295 C1414963218 764252.95 1308463.31 0.0 0.0 " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
steptypeamountnameOrigoldbalanceOrgnewbalanceOrignameDestoldbalanceDestnewbalanceDestisFraudisFlaggedFraud
25162414PAYMENT3229.29C2942235710.00.0M21139853720.000.000.00.0
38506217PAYMENT19036.88C21071140685639.00.0M3840921340.000.000.00.0
39150417PAYMENT11626.88C1771628320.00.0M4768087050.000.000.00.0
11920411PAYMENT15820.53C410676160.00.0M19289688640.000.000.00.0
12929511CASH_OUT141208.82C119092018262486.00.0C1414963218764252.951308463.310.00.0
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 2 + } + ], + "source": [ + "# Just a first look\n", + "df = pd.read_csv(\"/PS_20174392719_1491204439457_log.csv\")\n", + "sample = df.sample(100000)\n", + "sample.head()" + ] + }, + { + "cell_type": "code", + "source": [ + "# Taking a look at the shape of the dataset and checking for categorical/numerical\n", + "print(f\"The actual shape of my data is {sample.shape[0]} rows and {sample.shape[1]} columns\")" + ], + "metadata": { + "id": "wyMYRepSAaYt", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "43da3cf8-365a-4c44-db9d-c2ffbbfacae9" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "The actual shape of my data is 100000 rows and 11 columns\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Just out of curiosity, these last columns might contain my target, just by checking the unique\n", + "print(sample[\"isFraud\"].unique()) # this will be my target, most likely\n", + "print(sample[\"isFlaggedFraud\"].unique())" + ], + "metadata": { + "id": "9KCm2s3LAwJ-", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "bef5045f-81e3-4afb-f94d-8ab0911a1141" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[0. 1.]\n", + "[0.]\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# If it's what I am thinking, 2 out of 1000 are fraud? Maybe. 
Let's sit back and enjoy the show\n", + "print(sample[\"isFraud\"].value_counts())\n", + "sample[\"isFraud\"].value_counts().plot(kind = \"bar\")" + ], + "metadata": { + "id": "M1kNevqiQtl9", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 507 + }, + "outputId": "ff0bcbe5-79bb-4502-c0b1-e294823f2896" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "0.0 99947\n", + "1.0 53\n", + "Name: isFraud, dtype: int64\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 5 + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAkIAAAGlCAYAAAAWDpmnAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAkFklEQVR4nO3deXDU9f3H8VcSzAGyGw7JkiFCWhFIoRxBw6LSWjIsJTimYgVJETWCR2KBqByKESmKBhGIHKlXQ6ekIp1CkUggDQNYiQGCyGWQjjBEmQ0wkF1JJRzZ3x+dfH8sIIfdsCSf52NmZ5rv553v9/Nlinmy7C4hPp/PJwAAAAOFBnsDAAAAwUIIAQAAYxFCAADAWIQQAAAwFiEEAACMRQgBAABjEUIAAMBYhBAAADBWs2Bv4HpWV1enQ4cOqWXLlgoJCQn2dgAAwBXw+Xz67rvvFBsbq9DQSz/nQwhdwqFDhxQXFxfsbQAAgB+hsrJSHTp0uOQMIXQJLVu2lPTfX0ibzRbk3QAAgCvh9XoVFxdn/Ry/FELoEur/OsxmsxFCAAA0MlfyshZeLA0AAIxFCAEAAGMRQgAAwFiEEAAAMBYhBAAAjEUIAQAAYxFCAADAWIQQAAAwFiEEAACMRQgBAABjXXUIbdy4Uffcc49iY2MVEhKiFStW+K37fD5lZ2erffv2ioqKUnJysvbt2+c3c+zYMaWlpclmsyk6Olrp6ek6ceKE38yOHTt01113KTIyUnFxccrJyblgL8uWLVPXrl0VGRmpHj166OOPP77qvQAAAHNddQjV1NSoZ8+eWrBgwUXXc3JylJubq7y8PJWVlalFixZyuVw6efKkNZOWlqbdu3eruLhYq1at0saNGzV27Fhr3ev1atCgQerYsaPKy8s1a9YsTZs2TW+//bY1s2nTJj344INKT0/X559/rtTUVKWmpmrXrl1XtRcAAGAw3/9Akm/58uXW13V1dT6Hw+GbNWuWday6utoXERHh++tf/+rz+Xy+PXv2+CT5tmzZYs2sXr3aFxIS4vv22299Pp/Pt3DhQl+rVq18tbW11sykSZN8Xbp0sb5+4IEHfCkpKX77SUpK8j3++ONXvJfL8Xg8Pkk+j8dzRfMAACD4rubnd0BfI7R//3653W4lJydbx+x2u5KSklRaWipJKi0tVXR0tPr27WvNJCcnKzQ0VGVlZdbMgAEDFB4ebs24XC7t3btXx48ft2bOvU79TP11rmQv56utrZXX6/V7AACApqtZIE/mdrslSTExMX7HY2JirDW326127dr5b6JZM7Vu3dpvJj4+/oJz1K+1atVKbrf7ste53F7ON3PmTL388stXdrNNXKfJhcHeAq6hA6+lBHsLABAUvGvsHFOmTJHH47EelZWVwd4SAABoQAENIYfDIUmqqqryO15VVWWtORwOHT582G/9zJkzOnbsmN/Mxc5x7jV+aObc9cvt5XwRERGy2Wx+DwAA0HQFNITi4+PlcDhUUlJiHfN6vSorK5PT6ZQkOZ1OVVdXq7y83JpZt26d6urqlJSUZM1s3LhRp0+ftmaKi4vVpUsXtWrVypo59zr1M/XXuZK9AAAAs111CJ04cULbt2/X9u3bJf33Rcnbt2/XwYMHFRISovHjx2vGjBlauXKldu7cqYceekixsbFKTU2VJHXr1k2DBw/WmDFjtHnzZn366afKzMzUiBEjFBsbK0kaOXKkwsPDlZ6ert27d2vp0qWaN2+esrKyrH2MGzdORUVFmj17tioqKjRt2jRt3bpVmZmZknRFewEAAGa76hdLb926VXfffbf1dX2cjB49Wvn5+Zo4caJqamo0duxYVVdX684771RRUZEiIyOt71myZIkyMzM1cOBAhYaGatiwYcrNzbXW7Xa71q5dq4yMDCUmJqpt27bKzs72+6yh/v37q6CgQFOnTtXzzz+vzp07a8WKFerevbs1cyV7AQAA5grx+Xy+YG/ie
uX1emW32+XxeIx7vRDvGjML7xoD0JRczc9v3jUGAACMRQgBAABjEUIAAMBYhBAAADAWIQQAAIxFCAEAAGMRQgAAwFiEEAAAMBYhBAAAjEUIAQAAYxFCAADAWIQQAAAwFiEEAACMRQgBAABjEUIAAMBYhBAAADAWIQQAAIxFCAEAAGMRQgAAwFiEEAAAMBYhBAAAjEUIAQAAYxFCAADAWIQQAAAwFiEEAACMRQgBAABjEUIAAMBYhBAAADAWIQQAAIxFCAEAAGMRQgAAwFiEEAAAMBYhBAAAjEUIAQAAYxFCAADAWIQQAAAwFiEEAACMRQgBAABjEUIAAMBYhBAAADAWIQQAAIxFCAEAAGMRQgAAwFiEEAAAMBYhBAAAjEUIAQAAYxFCAADAWIQQAAAwFiEEAACMRQgBAABjEUIAAMBYhBAAADAWIQQAAIxFCAEAAGMRQgAAwFiEEAAAMBYhBAAAjEUIAQAAYxFCAADAWAEPobNnz+rFF19UfHy8oqKi9NOf/lR/+MMf5PP5rBmfz6fs7Gy1b99eUVFRSk5O1r59+/zOc+zYMaWlpclmsyk6Olrp6ek6ceKE38yOHTt01113KTIyUnFxccrJyblgP8uWLVPXrl0VGRmpHj166OOPPw70LQMAgEYq4CH0+uuva9GiRZo/f76+/PJLvf7668rJydFbb71lzeTk5Cg3N1d5eXkqKytTixYt5HK5dPLkSWsmLS1Nu3fvVnFxsVatWqWNGzdq7Nix1rrX69WgQYPUsWNHlZeXa9asWZo2bZrefvtta2bTpk168MEHlZ6ers8//1ypqalKTU3Vrl27An3bAACgEQrxnftUTQAMHTpUMTExeu+996xjw4YNU1RUlP7yl7/I5/MpNjZWzzzzjJ599llJksfjUUxMjPLz8zVixAh9+eWXSkhI0JYtW9S3b19JUlFRkYYMGaJvvvlGsbGxWrRokV544QW53W6Fh4dLkiZPnqwVK1aooqJCkjR8+HDV1NRo1apV1l769eunXr16KS8v77L34vV6Zbfb5fF4ZLPZAvZr1Bh0mlwY7C3gGjrwWkqwtwAAAXM1P78D/oxQ//79VVJSoq+++kqS9MUXX+hf//qXfv3rX0uS9u/fL7fbreTkZOt77Ha7kpKSVFpaKkkqLS1VdHS0FUGSlJycrNDQUJWVlVkzAwYMsCJIklwul/bu3avjx49bM+dep36m/joAAMBszQJ9wsmTJ8vr9apr164KCwvT2bNn9corrygtLU2S5Ha7JUkxMTF+3xcTE2Otud1utWvXzn+jzZqpdevWfjPx8fEXnKN+rVWrVnK73Ze8zvlqa2tVW1trfe31eq/q3gEAQOMS8GeEPvzwQy1ZskQFBQXatm2bFi9erDfeeEOLFy8O9KUCbubMmbLb7dYjLi4u2FsCAAANKOAh9Nxzz2ny5MkaMWKEevTooVGjRmnChAmaOXOmJMnhcEiSqqqq/L6vqqrKWnM4HDp8+LDf+pkzZ3Ts2DG/mYud49xr/NBM/fr5pkyZIo/HYz0qKyuv+v4BAEDjEfAQ+s9//qPQUP/ThoWFqa6uTpIUHx8vh8OhkpISa93r9aqsrExOp1OS5HQ6VV1drfLycmtm3bp1qqurU1JSkjWzceNGnT592popLi5Wly5d1KpVK2vm3OvUz9Rf53wRERGy2Wx+DwAA0HQFPITuuecevfLKKyosLNSBAwe0fPlyvfnmm/rNb34jSQoJCdH48eM1Y8YMrVy5Ujt37tRDDz2k2NhYpaamSpK6deumwYMHa8yYMdq8ebM+/fRTZWZmasSIEYqNjZUkjRw5UuHh4UpPT9fu3bu1dOlSzZs3T1lZWdZexo0bp6KiIs2ePVsVFRWaNm2atm7dqszMzEDfNgAAaIQC/mLpt956Sy+++KKeeuopHT58WLGxsXr88ceVnZ1tzUycOFE1NTUaO3asqqurdeedd6qoqEiRkZHWzJIlS5SZmamBAwcqNDRUw4YNU25ur
rVut9u1du1aZWRkKDExUW3btlV2drbfZw31799fBQUFmjp1qp5//nl17txZK1asUPfu3QN92wAAoBEK+OcINSV8jhBMwecIAWhKgvo5QgAAAI0FIQQAAIxFCAEAAGMRQgAAwFiEEAAAMBYhBAAAjEUIAQAAYxFCAADAWIQQAAAwFiEEAACMRQgBAABjEUIAAMBYhBAAADAWIQQAAIxFCAEAAGMRQgAAwFiEEAAAMBYhBAAAjEUIAQAAYxFCAADAWIQQAAAwFiEEAACMRQgBAABjEUIAAMBYhBAAADAWIQQAAIxFCAEAAGMRQgAAwFiEEAAAMBYhBAAAjEUIAQAAYxFCAADAWIQQAAAwFiEEAACMRQgBAABjEUIAAMBYhBAAADAWIQQAAIxFCAEAAGMRQgAAwFiEEAAAMBYhBAAAjEUIAQAAYxFCAADAWIQQAAAwFiEEAACMRQgBAABjEUIAAMBYhBAAADAWIQQAAIxFCAEAAGMRQgAAwFiEEAAAMBYhBAAAjEUIAQAAYxFCAADAWIQQAAAwFiEEAACMRQgBAABjEUIAAMBYDRJC3377rX73u9+pTZs2ioqKUo8ePbR161Zr3efzKTs7W+3bt1dUVJSSk5O1b98+v3McO3ZMaWlpstlsio6OVnp6uk6cOOE3s2PHDt11112KjIxUXFyccnJyLtjLsmXL1LVrV0VGRqpHjx76+OOPG+KWAQBAIxTwEDp+/LjuuOMO3XDDDVq9erX27Nmj2bNnq1WrVtZMTk6OcnNzlZeXp7KyMrVo0UIul0snT560ZtLS0rR7924VFxdr1apV2rhxo8aOHWute71eDRo0SB07dlR5eblmzZqladOm6e2337ZmNm3apAcffFDp6en6/PPPlZqaqtTUVO3atSvQtw0AABqhEJ/P5wvkCSdPnqxPP/1Un3zyyUXXfT6fYmNj9cwzz+jZZ5+VJHk8HsXExCg/P18jRozQl19+qYSEBG3ZskV9+/aVJBUVFWnIkCH65ptvFBsbq0WLFumFF16Q2+1WeHi4de0VK1aooqJCkjR8+HDV1NRo1apV1vX79eunXr16KS8v77L34vV6Zbfb5fF4ZLPZ/qdfl8am0+TCYG8B19CB11KCvQUACJir+fkd8GeEVq5cqb59++q3v/2t2rVrp969e+udd96x1vfv3y+3263k5GTrmN1uV1JSkkpLSyVJpaWlio6OtiJIkpKTkxUaGqqysjJrZsCAAVYESZLL5dLevXt1/Phxa+bc69TP1F/nfLW1tfJ6vX4PAADQdAU8hL7++mstWrRInTt31po1a/Tkk0/q97//vRYvXixJcrvdkqSYmBi/74uJibHW3G632rVr57ferFkztW7d2m/mYuc49xo/NFO/fr6ZM2fKbrdbj7i4uKu+fwAA0HgEPITq6urUp08fvfrqq+rdu7fGjh2rMWPGXNFfRQXblClT5PF4rEdlZWWwtwQAABpQwEOoffv2SkhI8DvWrVs3HTx4UJLkcDgkSVVVVX4zVVVV1prD4dDhw4f91s+cOaNjx475zVzsHOde44dm6tfPFxERIZvN5vcAAABNV8BD6I477tDevXv9jn311Vfq2LGjJCk+Pl4Oh0MlJSXWutfrVVlZmZxOpyTJ6XSqurpa5eXl1sy6detUV1enpKQka2bjxo06ffq0NVNcXKwuXbpY71BzOp1+16mfqb8OAAAwW8BDaMKECfrss8/06quv6t///rcKCgr09ttvKyMjQ5IUEhKi8ePHa8aMGVq5cqV27typhx56SLGxsUpNTZX032eQBg8erDFjxmjz5s369NNPlZmZqREjRig2NlaSNHLkSIWHhys9PV27d+/W0qVLNW/ePGVlZVl7GTdunIqKijR79mxVVFRo2rRp2rp1qzIzMwN92wAAoBFqFugT3nbbbVq+fLmmTJmi6dOnKz4+XnPnzlVaWpo1M3HiRNXU1Gjs2LGqrq7WnXfeqaKiIkVGRlozS5YsUWZmpgYOH
KjQ0FANGzZMubm51rrdbtfatWuVkZGhxMREtW3bVtnZ2X6fNdS/f38VFBRo6tSpev7559W5c2etWLFC3bt3D/RtAwCARijgnyPUlPA5QjAFnyMEoCkJ6ucIAQAANBaEEAAAMBYhBAAAjEUIAQAAYxFCAADAWIQQAAAwFiEEAACMRQgBAABjEUIAAMBYhBAAADAWIQQAAIxFCAEAAGMRQgAAwFiEEAAAMBYhBAAAjEUIAQAAYxFCAADAWIQQAAAwFiEEAACMRQgBAABjEUIAAMBYhBAAADAWIQQAAIxFCAEAAGMRQgAAwFiEEAAAMBYhBAAAjEUIAQAAYxFCAADAWIQQAAAwFiEEAACMRQgBAABjEUIAAMBYhBAAADAWIQQAAIxFCAEAAGMRQgAAwFiEEAAAMBYhBAAAjEUIAQAAYxFCAADAWIQQAAAwFiEEAACMRQgBAABjEUIAAMBYhBAAADAWIQQAAIxFCAEAAGMRQgAAwFiEEAAAMBYhBAAAjEUIAQAAYxFCAADAWIQQAAAwFiEEAACMRQgBAABjEUIAAMBYhBAAADAWIQQAAIzV4CH02muvKSQkROPHj7eOnTx5UhkZGWrTpo1uvPFGDRs2TFVVVX7fd/DgQaWkpKh58+Zq166dnnvuOZ05c8ZvZv369erTp48iIiJ0yy23KD8//4LrL1iwQJ06dVJkZKSSkpK0efPmhrhNAADQCDVoCG3ZskV//OMf9fOf/9zv+IQJE/TRRx9p2bJl2rBhgw4dOqT77rvPWj979qxSUlJ06tQpbdq0SYsXL1Z+fr6ys7Otmf379yslJUV33323tm/frvHjx+uxxx7TmjVrrJmlS5cqKytLL730krZt26aePXvK5XLp8OHDDXnbAACgkQjx+Xy+hjjxiRMn1KdPHy1cuFAzZsxQr169NHfuXHk8Ht10000qKCjQ/fffL0mqqKhQt27dVFpaqn79+mn16tUaOnSoDh06pJiYGElSXl6eJk2apCNHjig8PFyTJk1SYWGhdu3aZV1zxIgRqq6uVlFRkSQpKSlJt912m+bPny9JqqurU1xcnJ5++mlNnjz5svfg9Xplt9vl8Xhks9kC/Ut0Xes0uTDYW8A1dOC1lGBvAQAC5mp+fjfYM0IZGRlKSUlRcnKy3/Hy8nKdPn3a73jXrl118803q7S0VJJUWlqqHj16WBEkSS6XS16vV7t377Zmzj+3y+WyznHq1CmVl5f7zYSGhio5OdmaOV9tba28Xq/fAwAANF3NGuKkH3zwgbZt26YtW7ZcsOZ2uxUeHq7o6Gi/4zExMXK73dbMuRFUv16/dqkZr9er77//XsePH9fZs2cvOlNRUXHRfc+cOVMvv/zyld8oAABo1AL+jFBlZaXGjRunJUuWKDIyMtCnb1BTpkyRx+OxHpWVlcHeEgAAaEABD6Hy8nIdPnxYffr0UbNmzdSsWTNt2LBBubm5atasmWJiYnTq1ClVV1f7fV9VVZUcDockyeFwXPAusvqvLzdjs9kUFRWltm3bKiws7KIz9ec4X0REhGw2m98DAAA0XQEPoYEDB2rnzp3avn279ejbt6/S0tKs/33DDTeopKTE+p69e/fq4MGDcjqdkiSn06mdO3f6vburuLhYNptNCQkJ1sy556ifqT9HeHi4EhMT/Wbq6upUUlJizQAAALMF/DVCLVu2VPfu3f2OtWjRQm3atLGOp6enKysrS61bt5bNZtPTTz8tp9Opfv36SZIGDRqkhIQEjRo1Sjk5OXK73Zo6daoyMjIUEREhSXriiSc0f/58TZw4UY8++qjWrVunDz/8UIWF//9up6ysLI0ePVp9+/bV7bffrrlz56qmpkaPPPJIoG8bAAA0Qg3yYunLmTNnjkJDQzVs2DDV1tbK5XJp4cKF1npYWJhWrVqlJ598Uk6nUy1atNDo0aM1ffp0ayY+Pl6FhYWaMGGC5s2bpw4dOujdd9+Vy+WyZoYPH64jR44oOztbbrdbvXr1UlFR0
QUvoAYAAGZqsM8Ragr4HCGYgs8RAtCUXBefIwQAAHC9I4QAAICxCCEAAGAsQggAABiLEAIAAMYihAAAgLEIIQAAYCxCCAAAGIsQAgAAxiKEAACAsQghAABgLEIIAAAYixACAADGIoQAAICxCCEAAGAsQggAABiLEAIAAMYihAAAgLEIIQAAYCxCCAAAGIsQAgAAxiKEAACAsQghAABgLEIIAAAYixACAADGIoQAAICxCCEAAGAsQggAABiLEAIAAMYihAAAgLEIIQAAYCxCCAAAGIsQAgAAxiKEAACAsQghAABgLEIIAAAYixACAADGIoQAAICxCCEAAGAsQggAABiLEAIAAMYihAAAgLEIIQAAYCxCCAAAGIsQAgAAxiKEAACAsQghAABgLEIIAAAYixACAADGIoQAAICxCCEAAGAsQggAABiLEAIAAMYihAAAgLEIIQAAYCxCCAAAGIsQAgAAxiKEAACAsQghAABgLEIIAAAYK+AhNHPmTN12221q2bKl2rVrp9TUVO3du9dv5uTJk8rIyFCbNm104403atiwYaqqqvKbOXjwoFJSUtS8eXO1a9dOzz33nM6cOeM3s379evXp00cRERG65ZZblJ+ff8F+FixYoE6dOikyMlJJSUnavHlzoG8ZAAA0UgEPoQ0bNigjI0OfffaZiouLdfr0aQ0aNEg1NTXWzIQJE/TRRx9p2bJl2rBhgw4dOqT77rvPWj979qxSUlJ06tQpbdq0SYsXL1Z+fr6ys7Otmf379yslJUV33323tm/frvHjx+uxxx7TmjVrrJmlS5cqKytLL730krZt26aePXvK5XLp8OHDgb5tAADQCIX4fD5fQ17gyJEjateunTZs2KABAwbI4/HopptuUkFBge6//35JUkVFhbp166bS0lL169dPq1ev1tChQ3Xo0CHFxMRIkvLy8jRp0iQdOXJE4eHhmjRpkgoLC7Vr1y7rWiNGjFB1dbWKiookSUlJSbrttts0f/58SVJdXZ3i4uL09NNPa/LkyZfdu9frld1ul8fjkc1mC/QvzXWt0+TCYG8B19CB11KCvQUACJir+fnd4K8R8ng8kqTWrVtLksrLy3X69GklJydbM127dtXNN9+s0tJSSVJpaal69OhhRZAkuVwueb1e7d6925o59xz1M/XnOHXqlMrLy/1mQkNDlZycbM2cr7a2Vl6v1+8BAACargYNobq6Oo0fP1533HGHunfvLklyu90KDw9XdHS032xMTIzcbrc1c24E1a/Xr11qxuv16vvvv9fRo0d19uzZi87Un+N8M2fOlN1utx5xcXE/7sYBAECj0KAhlJGRoV27dumDDz5oyMsEzJQpU+TxeKxHZWVlsLcEAAAaULOGOnFmZqZWrVqljRs3qkOHDtZxh8OhU6dOqbq62u9ZoaqqKjkcDmvm/Hd31b+r7NyZ899pVlVVJZvNpqioKIWFhSksLOyiM/XnOF9ERIQiIiJ+3A0DAIBGJ+DPCPl8PmVmZmr58uVat26d4uPj/dYTExN1ww03qKSkxDq2d+9eHTx4UE6nU5LkdDq1c+dOv3d3FRcXy2azKSEhwZo59xz1M/XnCA8PV2Jiot9MXV2dSkpKrBkAAGC2gD8jlJGRoYKCAv3jH/9Qy5Ytrdfj2O12RUVFyW63Kz09XVlZWWrdurVsNpuefvppOZ1O9evXT5I0aNAgJSQkaNSoUcrJyZHb7dbUqVOVkZFhPWPzxBNPaP78+Zo4caIeffRRrVu3Th9++KEKC///3U5ZWVkaPXq0+vbtq9tvv11z585VTU2NHnnkkUDfNgAAaIQCHkKLFi2SJP3yl7/0O/6nP/1JDz/8sCRpzpw5Cg0N1bBhw1RbWyuXy6WFCxdas2FhYVq1apWefPJJOZ1OtWjRQqNHj9b06dOtmfj4eBUWFmrChAmaN2+eOnTooHfffVcul8uaGT58uI4cOaLs7Gy53W716tVLRUVFF7yAGgAAmKnBP0eoMeNzhGAKP
kcIQFNyXX2OEAAAwPWKEAIAAMYihAAAgLEIIQAAYCxCCAAAGIsQAgAAxiKEAACAsQghAABgLEIIAAAYixACAADGIoQAAICxCCEAAGAsQggAABiLEAIAAMYihAAAgLEIIQAAYCxCCAAAGIsQAgAAxiKEAACAsQghAABgLEIIAAAYixACAADGIoQAAICxCCEAAGAsQggAABiLEAIAAMYihAAAgLEIIQAAYCxCCAAAGIsQAgAAxiKEAACAsQghAABgLEIIAAAYixACAADGIoQAAICxCCEAAGAsQggAABiLEAIAAMYihAAAgLEIIQAAYCxCCAAAGIsQAgAAxiKEAACAsQghAABgLEIIAAAYixACAADGIoQAAICxCCEAAGAsQggAABiLEAIAAMYihAAAgLEIIQAAYCxCCAAAGIsQAgAAxiKEAACAsQghAABgLEIIAAAYixACAADGIoQAAICxjAihBQsWqFOnToqMjFRSUpI2b94c7C0BAIDrQJMPoaVLlyorK0svvfSStm3bpp49e8rlcunw4cPB3hoAAAiyJh9Cb775psaMGaNHHnlECQkJysvLU/PmzfX+++8He2sAACDImgV7Aw3p1KlTKi8v15QpU6xjoaGhSk5OVmlp6QXztbW1qq2ttb72eDySJK/X2/Cbvc7U1f4n2FvANWTi/8cBNF31/03z+XyXnW3SIXT06FGdPXtWMTExfsdjYmJUUVFxwfzMmTP18ssvX3A8Li6uwfYIXA/sc4O9AwAIvO+++052u/2SM006hK7WlClTlJWVZX1dV1enY8eOqU2bNgoJCQniznAteL1excXFqbKyUjabLdjbARBA/P42i8/n03fffafY2NjLzjbpEGrbtq3CwsJUVVXld7yqqkoOh+OC+YiICEVERPgdi46Obsgt4jpks9n4DyXQRPH72xyXeyaoXpN+sXR4eLgSExNVUlJiHaurq1NJSYmcTmcQdwYAAK4HTfoZIUnKysrS6NGj1bdvX91+++2aO3euampq9MgjjwR7awAAIMiafAgNHz5cR44cUXZ2ttxut3r16qWioqILXkANRERE6KWXXrrgr0cBNH78/sYPCfFdyXvLAAAAmqAm/RohAACASyGEAACAsQghAABgLEIIAAAYixACAADGIoQAAICxCCEYa8+ePXrqqafUu3dvtW/fXu3bt1fv3r311FNPac+ePcHeHoAAqq2tVW1tbbC3gesQIQQjrV69Wr1799bnn3+ue++9V9nZ2crOzta9996rL774Qn369NGaNWuCvU0A/4Pi4mINGTJErVq1UvPmzdW8eXO1atVKQ4YM0T//+c9gbw/XCT5QEUbq2bOn7r33Xk2fPv2i69OmTdPf//537dix4xrvDEAgLF68WI899pjuv/9+uVwu618TqKqq0tq1a/W3v/1N7733nkaNGhXknSLYCCEYKSoqStu3b1eXLl0uur5371716tVL33///TXeGYBAuPXWWzVu3DhlZGRcdH3hwoWaM2eO9u3bd413husNfzUGI3Xq1EmFhYU/uF5YWKiOHTtewx0BCKSDBw8qOTn5B9cHDhyob7755hruCNerJv+PrgIXM336dI0cOVLr169XcnKy39PmJSUlKioqUkFBQZB3CeDH+tnPfqb33ntPOTk5F11///33lZCQcI13hesRfzUGY23atEm5ubkqLS2V2+2WJDkcDjmdTo0bN05OpzPIOwTwY61fv15Dhw7VT37yk4v+Yefrr79WYWGhBgwYEOSdItgIIQBAk3TgwAEtWrRIn3322QV/2HniiSfUqVOn4G4Q1wVCCAAAGIsXSwMX8fzzz+vRRx8N9jYAAA2MEAIu4ptvvtGBAweCvQ0ADWT06NH61a9+Fext4DrAu8aAi/jzn/8c7C0AaECxsbEKDeW5APAaIRjs6NGjev/99y9411j//v318MMP66abbgryDgEADY0chpG2bNmiW2+9Vbm5ubLb7RowYIAGDBggu92u3Nxcde3aV
Vu3bg32NgE0kMrKSl4HCEk8IwRD9evXTz179lReXp5CQkL81nw+n5544gnt2LFDpaWlQdohgIZU/48rnz17NthbQZDxGiEY6YsvvlB+fv4FESRJISEhmjBhgnr37h2EnQEIhJUrV15y/euvv75GO8H1jhCCkRwOhzZv3qyuXbtedH3z5s3WJ9ECaHxSU1MVEhKiS/2lx8X+IATzEEIw0rPPPquxY8eqvLxcAwcOvODj99955x298cYbQd4lgB+rffv2Wrhwoe69996Lrm/fvl2JiYnXeFe4HhFCMFJGRobatm2rOXPmaOHChdbrBMLCwpSYmKj8/Hw98MADQd4lgB8rMTFR5eXlPxhCl3u2CObgxdIw3unTp3X06FFJUtu2bXXDDTcEeUcA/leffPKJampqNHjw4Iuu19TUaOvWrfrFL35xjXeG6w0hBAAAjMXnCAEAAGMRQgAAwFiEEAAAMBYhBAAAjEUIAQAAYxFCAADAWIQQAAAwFiEEAACM9X9D04TMBnBGkAAAAABJRU5ErkJggg==\n" + }, + "metadata": {} + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Checking for statistics. I can see the \"only 2\" fraud cases in the mean of isFraud. The rest is a bit foggy for me.\n", + "sample.describe()" + ], + "metadata": { + "id": "s8UOZaZlPD4x", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 300 + }, + "outputId": "75b9d6bf-1cb3-4b8e-9376-212ff351ccc2" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " step amount oldbalanceOrg newbalanceOrig \\\n", + "count 100000.000000 1.000000e+05 1.000000e+05 1.000000e+05 \n", + "mean 13.793930 1.672580e+05 9.083691e+05 9.284839e+05 \n", + "std 3.662865 2.737220e+05 3.003058e+06 3.040327e+06 \n", + "min 1.000000 3.000000e-01 0.000000e+00 0.000000e+00 \n", + "25% 11.000000 1.326794e+04 0.000000e+00 0.000000e+00 \n", + "50% 14.000000 8.114545e+04 1.803700e+04 0.000000e+00 \n", + "75% 17.000000 2.240173e+05 1.688683e+05 2.113392e+05 \n", + "max 19.000000 1.000000e+07 3.836475e+07 3.844183e+07 \n", + "\n", + " oldbalanceDest newbalanceDest isFraud isFlaggedFraud \n", + "count 1.000000e+05 1.000000e+05 100000.000000 100000.0 \n", + "mean 9.872217e+05 1.171648e+06 0.000530 0.0 \n", + "std 2.354548e+06 2.539133e+06 0.023016 0.0 \n", + "min 0.000000e+00 0.000000e+00 0.000000 0.0 \n", + "25% 0.000000e+00 0.000000e+00 0.000000 0.0 \n", + "50% 1.177247e+05 2.234951e+05 0.000000 0.0 \n", + "75% 
9.029070e+05 1.214240e+06 0.000000 0.0 \n", + "max 4.133844e+07 4.138365e+07 1.000000 0.0 " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
stepamountoldbalanceOrgnewbalanceOrigoldbalanceDestnewbalanceDestisFraudisFlaggedFraud
count100000.0000001.000000e+051.000000e+051.000000e+051.000000e+051.000000e+05100000.000000100000.0
mean13.7939301.672580e+059.083691e+059.284839e+059.872217e+051.171648e+060.0005300.0
std3.6628652.737220e+053.003058e+063.040327e+062.354548e+062.539133e+060.0230160.0
min1.0000003.000000e-010.000000e+000.000000e+000.000000e+000.000000e+000.0000000.0
25%11.0000001.326794e+040.000000e+000.000000e+000.000000e+000.000000e+000.0000000.0
50%14.0000008.114545e+041.803700e+040.000000e+001.177247e+052.234951e+050.0000000.0
75%17.0000002.240173e+051.688683e+052.113392e+059.029070e+051.214240e+060.0000000.0
max19.0000001.000000e+073.836475e+073.844183e+074.133844e+074.138365e+071.0000000.0
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 6 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tNd05x_17_Us" + }, + "source": [ + "### What is the distribution of the outcome?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CHmYLtB57_Us" + }, + "outputs": [], + "source": [ + "# The question refers to the outcome variable \"isFraud\", which marks each transaction in the data set as fraudulent (1) or not fraudulent (0).\n", + "# In the 100,000-row sample shown above, 99,947 instances are labeled \"no fraud\" and 53 instances are labeled \"fraud\" (about 0.05%); exact counts vary with each random sample.\n", + "\n", + "# (Tip: if you are doing the Imbalance Lab) as you would expect, our outcome is highly imbalanced in favor of \"no fraud\"." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qh9CB98A7_Ut" + }, + "source": [ + "### Clean the dataset. How are you going to integrate the time variable? Do you think the step (integer) coding in which it is given is appropriate?" + ] + }, + { + "cell_type": "code", + "source": [ + "# Cleaning the place.\n", + "# 1st. Checking nulls\n", + "print(sample.isnull().sum())\n", + "print(\"\\nThank you baby jesus\")" + ], + "metadata": { + "id": "55q2oSdGgzsS", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "d2f789cf-2da3-4164-816d-3a2c169d8e65" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "step 0\n", + "type 0\n", + "amount 0\n", + "nameOrig 0\n", + "oldbalanceOrg 0\n", + "newbalanceOrig 0\n", + "nameDest 0\n", + "oldbalanceDest 0\n", + "newbalanceDest 0\n", + "isFraud 0\n", + "isFlaggedFraud 0\n", + "dtype: int64\n", + "\n", + "Thank you baby jesus\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Checking for data in columns. 
Ok, fine.\n", + "print(sample[\"type\"].unique())\n", + "sample[\"type\"].value_counts().plot(kind = \"bar\")" + ], + "metadata": { + "id": "vkt2bPXdg_6A", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 526 + }, + "outputId": "66f58e56-c925-4c84-ce42-02e3299004c1" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "['PAYMENT' 'CASH_OUT' 'TRANSFER' 'CASH_IN' 'DEBIT']\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 9 + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjkAAAHaCAYAAADi01jcAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAABAYUlEQVR4nO3de1iUdf7/8Reg4HHwlCCJpyyVPGPSbGmpJCq2WtqquUYe09BN2RJtzVO7araVulrWpYhuUmprtYpChIkVeMLwQGlWKrY6QKmMkoDC/P7ox3ybPCQmDHPP83Fdc9XM5z33vGfughf3fO7P7WGz2WwCAAAwGE9nNwAAAFAeCDkAAMCQCDkAAMCQCDkAAMCQCDkAAMCQCDkAAMCQCDkAAMCQCDkAAMCQqji7AWcqKSnRqVOnVLt2bXl4eDi7HQAAcANsNpvOnz+vgIAAeXpe+3iNW4ecU6dOKTAw0NltAACAm3Dy5Ek1btz4muNuHXJq164t6ecPyWQyObkbAABwI6xWqwIDA+2/x6/FrUNO6VdUJpOJkAMAgIv5rakmTDwGAACGRMgBAACGRMgBAACGRMgBAACGRMgBAACGRMgBAACGRMgBAACGRMgBAACGRMgBAACGRMgBAACGRMgBAACGRMgBAACGRMgBAACGRMgBAACGVMXZDRhds2nxzm7hlji+INzZLQAAUCYcyQEAAIZEyAEAAIZEyAEAAIZEyAEAAIZEyAEAAIZEyAEAAIZEyAEAAIZEyAEAAIZUppDzxhtvqH379jKZTDKZTDKbzdq6dat9/MEHH5SHh4fDbfz48Q7byMrKUnh4uGrUqKGGDRvqueee0+XLlx1qtm/frs6dO8vHx0ctW7ZUbGzsFb0sW7ZMzZo1U7Vq1RQSEqLdu3eX5a0AAACDK1PIady4sRYsWKD09HTt3btXPXv21IABA5SZmWmvGTt2rE6fPm2/LVy40D5WXFys8PBwFRUVKTU1VatXr1ZsbKxmzpxprzl27JjCw8PVo0cPZWRkaPLkyRozZowSExPtNevWrVNUVJRmzZqlffv2qUOHDgoLC1NOTs7v+SwAAICBeNhsNtvv2UC9evX08ssva/To0XrwwQfVsWNHLVq06Kq1W7duVf/+/XXq1Cn5+flJkpYvX67o6Gjl5ubK29tb0dHRio+P16FDh+zPGzp0qM6dO6eEhARJUkhIiO655x4tXbpUklRSUqLAwEBNmjRJ06ZNu+HerVarfH19lZeXJ5PJdJOfwPVxWQcAAG6tG/39fdNzcoqLi/Xuu+8qPz9fZrPZ/vjatWvVoEEDtW3bVtOnT9dPP/1kH0tLS1O7du3sAUeSwsLCZLVa7UeD0tLSFBoa6vBaYWFhSktLkyQVFRUpPT3docbT01OhoaH2mmspLCyU1Wp1uAEAAGMq8wU6Dx48KLPZrIKCAtWqVUvvv/++goKCJEmPP/64mjZtqoCAAB04cEDR0dE6cuSINm7cKEmyWCwOAUeS/b7FYrlujdVq1cWLF3X27FkVFxdftebw4cPX7X3+/PmaM2dOWd8yAABwQWUOOa1atVJGRoby8vL03nvvKSIiQikpKQoKCtK4cePsde3atVOjRo3Uq1cvffvtt7rjjjtuaeM3Y/r06YqKirLft1qtCgwMdGJHAACgvJQ55Hh7e6tly5aSpODgYO3Zs0eLFy/Wm2++eUVtSEiIJOmbb77RHXfcIX9//yvOgsrOzpYk+fv72/9Z+tgva0wmk6pXry4vLy95eXldtaZ0G9fi4+MjHx+fMrxbAADgqn73OjklJSUqLCy86lhGRoYkqVGjRpIks9msgwcPOpwFlZSUJJPJZP/Ky2w2Kzk52WE7SUlJ9nk/3t7eCg4OdqgpKSlRcnKyw9wgAADg3sp0JGf69Onq27evmjRpovPnzysuLk7bt29XYmKivv32W8XFxalfv36qX7++Dhw4oClTpqh79+5q3769JKl3794KC
grSiBEjtHDhQlksFs2YMUORkZH2Iyzjx4/X0qVLNXXqVI0aNUrbtm3T+vXrFR//f2cpRUVFKSIiQl26dFHXrl21aNEi5efna+TIkbfwowEAAK6sTCEnJydHTzzxhE6fPi1fX1+1b99eiYmJeuihh3Ty5El9/PHH9sARGBioQYMGacaMGfbne3l5afPmzZowYYLMZrNq1qypiIgIzZ07117TvHlzxcfHa8qUKVq8eLEaN26sFStWKCwszF4zZMgQ5ebmaubMmbJYLOrYsaMSEhKumIwMAADc1+9eJ8eVsU7OjWOdHABAZVHu6+QAAABUZoQcAABgSIQcAABgSIQcAABgSIQcAABgSIQcAABgSIQcAABgSIQcAABgSIQcAABgSIQcAABgSIQcAABgSIQcAABgSIQcAABgSIQcAABgSFWc3QBQkZpNi3d2C7/b8QXhzm4BAFwCR3IAAIAhEXIAAIAhEXIAAIAhEXIAAIAhEXIAAIAhEXIAAIAhEXIAAIAhEXIAAIAhEXIAAIAhEXIAAIAhEXIAAIAhEXIAAIAhEXIAAIAhEXIAAIAhEXIAAIAhEXIAAIAhEXIAAIAhEXIAAIAhEXIAAIAhEXIAAIAhEXIAAIAhEXIAAIAhlSnkvPHGG2rfvr1MJpNMJpPMZrO2bt1qHy8oKFBkZKTq16+vWrVqadCgQcrOznbYRlZWlsLDw1WjRg01bNhQzz33nC5fvuxQs337dnXu3Fk+Pj5q2bKlYmNjr+hl2bJlatasmapVq6aQkBDt3r27LG8FAAAYXJlCTuPGjbVgwQKlp6dr79696tmzpwYMGKDMzExJ0pQpU7Rp0yZt2LBBKSkpOnXqlB599FH784uLixUeHq6ioiKlpqZq9erVio2N1cyZM+01x44dU3h4uHr06KGMjAxNnjxZY8aMUWJior1m3bp1ioqK0qxZs7Rv3z516NBBYWFhysnJ+b2fBwAAMAgPm81m+z0bqFevnl5++WUNHjxYt912m+Li4jR48GBJ0uHDh9WmTRulpaXp3nvv1datW9W/f3+dOnVKfn5+kqTly5crOjpaubm58vb2VnR0tOLj43Xo0CH7awwdOlTnzp1TQkKCJCkkJET33HOPli5dKkkqKSlRYGCgJk2apGnTpt1w71arVb6+vsrLy5PJZPo9H8M1NZsWXy7brWjHF4Q7u4Vbwgj7wyj7AgBu1o3+/r7pOTnFxcV69913lZ+fL7PZrPT0dF26dEmhoaH2mtatW6tJkyZKS0uTJKWlpaldu3b2gCNJYWFhslqt9qNBaWlpDtsorSndRlFRkdLT0x1qPD09FRoaaq8BAACoUtYnHDx4UGazWQUFBapVq5bef/99BQUFKSMjQ97e3qpTp45DvZ+fnywWiyTJYrE4BJzS8dKx69VYrVZdvHhRZ8+eVXFx8VVrDh8+fN3eCwsLVVhYaL9vtVpv/I0DAACXUuYjOa1atVJGRoZ27dqlCRMmKCIiQl9++WV59HbLzZ8/X76+vvZbYGCgs1sCAADlpMwhx9vbWy1btlRwcLDmz5+vDh06aPHixfL391dRUZHOnTvnUJ+dnS1/f39Jkr+//xVnW5Xe/60ak8mk6tWrq0GDBvLy8rpqTek2rmX69OnKy8uz306ePFnWtw8AAFzE714np6SkRIWFhQoODlbVqlWVnJxsHzty5IiysrJkNpslSWazWQcPHnQ4CyopKUkmk0lBQUH2ml9uo7SmdBve3t4KDg52qCkpKVFycrK95lp8fHzsp7+X3gAAgDGVaU7O9OnT1bdvXzVp0kTnz59XXFyctm/frsTERPn6+mr06NGKiopSvXr1ZDKZNGnSJJnNZt17772SpN69eysoKEgjRozQwoULZbFYNGPGDEVGRsrHx0eSNH78eC1dulRTp07VqFGjtG3bNq1fv17x8f93VkxUVJQiIiLUpUsXde3aVYsWLVJ+fr5Gjhx5Cz8aAADgysoUcnJycvTEE0/o9OnT8vX1Vfv27
ZWYmKiHHnpIkvTaa6/J09NTgwYNUmFhocLCwvT666/bn+/l5aXNmzdrwoQJMpvNqlmzpiIiIjR37lx7TfPmzRUfH68pU6Zo8eLFaty4sVasWKGwsDB7zZAhQ5Sbm6uZM2fKYrGoY8eOSkhIuGIyMgAAcF+/e50cV8Y6OTfOKGuzGGF/GGVfAMDNKvd1cgAAACozQg4AADAkQg4AADAkQg4AADAkQg4AADAkQg4AADAkQg4AADAkQg4AADAkQg4AADAkQg4AADAkQg4AADAkQg4AADAkQg4AADAkQg4AADAkQg4AADAkQg4AADAkQg4AADAkQg4AADAkQg4AADAkQg4AADAkQg4AADAkQg4AADAkQg4AADAkQg4AADAkQg4AADAkQg4AADAkQg4AADAkQg4AADAkQg4AADAkQg4AADAkQg4AADAkQg4AADAkQg4AADAkQg4AADAkQg4AADAkQg4AADAkQg4AADAkQg4AADCkMoWc+fPn65577lHt2rXVsGFDDRw4UEeOHHGoefDBB+Xh4eFwGz9+vENNVlaWwsPDVaNGDTVs2FDPPfecLl++7FCzfft2de7cWT4+PmrZsqViY2Ov6GfZsmVq1qyZqlWrppCQEO3evbssbwcAABhYmUJOSkqKIiMjtXPnTiUlJenSpUvq3bu38vPzHerGjh2r06dP228LFy60jxUXFys8PFxFRUVKTU3V6tWrFRsbq5kzZ9prjh07pvDwcPXo0UMZGRmaPHmyxowZo8TERHvNunXrFBUVpVmzZmnfvn3q0KGDwsLClJOTc7OfBQAAMBAPm81mu9kn5+bmqmHDhkpJSVH37t0l/Xwkp2PHjlq0aNFVn7N161b1799fp06dkp+fnyRp+fLlio6OVm5urry9vRUdHa34+HgdOnTI/ryhQ4fq3LlzSkhIkCSFhITonnvu0dKlSyVJJSUlCgwM1KRJkzRt2rQb6t9qtcrX11d5eXkymUw3+zFcV7Np8eWy3Yp2fEG4s1u4JYywP4yyLwDgZt3o7+/fNScnLy9PklSvXj2Hx9euXasGDRqobdu2mj59un766Sf7WFpamtq1a2cPOJIUFhYmq9WqzMxMe01oaKjDNsPCwpSWliZJKioqUnp6ukONp6enQkND7TVXU1hYKKvV6nADAADGVOVmn1hSUqLJkyfrvvvuU9u2be2PP/7442ratKkCAgJ04MABRUdH68iRI9q4caMkyWKxOAQcSfb7FovlujVWq1UXL17U2bNnVVxcfNWaw4cPX7Pn+fPna86cOTf7lgEAgAu56ZATGRmpQ4cO6bPPPnN4fNy4cfZ/b9eunRo1aqRevXrp22+/1R133HHznd4C06dPV1RUlP2+1WpVYGCgEzsCAADl5aZCzsSJE7V582bt2LFDjRs3vm5tSEiIJOmbb77RHXfcIX9//yvOgsrOzpYk+fv72/9Z+tgva0wmk6pXry4vLy95eXldtaZ0G1fj4+MjHx+fG3uTAADApZVpTo7NZtPEiRP1/vvva9u2bWrevPlvPicjI0OS1KhRI0mS2WzWwYMHHc6CSkpKkslkUlBQkL0mOTnZYTtJSUkym82SJG9vbwUHBzvUlJSUKDk52V4DAADcW5mO5ERGRiouLk4ffvihateubZ9D4+vrq+rVq+vbb79VXFyc+vXrp/r16+vAgQOaMmWKunfvrvbt20uSevfuraCgII0YMUILFy6UxWLRjBkzFBkZaT/KMn78eC1dulRTp07VqFGjtG3bNq1fv17x8f93ZkxUVJQiIiLUpUsXde3aVYsWLVJ+fr5Gjhx5qz4bAADgwsoUct544w1JP58m/kurVq3Sk08+KW9vb3388cf2wBEYGKhBgwZpxowZ9lovLy9t3rxZEyZMkNlsVs2aNRUREaG5c+faa5o3b674+HhNmTJFixcvVuPGjbVixQqFhYXZa4YMGaLc3FzNnDlTFotFHTt2VEJCwhWTkQEAgHv6XevkuDrWyblxRlmbx
Qj7wyj7AgBuVoWskwMAAFBZEXIAAIAhEXIAAIAhEXIAAIAhEXIAAIAhEXIAAIAhEXIAAIAhEXIAAIAhEXIAAIAhEXIAAIAhEXIAAIAhEXIAAIAhEXIAAIAhEXIAAIAhEXIAAIAhEXIAAIAhEXIAAIAhEXIAAIAhEXIAAIAhEXIAAIAhEXIAAIAhEXIAAIAhEXIAAIAhVXF2AwDcU7Np8c5u4Xc7viDc2S0AuA6O5AAAAEMi5AAAAEMi5AAAAEMi5AAAAEMi5AAAAEMi5AAAAEMi5AAAAEMi5AAAAEMi5AAAAEMi5AAAAEMi5AAAAEMi5AAAAEMi5AAAAEMqU8iZP3++7rnnHtWuXVsNGzbUwIEDdeTIEYeagoICRUZGqn79+qpVq5YGDRqk7Oxsh5qsrCyFh4erRo0aatiwoZ577jldvnzZoWb79u3q3LmzfHx81LJlS8XGxl7Rz7Jly9SsWTNVq1ZNISEh2r17d1neDgAAMLAyhZyUlBRFRkZq586dSkpK0qVLl9S7d2/l5+fba6ZMmaJNmzZpw4YNSklJ0alTp/Too4/ax4uLixUeHq6ioiKlpqZq9erVio2N1cyZM+01x44dU3h4uHr06KGMjAxNnjxZY8aMUWJior1m3bp1ioqK0qxZs7Rv3z516NBBYWFhysnJ+T2fBwAAMAgPm81mu9kn5+bmqmHDhkpJSVH37t2Vl5en2267TXFxcRo8eLAk6fDhw2rTpo3S0tJ07733auvWrerfv79OnTolPz8/SdLy5csVHR2t3NxceXt7Kzo6WvHx8Tp06JD9tYYOHapz584pISFBkhQSEqJ77rlHS5culSSVlJQoMDBQkyZN0rRp026of6vVKl9fX+Xl5clkMt3sx3BdzabFl8t2K9rxBeHObuGWMML+YF9UHkbZF4CrudHf379rTk5eXp4kqV69epKk9PR0Xbp0SaGhofaa1q1bq0mTJkpLS5MkpaWlqV27dvaAI0lhYWGyWq3KzMy01/xyG6U1pdsoKipSenq6Q42np6dCQ0PtNQAAwL1VudknlpSUaPLkybrvvvvUtm1bSZLFYpG3t7fq1KnjUOvn5yeLxWKv+WXAKR0vHbtejdVq1cWLF3X27FkVFxdftebw4cPX7LmwsFCFhYX2+1artQzvGAAAuJKbPpITGRmpQ4cO6d13372V/ZSr+fPny9fX134LDAx0dksAAKCc3FTImThxojZv3qxPPvlEjRs3tj/u7++voqIinTt3zqE+Oztb/v7+9ppfn21Vev+3akwmk6pXr64GDRrIy8vrqjWl27ia6dOnKy8vz347efJk2d44AABwGWUKOTabTRMnTtT777+vbdu2qXnz5g7jwcHBqlq1qpKTk+2PHTlyRFlZWTKbzZIks9msgwcPOpwFlZSUJJPJpKCgIHvNL7dRWlO6DW9vbwUHBzvUlJSUKDk52V5zNT4+PjKZTA43AABgTGWakxMZGam4uDh9+OGHql27tn0Oja+vr6pXry5fX1+NHj1aUVFRqlevnkwmkyZNmiSz2ax7771XktS7d28FBQVpxIgRWrhwoSwWi2bMmKHIyEj5+PhIksaPH6+lS5dq6tSpGjVqlLZt26b169crPv7/zsaIiopSRESEunTpoq5du2rRokXKz8/XyJEjb9VnAwAAXFiZQs4bb7whSXrwwQcdHl+1apWefPJJSdJrr70mT09PDRo0SIWFhQoLC9Prr79ur/Xy8tLmzZs1YcIEmc1m1axZUxEREZo7d669pnnz5oqPj9eUKVO0ePFiNW7cWCtWrFBYWJi9ZsiQIcrNzdXMmTNlsVjUsWNHJSQkXDEZGQAAuKfftU6Oq2OdnBtnlPVAjLA/2BeVh1H2BeBqKmSdHAAAgMqKkAMAAAyJkAMAAAyJkAMAAAyJkAMAAAyJkAMAAAyJkAMAAAyJkAMAAAyJkAMAAAyJkAMAAAyJkAMAAAyJkAMAAAyJkAMAAAyJkAMAAAyJkAMAAAyJkAMAA
AyJkAMAAAyJkAMAAAyJkAMAAAyJkAMAAAyJkAMAAAyJkAMAAAyJkAMAAAyJkAMAAAyJkAMAAAyJkAMAAAyJkAMAAAyJkAMAAAyJkAMAAAyJkAMAAAyJkAMAAAyJkAMAAAyJkAMAAAyJkAMAAAyJkAMAAAyJkAMAAAyJkAMAAAyJkAMAAAypzCFnx44devjhhxUQECAPDw998MEHDuNPPvmkPDw8HG59+vRxqDlz5oyGDx8uk8mkOnXqaPTo0bpw4YJDzYEDB9StWzdVq1ZNgYGBWrhw4RW9bNiwQa1bt1a1atXUrl07bdmypaxvBwAAGFSZQ05+fr46dOigZcuWXbOmT58+On36tP32zjvvOIwPHz5cmZmZSkpK0ubNm7Vjxw6NGzfOPm61WtW7d281bdpU6enpevnllzV79my99dZb9prU1FQNGzZMo0eP1hdffKGBAwdq4MCBOnToUFnfEgAAMKAqZX1C37591bdv3+vW+Pj4yN/f/6pjX331lRISErRnzx516dJFkvSvf/1L/fr10z//+U8FBARo7dq1KioqUkxMjLy9vXX33XcrIyNDr776qj0MLV68WH369NFzzz0nSXrxxReVlJSkpUuXavny5WV9WwAAwGDKZU7O9u3b1bBhQ7Vq1UoTJkzQjz/+aB9LS0tTnTp17AFHkkJDQ+Xp6aldu3bZa7p37y5vb297TVhYmI4cOaKzZ8/aa0JDQx1eNywsTGlpadfsq7CwUFar1eEGAACM6ZaHnD59+mjNmjVKTk7WSy+9pJSUFPXt21fFxcWSJIvFooYNGzo8p0qVKqpXr54sFou9xs/Pz6Gm9P5v1ZSOX838+fPl6+trvwUGBv6+NwsAACqtMn9d9VuGDh1q//d27dqpffv2uuOOO7R9+3b16tXrVr9cmUyfPl1RUVH2+1arlaADAIBBlfsp5C1atFCDBg30zTffSJL8/f2Vk5PjUHP58mWdOXPGPo/H399f2dnZDjWl93+r5lpzgaSf5wqZTCaHGwAAMKZyDznff/+9fvzxRzVq1EiSZDabde7cOaWnp9trtm3bppKSEoWEhNhrduzYoUuXLtlrkpKS1KpVK9WtW9dek5yc7PBaSUlJMpvN5f2WAACACyhzyLlw4YIyMjKUkZEhSTp27JgyMjKUlZWlCxcu6LnnntPOnTt1/PhxJScna8CAAWrZsqXCwsIkSW3atFGfPn00duxY7d69W59//rkmTpyooUOHKiAgQJL0+OOPy9vbW6NHj1ZmZqbWrVunxYsXO3zV9MwzzyghIUGvvPKKDh8+rNmzZ2vv3r2aOHHiLfhYAACAqytzyNm7d686deqkTp06SZKioqLUqVMnzZw5U15eXjpw4ID++Mc/6q677tLo0aMVHBysTz/9VD4+PvZtrF27Vq1bt1avXr3Ur18/3X///Q5r4Pj6+uqjjz7SsWPHFBwcrL/+9a+aOXOmw1o6f/jDHxQXF6e33npLHTp00HvvvacPPvhAbdu2/T2fBwAAMIgyTzx+8MEHZbPZrjmemJj4m9uoV6+e4uLirlvTvn17ffrpp9eteeyxx/TYY4/95usBAAD3w7WrAACAIRFyAACAIRFyAACAIRFyAACAIRFyAACAIRFyAACAIRFyAACAIRFyAACAIRFyAACAIRFyAACAIRFyAACAIRFyAACAIRFyAACAIRFyAACAIRFyAACAIRFyAACAIRFyAACAIRFyAACAIRFyAACAIRFyAACAIRFyAACAIRFyAACAIRFyAACAIRFyAACAIRFyAACAIRFyAACAIRFyAACAIRFyAACAIRFyAACAIRFyAACAIRFyAACAIRFyAACAIRFyAACAIRFyAACAIRFyAACAIRFyAACAIRFyAACAIZU55OzYsUMPP/ywAgIC5OHhoQ8++MBh3GazaebMmWrUqJGqV6+u0NBQHT161KHmzJkzGj58uEwmk+rUqaPRo0frwoULDjUHDhxQt27dVK1aNQUGBmrhwoVX9
LJhwwa1bt1a1apVU7t27bRly5ayvh0AAGBQZQ45+fn56tChg5YtW3bV8YULF2rJkiVavny5du3apZo1ayosLEwFBQX2muHDhyszM1NJSUnavHmzduzYoXHjxtnHrVarevfuraZNmyo9PV0vv/yyZs+erbfeestek5qaqmHDhmn06NH64osvNHDgQA0cOFCHDh0q61sCAAAG5GGz2Ww3/WQPD73//vsaOHCgpJ+P4gQEBOivf/2rnn32WUlSXl6e/Pz8FBsbq6FDh+qrr75SUFCQ9uzZoy5dukiSEhIS1K9fP33//fcKCAjQG2+8ob/97W+yWCzy9vaWJE2bNk0ffPCBDh8+LEkaMmSI8vPztXnzZns/9957rzp27Kjly5ffUP9Wq1W+vr7Ky8uTyWS62Y/huppNiy+X7Va04wvCnd3CLWGE/cG+qDyMsi8AV3Ojv79v6ZycY8eOyWKxKDQ01P6Yr6+vQkJClJaWJklKS0tTnTp17AFHkkJDQ+Xp6aldu3bZa7p3724POJIUFhamI0eO6OzZs/aaX75OaU3p6wAAAPdW5VZuzGKxSJL8/PwcHvfz87OPWSwWNWzY0LGJKlVUr149h5rmzZtfsY3Ssbp168pisVz3da6msLBQhYWF9vtWq7Usbw8AALgQtzq7av78+fL19bXfAgMDnd0SAAAoJ7c05Pj7+0uSsrOzHR7Pzs62j/n7+ysnJ8dh/PLlyzpz5oxDzdW28cvXuFZN6fjVTJ8+XXl5efbbyZMny/oWAQCAi7ilIad58+by9/dXcnKy/TGr1apdu3bJbDZLksxms86dO6f09HR7zbZt21RSUqKQkBB7zY4dO3Tp0iV7TVJSklq1aqW6devaa375OqU1pa9zNT4+PjKZTA43AABgTGUOORcuXFBGRoYyMjIk/TzZOCMjQ1lZWfLw8NDkyZP197//Xf/973918OBBPfHEEwoICLCfgdWmTRv16dNHY8eO1e7du/X5559r4sSJGjp0qAICAiRJjz/+uLy9vTV69GhlZmZq3bp1Wrx4saKioux9PPPMM0pISNArr7yiw4cPa/bs2dq7d68mTpz4+z8VAADg8so88Xjv3r3q0aOH/X5p8IiIiFBsbKymTp2q/Px8jRs3TufOndP999+vhIQEVatWzf6ctWvXauLEierVq5c8PT01aNAgLVmyxD7u6+urjz76SJGRkQoODlaDBg00c+ZMh7V0/vCHPyguLk4zZszQ888/rzvvvFMffPCB2rZte1MfBAAAMJbftU6Oq2OdnBtnlPVAjLA/2BeVh1H2BeBqnLJODgAAQGVByAEAAIZEyAEAAIZEyAEAAIZEyAEAAIZEyAEAAIZEyAEAAIZEyAEAAIZEyAEAAIZEyAEAAIZEyAEAAIZEyAEAAIZU5quQAwCMxQgXS5W4YCquxJEcAABgSIQcAABgSIQcAABgSIQcAABgSIQcAABgSIQcAABgSIQcAABgSIQcAABgSIQcAABgSIQcAABgSIQcAABgSIQcAABgSIQcAABgSIQcAABgSIQcAABgSIQcAABgSIQcAABgSIQcAABgSIQcAABgSIQcAABgSIQcAABgSIQcAABgSIQcAABgSIQcAABgSIQcAABgSLc85MyePVseHh4Ot9atW9vHCwoKFBkZqfr166tWrVoaNGiQsrOzHbaRlZWl8PBw1ahRQw0bNtRzzz2ny5cvO9Rs375dnTt3lo+Pj1q2bKnY2Nhb/VYAAIALK5cjOXfffbdOnz5tv3322Wf2sSlTpmjTpk3asGGDUlJSdOrUKT366KP28eLiYoWHh6uoqEipqalavXq1YmNjNXPmTHvNsWPHFB4erh49eigjI0OTJ0/WmDFjlJiYWB5vBwAAuKAq5bLRKlXk7+9/xeN5eXlauXKl4uLi1LNnT0nSqlWr1KZNG+3cuVP33nuvPvroI3355Zf6+OOP5efnp44dO+rFF19UdHS0Zs+eLW9vby1fvlzNmzfXK6+8Iklq0
6aNPvvsM7322msKCwsrj7cEAABcTLkcyTl69KgCAgLUokULDR8+XFlZWZKk9PR0Xbp0SaGhofba1q1bq0mTJkpLS5MkpaWlqV27dvLz87PXhIWFyWq1KjMz017zy22U1pRu41oKCwtltVodbgAAwJhuecgJCQlRbGysEhIS9MYbb+jYsWPq1q2bzp8/L4vFIm9vb9WpU8fhOX5+frJYLJIki8XiEHBKx0vHrldjtVp18eLFa/Y2f/58+fr62m+BgYG/9+0CAIBK6pZ/XdW3b1/7v7dv314hISFq2rSp1q9fr+rVq9/qlyuT6dOnKyoqyn7farUSdAAAMKhyP4W8Tp06uuuuu/TNN9/I399fRUVFOnfunENNdna2fQ6Pv7//FWdbld7/rRqTyXTdIOXj4yOTyeRwAwAAxlTuIefChQv69ttv1ahRIwUHB6tq1apKTk62jx85ckRZWVkym82SJLPZrIMHDyonJ8dek5SUJJPJpKCgIHvNL7dRWlO6DQAAgFsecp599lmlpKTo+PHjSk1N1SOPPCIvLy8NGzZMvr6+Gj16tKKiovTJJ58oPT1dI0eOlNls1r333itJ6t27t4KCgjRixAjt379fiYmJmjFjhiIjI+Xj4yNJGj9+vL777jtNnTpVhw8f1uuvv67169drypQpt/rtAAAAF3XL5+R8//33GjZsmH788Ufddtttuv/++7Vz507ddtttkqTXXntNnp6eGjRokAoLCxUWFqbXX3/d/nwvLy9t3rxZEyZMkNlsVs2aNRUREaG5c+faa5o3b674+HhNmTJFixcvVuPGjbVixQpOHwcAAHa3POS8++671x2vVq2ali1bpmXLll2zpmnTptqyZct1t/Pggw/qiy++uKkeAQCA8XHtKgAAYEiEHAAAYEiEHAAAYEiEHAAAYEiEHAAAYEiEHAAAYEiEHAAAYEiEHAAAYEiEHAAAYEiEHAAAYEiEHAAAYEiEHAAAYEiEHAAAYEiEHAAAYEiEHAAAYEiEHAAAYEiEHAAAYEiEHAAAYEiEHAAAYEiEHAAAYEiEHAAAYEiEHAAAYEiEHAAAYEiEHAAAYEiEHAAAYEiEHAAAYEiEHAAAYEiEHAAAYEiEHAAAYEiEHAAAYEiEHAAAYEiEHAAAYEiEHAAAYEiEHAAAYEhVnN0AAAD4WbNp8c5u4ZY4viDc2S1I4kgOAAAwKEIOAAAwJJcPOcuWLVOzZs1UrVo1hYSEaPfu3c5uCQAAVAIuHXLWrVunqKgozZo1S/v27VOHDh0UFhamnJwcZ7cGAACczKVDzquvvqqxY8dq5MiRCgoK0vLly1WjRg3FxMQ4uzUAAOBkLnt2VVFRkdLT0zV9+nT7Y56engoNDVVaWtpVn1NYWKjCwkL7/by8PEmS1Wottz5LCn8qt21XpPL8jCqSEfYH+6LyYF9ULkbYH+yLsm3fZrNdt85lQ84PP/yg4uJi+fn5OTzu5+enw4cPX/U58+fP15w5c654PDAwsFx6NBLfRc7uAKXYF5UH+6JyYX9UHhW1L86fPy9fX99rjrtsyLkZ06dPV1RUlP1+SUmJzpw5o/r168vDw8OJnd08q9WqwMBAnTx5UiaTydntuDX2ReXC/qg82BeVh1H2hc1m0/nz5xUQEHDdOpcNOQ0aNJCXl5eys7MdHs/Ozpa/v/9Vn+Pj4yMfHx+Hx+rUqVNeLVYok8nk0v/BGgn7onJhf1Qe7IvKwwj74npHcEq57MRjb29vBQcHKzk52f5YSUmJkpOTZTabndgZAACoDFz2SI4kRUVFKSIiQl26dFHXrl21aNEi5efna+TIkc5uDQAAOJlLh5whQ4YoNzdXM2fOlMViUceOHZWQkHDFZGQj8/Hx0axZs674Gg4Vj31RubA/Kg/2ReXhbvvCw/Zb518BAAC4IJedkwMAAHA9hBwAAGBIhBwAAGBIhBwAAGBIhBwAQIW5ePGis1uAGyHkuJAWLVroxx9/dHYbAFBmhYWFeuWVV9S8e
XNntwI34tLr5Lib48ePq7i42Nlt4P9r0aKF9uzZo/r16zu7Fbe2Zs2aG6p74oknyrkTFBYWavbs2UpKSpK3t7emTp2qgQMHatWqVfrb3/4mLy8vTZkyxdltuo1Ro0Zp8eLFql27trNbcRrWyXEhnp6eslgsatiwobNbgdgflUXdunWvOebh4aH8/HxdvnyZPxAqQHR0tN58802FhoYqNTVVubm5GjlypHbu3Knnn39ejz32mLy8vJzdptvw8vLS6dOn3fpnFEdyXExiYuJvXpTsj3/8YwV1Azjf2bNnr/r46dOnNWfOHMXExOihhx6q4K7c04YNG7RmzRr98Y9/1KFDh9S+fXtdvnxZ+/fvl4eHh7Pbczscw+BIjkvx9PztKVQeHh78xVpBPD09tXr1akJnJXP+/Hm99NJLWrx4se6++27Nnz9fPXr0cHZbbsHb21vHjh3T7bffLkmqXr26du/erXbt2jm5M/fk6empo0eP6rbbbrtunatfjfx6OJLjYvh6pHKJiIi47jihs+JcunRJ//rXvzRv3jzVr19fq1at0uDBg53dllspLi6Wt7e3/X6VKlVUq1YtJ3aEu+6665pjNpvN8D+jCDkuhMO9lQ+h0/lsNpvWrFmjmTNn6vLly5o3b55Gjx7N3A8nsNlsevLJJ+0XfywoKND48eNVs2ZNh7qNGzc6oz239N5776levXrObsNpCDkuhG8WKxdCZ+XQvn17fffdd5o0aZImT56sGjVqKD8//4o6Ix+Sryx+fWTzz3/+s5M6Qan77rvPrf8QY06OCxk5cqSWLFni1qcDViacXVU5/HKu2tWCpzsckgeuhp9RHMlxKVOmTNGxY8eueNzX11dNmjThyEIFi4iIUPXq1Z3dhtv75JNPnN0CyiAnJ8etf+lWpKZNm7r917YcyXEhnp6e8vDwuOJrKw8PD1WrVk2TJ0/W3Llz3f4/agDOUaNGDZ04ccJ+Nk94eLhWrFihRo0aSZKys7MVEBDAUTVUGI7kuJCrHcWRpHPnzik9PV0vvPCC6tatq2effbaCO3NPpaHzejw8PHT58uUK6sg9Wa3WG6pjTk75KygocPgjbMeOHVdcq4q/qytO586db6hu37595dyJ8xByXEjTpk2v+XiHDh1kMpk0Z84cQk4F2bhx4zVDTlpampYsWaKSkpIK7sr91KlT57phkzk5lQtfq1ecAQMGOLsFpyPkGEhwcPA1j/bg1hs4cOAVjx05ckTTpk3Tpk2bNHz4cM2dO7fiG3MzzMkBrm7WrFnObsHpCDkGYrFYfnNlS5SPU6dOadasWVq9erXCwsKUkZGhtm3bOrstt/DAAw+UqX7BggUaP3686tSpUz4NuTEPDw+HIzW/vo/KoaioSEVFRW6xUONvXycALiE3N1cvvPACy9dXsLy8PEVHR6tly5bKzMxUcnKyNm3aRMCpxObNm6czZ844uw1Dstlsuuuuu1SvXj3Vq1dPFy5cUKdOnez3W7du7ewW3c6qVas0adIkrV27VpI0ffp01a5dW76+vnrooYf0448/OrnD8sWRHBfSqVOnq/5VlJeXp++//16tWrXS22+/7YTO3NPChQv10ksvyd/fX++88w7ff7sIJr6Wn1WrVjm7BfzCP/7xD/3jH//Qfffdp7i4OH322Wf64IMPNHfuXHl6emrJkiWaMWOG3njjDWe3Wm44hdyFzJkz56qPm0wmtWrVSmFhYZw+XoE8PT1VvXp1hYaGXvdzZwn7yqV27drav3+/WrRo4exWgHJ15513au7cuRo2bJj27t2rkJAQrV+/XoMGDZIkbd26VePHj9eJEyec3Gn54UiOC2ESWeXyxBNPMN8A+IWYmBgNHz7cfu0qOFdWVpbuv/9+SVKXLl1UpUoVh6/S27dvr9OnTzurvQpByHFBFy9eVFJSkr7++mtJUqtWrRQaGsrquxUsNjbW2S0AlcrYsWPVv39/+4rGAQEBSk1NVbNmzZzbmJu6dOmSQ+D09vZW1
apV7ferVKli+KUVCDku5r///a/GjBmjH374weHxBg0aaOXKlXr44Yed1BmuhiXs4U5+Pfvh/PnzrBXlZF9++aUsFoukn/fP4cOHdeHCBUm64veIEXF2lQtJTU3V4MGD1b17d33++ec6c+aMzpw5o88++0zdunXT4MGDtXPnTme36TZq1Kih3Nxc+/3w8HCHQ7/Z2dn25exReXTr1o2jnnAbvXr1UseOHdWxY0f99NNP6t+/vzp27KhOnTopNDTU2e2VOyYeu5B+/fopMDBQb7755lXHn3rqKZ08eVJbtmyp4M7c06+v8PvrCa2lIYe/ZMsXl3WoPLy8vBzW6zKZTNq/f7+aN2/u5M7c041OKL7WavpGwNdVLmTnzp166aWXrjkeGRlZ5oXRUL6YmFz+uKxD5VG6Tk7p/ihdJ8fT0/FLA9YpqhhGDi83ipDjQi5evHjdv0Z9fX1VUFBQgR0BzvfLyzrYbDb169dPK1as0O233+7ErtwT6+RULgsXLtSkSZPsX89+/vnn6tKli30y8vnz5xUdHa3XX3/dmW2WK76uciHt27fXlClTNHLkyKuOx8TEaNGiRTpw4EAFd+aefuvQfHZ2tgICAjiCUMFYBwf4mZeXl06fPm3/St1kMikjI8PhK3Wj/4ziSI4LGTlypJ599ln5+fmpX79+DmPx8fGaOnWqnn/+eSd1535+69A8fz8AUkFBgdatW6f8/Hw99NBDuvPOO53dktv49c8gd/yZRMhxIc8884xSU1PVv39/tWrVSm3atJHNZtNXX32lo0ePauDAgZo8ebKz23QbHJoHHEVFRenSpUv617/+JennC0GazWZlZmaqRo0amjp1qpKSkmQ2m53cKdwFIceFeHp6asOGDVq3bp3eeecdHT58WJLUunVrzZ49W0OHDnVyh+4lIiLC2S3gGpjw7RwfffSR5s2bZ7+/du1anThxQkePHlWTJk00atQo/f3vf1d8fLwTu4Q7IeS4oCFDhmjIkCG/WbdgwQKNHz9ederUKf+mACd59NFHHe4XFBRo/PjxqlmzpsPjXEOs/GVlZSkoKMh+/6OPPtLgwYPtZ/k888wzV3zVjvK1YsUK1apVS5J0+fJlxcbGqkGDBpJ+nnhsdEw8NrBfTzLDrXWjn+t3331Xzp24t2tNxP81vl4sf3Xq1NGePXvs826aN2+uF154QaNGjZIkHT9+XG3atNHFixed2abbaNas2Q0d1Tx27FgFdOMcHMkxMPJr+Tp+/LiaNm2qxx9/nEs3OBHhpfJo06aNNm3apKioKGVmZiorK0s9evSwj584cUJ+fn5O7NC9HD9+3NktOB0hB7hJ69atU0xMjF599VX17dtXo0aNUr9+/a5Y+AzOceLECeXn56t169bskwoydepUDR06VPHx8crMzFS/fv0cVjvesmWLunbt6sQO3U9JSYliY2O1ceNGHT9+XB4eHmrRooUGDRqkESNGGH7+Gv/nAzfpscce09atW/XNN98oODhYU6ZMUWBgoKZNm6ajR486uz23URo0f2ncuHFq0aKF2rVrp7Zt2+rkyZNO6s69PPLII9qyZYt9Ta9169Y5jNeoUUNPP/20k7pzPzabTQ8//LDGjBmj//3vf2rXrp3uvvtuHT9+XE8++aQeeeQRZ7dY/mwwrFq1atm+/fZbZ7fhVrZv32578MEHbZ6enrYzZ844ux23EBISYouJibHf37p1q61KlSq2t99+25aenm4zm8220aNHO7FDwDliYmJstWvXtm3btu2KseTkZFvt2rVtq1evdkJnFYevq4BboKCgQO+9955iYmK0a9cuPfbYY6pRo4az23ILR48eVZcuXez3P/zwQw0YMEDDhw+XJM2bN++GJyfj98nKyrqhuiZNmpRzJ5Ckd955R88//7zDvKhSPXv21LRp07R27Vo98cQTTuiuYhByDKxbt272a5agfOzatUsrV67U+vXr1aJFC40aNUr/+c9/VLduXWe35jZ+fU231NRUj
R492n6/RYsWslgszmjN7VzrbB7b/79IqvTzGkaXL1+u6Nbc0oEDB7Rw4cJrjvft21dLliypwI4qHiHHhVit1huqK/2Bv2XLlvJsx+3dfffdysnJ0eOPP66UlBR16NDB2S25paZNmyo9PV1NmzbVDz/8oMzMTN133332cYvFIl9fXyd26D6++OKLqz5us9n07rvvasmSJfY1W1D+zpw5c92z2fz8/HT27NkK7KjiEXJcSJ06da47E770ryUjX2ytMvnqq69Us2ZNrVmzRv/+97+vWXfmzJkK7Mr9REREKDIyUpmZmdq2bZtat26t4OBg+3hqaqratm3rxA7dx9WC/scff6xp06bp66+/1tSpU/XXv/7VCZ25p+LiYlWpcu1f815eXoY/qkbIcSGffPKJ/d9tNpv69eunFStW6Pbbb3diV+6L9Vkqh6lTp+qnn37Sxo0b5e/vrw0bNjiMf/7551zyxAn27dun6OhoffrppxozZoy2bNnCelIVzGaz6cknn5SPj89VxwsLCyu4o4rHiscurHbt2tq/fz8rGjtJfn7+FZcOQOVUXFwsLy8vZ7fhFr799ls9//zz+s9//qM//elP+vvf/87PKCdhNXCO5AA3rX379lq9erXuv/9+Z7eCa/j666+1cuVKrVmzRqdPn3Z2O4b39NNPa+XKlerRo4f27t2rjh07Orslt2bk8HKjCDnATRo0aJB69uypZ555Rv/4xz/k7e3t7JYg6aeffrKvRp2WlqYuXbooKirK2W25heXLl6tatWrKycmxX6/qavbt21eBXcGdEXJcnNGX5K7MFi5cqEcffVSjRo3S1q1b9e9//1udOnVydltua+fOnVqxYoU2bNigJk2a6KuvvtInn3yibt26Obs1tzFr1ixntwA4YE6OC3n00Ucd7m/atEk9e/a8Yl7Ixo0bK7Itt1dYWKgZM2Zo6dKleuihh644m4H9Ub5eeeUVxcTEKC8vT8OGDdOf//xndejQQVWrVtX+/fsVFBTk7BYBOAlHclzIr9f6+POf/+ykTvBLhYWFysnJkYeHh3x9fa97yiZuvejoaEVHR2vu3LlMLq7ErFar1q5dq5UrV2rv3r3Obgdugp/GLoRJZJVPUlKSRo0apUaNGik9PV1t2rRxdktu58UXX9SqVav073//W8OGDdOIESNYF6cS+eSTTxQTE6ONGzfK19fXPS4KiUqDr6sM4MSJE8rPz1fr1q3l6cmF5SvKU089pdWrV+v555/X3/72N44iOFlKSopiYmL03nvvqWXLlsrMzFRKSorD6seoGP/73/8UGxurVatW6dy5czp79qzi4uL0pz/9iXmEqFD8RnQhMTExevXVVx0eGzdunFq0aKF27dqpbdu2OnnypJO6cz+ff/65UlNTNXPmzCsCjs1m09atWzV48GAnded+HnjgAa1evVoWi0VPP/20goOD9cADD+gPf/jDFf/foHz85z//Ub9+/dSqVStlZGTolVde0alTp+Tp6al27doRcFDhCDku5K233nK48GNCQoJWrVqlNWvWaM+ePapTp47mzJnjxA7dy759+9S5c2eHx44dO6YXXnhBTZo00SOPPKKCggIndee+ateuraeeekq7du1SRkaGQkJCtGDBAme35RaGDBmiTp066fTp09qwYYMGDBjA0gpwKkKOCzl69Ki6dOliv//hhx9qwIABGj58uDp37qx58+YpOTnZiR26l9If3oWFhVq7dq169uypVq1aad68eYqKilJOTo42b97s5C6Nb9u2bQoKCrrqBWwDAwOVmJiouLg4J3TmfkaPHq1ly5apT58+Wr58ueEv/ojKj5DjQi5evGi/wrj084UHu3fvbr/fokULWSwWZ7TmltLT0/X000/L399fixYt0sCBA3Xy5El5enoqLCzMYV+h/CxatEhjx4696uft6+ur8ePHa9myZU7ozP28+eabOn36tMaNG6d33nlHjRo10oABA2Sz2VRSUuLs9uCGCDkupGnTp
kpPT5ck/fDDD8rMzHSYVGmxWK44zRzlJyQkRD4+Ptq5c6f27Nmjv/zlL/Lz83N2W25n//796tOnzzXHe/fubf//BuWvevXqioiIUEpKig4ePKi7775bfn5+uu+++/T444+zbhQqFCHHhURERCgyMlIvvviiHnvsMbVu3VrBwcH28dTUVE6drUC9evXSypUrNXfuXCUkJIgTFZ0jOztbVatWveZ4lSpVlJubW4EdodSdd96pefPm6eTJk3r77bf1008/adiwYc5uC26EdXJcyNSpU/XTTz9p48aN8vf314YNGxzGP//8cw0dOtRJ3bmfxMREnTx5UjExMZowYYIuXryoIUOGSOJyGxXp9ttv16FDh9SyZcurjh84cECNGjWq4K7wS56ennr44YcVGhqqpUuXOrsduBHWyTGY4uJi1mtxko8//lgxMTF6//33FRgYqMGDB2vw4MFXnIGFW2vSpEnavn279uzZo2rVqjmMXbx4UV27dlWPHj20ZMkSJ3XoXnJzc7Vr1y55e3urV69e8vLy0qVLl/T6669rwYIFunTpkn744Qdntwk3QcgxiK+//lorV67UmjVrdPr0aWe34xaKi4v1z3/+U//9739VVFSkXr16adasWSooKNDbb7+tmJgYHThwQMXFxc5u1dCys7PVuXNneXl5aeLEiWrVqpUk6fDhw1q2bJmKi4u1b98+5ktVgM8++0z9+/eX1WqVh4eHunTpolWrVmngwIGqUqWK/vKXvygiIkLVq1d3dqtwFza4rPz8fFtMTIzt/vvvt3l5edlCQkJsCxcudHZbbmPu3Lk2T09PW+/evW0DBgywVatWzTZy5EiHmvT0dCd1516OHz9u69u3r83T09Pm4eFh8/DwsHl6etr69u1r++6775zdntt44IEHbMOGDbMdPHjQ9uyzz9o8PDxsd911l23Dhg3Obg1uiiM5Lmjnzp1asWKFNmzYoCZNmuirr77SJ598om7dujm7Nbdy55136tlnn9VTTz0l6eevq8LDw3Xx4kUur+EkZ8+e1TfffCObzaY777zTYfFMlL/69evr008/VVBQkC5evKhatWpp48aNGjBggLNbg5viJ7ELeeWVV3T33Xdr8ODBqlu3rnbs2KGDBw/Kw8ND9evXd3Z7bicrK0v9+vWz3w8NDZWHh4dOnTrlxK7cW926dXXPPfeoa9euBBwnOHv2rBo0aCDp51PJa9SowRmfcCrOrnIh0dHRio6O1ty5c5lcXAlcvnz5iomuVatW1aVLl5zUEeB8X375pX1RUpvNpiNHjig/P9+hpn379s5oDW6Ir6tcyPz587Vq1SoVFBRo2LBhGjFihNq2bauqVatq//79CgoKcnaLbsXT01N9+/aVj4+P/bFNmzapZ8+eqlmzpv0xFj+Du/D09JSHh8dV14wqfdzDw4PJ+KgwhBwXlJKSopiYGL333ntq2bKlMjMzlZKS4rD6McrfyJEjb6hu1apV5dwJUDmcOHHihuqaNm1azp0APyPkuLDz588rLi5OMTExSk9PV9euXTV48GBFRUU5uzUAbmju3Ll69tlnVaNGDWe3Akgi5BjGoUOHtHLlSq1du1Y5OTnObgeAG/Ly8tLp06fVsGFDZ7cCSOLsKpeybds2BQUFyWq1XjEWGBioxMRExcXFOaEzABDXb0OlQ8hxIYsWLdLYsWNlMpmuGPP19dX48eO1bNkyJ3QGAD/jum2oTPi6yoU0bdpUCQkJatOmzVXHDx8+rN69eysrK6uCOwOAn8+u8vX1/c2gc+bMmQrqCO6OdXJcSHZ2tqpWrXrN8SpVqig3N7cCOwIAR3PmzJGvr6+z2wAkEXJcyu23365Dhw6pZcuWVx0/cOCAGjVqVMFdAcD/GTp0KBOPUWkwJ8eF9OvXTy+88IIKCgquGLt48aJmzZql/v37O6EzAGA+Diof5uS4kOzsbHXu3FleXl6aOHGiWrVqJennuTjLli1TcXGx9u3bJz8/Pyd3CsAdeXp6y
mKxcCQHlQYhx8WcOHFCEyZMUGJiov10TQ8PD4WFhWnZsmVq3ry5kzsEAKByIOS4qLNnz+qbb76RzWbTnXfeyRWXAQD4FUIOAAAwJCYeAwAAQyLkAAAAQyLkAAAAQyLkAAAAQyLkAAAAQyLkAAAAQyLkAAAAQyLkAAAAQ/p/1GSsOxB15N0AAAAASUVORK5CYII=\n" + }, + "metadata": {} + } + ] + }, + { + "cell_type": "code", + "source": [ + "#gonna put type of payments into dummies style :)\n", + "sample_dummies = pd.get_dummies(sample[\"type\"])" + ], + "metadata": { + "id": "6HOvx_bvh65Q" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "sample = pd.concat([sample, sample_dummies], axis=1)" + ], + "metadata": { + "id": "uAtBV-IOiAD9" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "sample.drop(\"type\", axis=1, inplace = True)" + ], + "metadata": { + "id": "0NxS08OliC15" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Checking for data in columns. Can I drop these?\n", + "print(sample[\"nameDest\"].nunique())\n", + "print(sample[\"nameOrig\"].nunique())" + ], + "metadata": { + "id": "0VjPBVKbg_3j", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "169debca-fc6e-4f3d-a3cf-92eacac3a253" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "62991\n", + "99995\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "print(sample.dtypes)\n", + "## nameOrig and nameDest -- > objects. Not even thinking of using as features." 
+ ], + "metadata": { + "id": "GN-ZA4eYg_1Z", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "42319517-20c1-4186-cd29-4ec8e8e3e840" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "step int64\n", + "amount float64\n", + "nameOrig object\n", + "oldbalanceOrg float64\n", + "newbalanceOrig float64\n", + "nameDest object\n", + "oldbalanceDest float64\n", + "newbalanceDest float64\n", + "isFraud float64\n", + "isFlaggedFraud float64\n", + "CASH_IN uint8\n", + "CASH_OUT uint8\n", + "DEBIT uint8\n", + "PAYMENT uint8\n", + "TRANSFER uint8\n", + "dtype: object\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Feature selection\n", + "numeric_columns = [\"step\", \"amount\", \"oldbalanceOrg\", \"oldbalanceDest\", \"newbalanceDest\", \"isFraud\", \"CASH_IN\", \"CASH_OUT\",\"DEBIT\",\n", + " \"PAYMENT\", \"TRANSFER\"]\n", + "\n", + "import numpy as np\n", + "import seaborn as sns\n", + "import matplotlib.pyplot as plt\n", + "\n", + "# example in class with salaries dataset\n", + "\n", + "corr=np.abs(sample[numeric_columns].corr())\n", + "\n", + "#Set up mask for triangle representation\n", + "mask = np.zeros_like(corr, dtype=bool)\n", + "mask[np.triu_indices_from(mask)] = True\n", + "\n", + "# Set up the matplotlib figure\n", + "f, ax = plt.subplots(figsize=(14, 14))\n", + "# Generate a custom diverging colormap\n", + "cmap = sns.diverging_palette(220, 10, as_cmap=True)\n", + "# Draw the heatmap with the mask and correct aspect ratio\n", + "sns.heatmap(corr, mask=mask, vmax=1,square=True, linewidths=.5, cbar_kws={\"shrink\": .5},annot = corr)\n", + "\n", + "plt.show()" + ], + "metadata": { + "id": "j_f9TtaTPRSX" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Z96bJ7k97_Uu" + }, + "source": [ + "### Run a logistic regression classifier and evaluate its accuracy." 
+ ] + }, + { + "cell_type": "code", + "source": [ + "# define my X and y\n", + "\n", + "# Assuming 'sample' is your original DataFrame\n", + "features = sample[[\"step\", \"amount\", \"oldbalanceOrg\", \"oldbalanceDest\", \"newbalanceDest\", \"CASH_IN\", \"CASH_OUT\", \"DEBIT\", \"PAYMENT\", \"TRANSFER\"]]\n", + "\n", + "#features = sample.drop(columns = [\"isFraud\", \"nameOrig\", \"nameDest\",\"newbalanceOrig\"])\n", + "target = sample[\"isFraud\"]\n", + "\n", + "# Get the numeric and train\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(features, target, test_size = 0.25, random_state = 0)" + ], + "metadata": { + "id": "TWu-u-gpkznE" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ERAMCiPx7_Uu", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "b68b6089-379a-414c-c78b-eadb8d63e156" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "accuracy_score is: 0.99956\n" + ] + } + ], + "source": [ + "from sklearn.linear_model import LogisticRegression\n", + "\n", + "sample_model = LogisticRegression (max_iter = 4000) #used in class the same.\n", + "sample_model.fit(X_train, y_train)\n", + "sample_model.score(X_test, y_test)\n", + "\n", + "print(\"accuracy_score is:\", sample_model.score(X_test, y_test))" + ] + }, + { + "cell_type": "code", + "source": [ + "# confusion_matrix, classification_report\n", + "from sklearn.metrics import confusion_matrix, classification_report\n", + "\n", + "pred = sample_model.predict(X_test)\n", + "print(\"classification_report\\n\",classification_report(y_test, pred),\"\\n\\nconfusion_matrix\") # LOL - recall 1.00\n", + "confusion_matrix(y_true = y_test, y_pred = pred)" + ], + "metadata": { + "id": "Lu3syibZn4BZ" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# true negative 
false positive\n", + "# false negative true positive\n", + "\n", + "### The thing is, this is super wrong and I would say it's because of our imbalanced sample [0 and 1's] ---> ### Over sampling" + ], + "metadata": { + "id": "mVl6t_tJ4UoK" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "train = pd.concat([X_train, y_train], axis=1) ### ONLY WITH THE TRAIN\n", + "print(train.shape)\n", + "# now, who is our minority and our majority class? Let's separate them\n", + "no_fraud = train[train[\"isFraud\"] == 0]\n", + "yes_fraud = train[train[\"isFraud\"] == 1]\n", + "print(no_fraud.shape)\n", + "print(yes_fraud.shape)\n", + "print(\"Holy guacamole!\")" + ], + "metadata": { + "id": "D13G0trhmNTB", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "79609231-d25a-4b2a-f5b7-182721840ea2" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "(75000, 11)\n", + "(74959, 11)\n", + "(41, 11)\n", + "Holy guacamole!\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "from sklearn.utils import resample\n", + "\n", + "yes_fraud_oversampled = resample(yes_fraud, # datapoints I want to resample\n", + " replace = True, # in oversampling always True\n", + " n_samples = len(no_fraud), # len of the majority class\n", + " random_state = 0)\n", + "\n", + "# Let's check these babies now\n", + "print(yes_fraud_oversampled.shape)\n", + "print(no_fraud.shape)" + ], + "metadata": { + "id": "4ovceMnxpSQt", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "abd45dcc-3a19-4b8a-d94c-75c69aa31284" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "(74959, 11)\n", + "(74959, 11)\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# And let's put them together now\n", + "train_oversampled = pd.concat([no_fraud, yes_fraud_oversampled])\n", + "train_oversampled.head()\n", 
+ "\n", + "# so we can divide again :D\n", + "# let's divide again\n", + "X_train_over = train_oversampled.drop(columns = [\"isFraud\"])\n", + "y_train_over = train_oversampled[\"isFraud\"] # target here" + ], + "metadata": { + "id": "SIHnx_LTp2lI" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "### Now let's test it:\n", + "sample_model = LogisticRegression (max_iter = 4000) #used in class the same.\n", + "sample_model.fit(X_train_over, y_train_over)\n", + "sample_model.score(X_test, y_test) #### just to test this\n", + "print(\"Accuracy_score is:\", sample_model.score(X_test, y_test))\n", + "print(\"Not a great score, but probably more reliable!\")" + ], + "metadata": { + "id": "UVMBmqiIqUIM", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "b1b770fe-9ff1-4819-9e58-851018964842" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Accuracy_score is: 0.55584\n", + "Not a great score, but probably more reliable!\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "pred = sample_model.predict(X_test)\n", + "print(classification_report(y_true = y_test, y_pred = pred)) ### Remember that we did not oversample the test, so it's y_test/y_pred" + ], + "metadata": { + "id": "IG2SkmRVqUFc" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "from sklearn.metrics import recall_score\n", + "recall_score(y_test, pred) #### not bad, hein?" + ], + "metadata": { + "id": "nKepCQS_qUAE" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "cjbRNnsT7_Uu" + }, + "source": [ + "### Now pick a model of your choice and evaluate its accuracy." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "n9wWucgV7_Uv", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "75063e08-c356-4071-855e-0fb94a280a6c" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Accuracy_score is: 0.99924\n" + ] + } + ], + "source": [ + "from sklearn.tree import DecisionTreeClassifier\n", + "\n", + "sample_model = DecisionTreeClassifier(random_state = 1)\n", + "sample_model.fit(X_train_over, y_train_over)\n", + "print(\"Accuracy_score is:\", sample_model.score(X_test, y_test))\n", + "# not good i think" + ] + }, + { + "cell_type": "code", + "source": [ + "pred = sample_model.predict(X_test)\n", + "print(classification_report(y_true = y_test, y_pred = pred)) ### Remember that we did not oversample the test, so it's y_test/y_pred\n", + "print(\"Recall score is:\",recall_score(y_test, pred))" + ], + "metadata": { + "id": "8R7xkyVwS0qb" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "a2fECt54S0nx" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "u8J_5Cmt7_Uv" + }, + "source": [ + "### Which model worked better and how do you know?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "QIt8cE8A7_Uw" + }, + "outputs": [], + "source": [ + "## Just gonna open a ticket for this :D kkkkk" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1UqblAe37_Uw" + }, + "source": [ + "### Note: before doing the first commit, make sure you don't include the large csv file, either by adding it to .gitignore, or by deleting it." 
+ ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "fQR6p2LXhWjJ" + }, + "execution_count": null, + "outputs": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + }, + "colab": { + "provenance": [] + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/.gitignore b/your-code/.gitignore similarity index 100% rename from .gitignore rename to your-code/.gitignore diff --git a/README.md b/your-code/README.md similarity index 100% rename from README.md rename to your-code/README.md