diff --git a/your-code/lab_imbalance.ipynb b/your-code/lab_imbalance.ipynb
index a3a5359..4ef0464 100644
--- a/your-code/lab_imbalance.ipynb
+++ b/your-code/lab_imbalance.ipynb
@@ -28,11 +28,350 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": 35,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here"
+ "import pandas as pd\n",
+ "import numpy as np"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Load the transactions table from data.csv (path is relative to the working directory)\n",
+ "data = pd.read_csv(filepath_or_buffer=\"data.csv\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " step | \n",
+ " type | \n",
+ " amount | \n",
+ " nameOrig | \n",
+ " oldbalanceOrg | \n",
+ " newbalanceOrig | \n",
+ " nameDest | \n",
+ " oldbalanceDest | \n",
+ " newbalanceDest | \n",
+ " isFraud | \n",
+ " isFlaggedFraud | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " PAYMENT | \n",
+ " 9839.64 | \n",
+ " C1231006815 | \n",
+ " 170136.0 | \n",
+ " 160296.36 | \n",
+ " M1979787155 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 1 | \n",
+ " PAYMENT | \n",
+ " 1864.28 | \n",
+ " C1666544295 | \n",
+ " 21249.0 | \n",
+ " 19384.72 | \n",
+ " M2044282225 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 1 | \n",
+ " TRANSFER | \n",
+ " 181.00 | \n",
+ " C1305486145 | \n",
+ " 181.0 | \n",
+ " 0.00 | \n",
+ " C553264065 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 1 | \n",
+ " CASH_OUT | \n",
+ " 181.00 | \n",
+ " C840083671 | \n",
+ " 181.0 | \n",
+ " 0.00 | \n",
+ " C38997010 | \n",
+ " 21182.0 | \n",
+ " 0.0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 1 | \n",
+ " PAYMENT | \n",
+ " 11668.14 | \n",
+ " C2048537720 | \n",
+ " 41554.0 | \n",
+ " 29885.86 | \n",
+ " M1230701703 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " step type amount nameOrig oldbalanceOrg newbalanceOrig \\\n",
+ "0 1 PAYMENT 9839.64 C1231006815 170136.0 160296.36 \n",
+ "1 1 PAYMENT 1864.28 C1666544295 21249.0 19384.72 \n",
+ "2 1 TRANSFER 181.00 C1305486145 181.0 0.00 \n",
+ "3 1 CASH_OUT 181.00 C840083671 181.0 0.00 \n",
+ "4 1 PAYMENT 11668.14 C2048537720 41554.0 29885.86 \n",
+ "\n",
+ " nameDest oldbalanceDest newbalanceDest isFraud isFlaggedFraud \n",
+ "0 M1979787155 0.0 0.0 0 0 \n",
+ "1 M2044282225 0.0 0.0 0 0 \n",
+ "2 C553264065 0.0 0.0 1 0 \n",
+ "3 C38997010 21182.0 0.0 1 0 \n",
+ "4 M1230701703 0.0 0.0 0 0 "
+ ]
+ },
+ "execution_count": 37,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data.head(n=5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(6362620, 11)"
+ ]
+ },
+ "execution_count": 38,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " step | \n",
+ " type | \n",
+ " amount | \n",
+ " nameOrig | \n",
+ " oldbalanceOrg | \n",
+ " newbalanceOrig | \n",
+ " nameDest | \n",
+ " oldbalanceDest | \n",
+ " newbalanceDest | \n",
+ " isFraud | \n",
+ " isFlaggedFraud | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 5870199 | \n",
+ " 403 | \n",
+ " PAYMENT | \n",
+ " 17509.95 | \n",
+ " C482191795 | \n",
+ " 303892.08 | \n",
+ " 286382.13 | \n",
+ " M344648530 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 4193018 | \n",
+ " 305 | \n",
+ " CASH_OUT | \n",
+ " 164344.44 | \n",
+ " C1742479150 | \n",
+ " 48329.60 | \n",
+ " 0.00 | \n",
+ " C1268033679 | \n",
+ " 204610.35 | \n",
+ " 368954.79 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 314193 | \n",
+ " 16 | \n",
+ " CASH_IN | \n",
+ " 182877.50 | \n",
+ " C1625640897 | \n",
+ " 3824909.55 | \n",
+ " 4007787.05 | \n",
+ " C2141776586 | \n",
+ " 3283310.23 | \n",
+ " 2965018.09 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 5790096 | \n",
+ " 401 | \n",
+ " TRANSFER | \n",
+ " 136487.99 | \n",
+ " C103052026 | \n",
+ " 2052.76 | \n",
+ " 0.00 | \n",
+ " C937885086 | \n",
+ " 2215463.06 | \n",
+ " 2351951.05 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 3911997 | \n",
+ " 284 | \n",
+ " CASH_IN | \n",
+ " 217344.37 | \n",
+ " C495572889 | \n",
+ " 4917976.08 | \n",
+ " 5135320.46 | \n",
+ " C1487826082 | \n",
+ " 11181782.39 | \n",
+ " 10964438.01 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " step type amount nameOrig oldbalanceOrg \\\n",
+ "5870199 403 PAYMENT 17509.95 C482191795 303892.08 \n",
+ "4193018 305 CASH_OUT 164344.44 C1742479150 48329.60 \n",
+ "314193 16 CASH_IN 182877.50 C1625640897 3824909.55 \n",
+ "5790096 401 TRANSFER 136487.99 C103052026 2052.76 \n",
+ "3911997 284 CASH_IN 217344.37 C495572889 4917976.08 \n",
+ "\n",
+ " newbalanceOrig nameDest oldbalanceDest newbalanceDest isFraud \\\n",
+ "5870199 286382.13 M344648530 0.00 0.00 0 \n",
+ "4193018 0.00 C1268033679 204610.35 368954.79 0 \n",
+ "314193 4007787.05 C2141776586 3283310.23 2965018.09 0 \n",
+ "5790096 0.00 C937885086 2215463.06 2351951.05 0 \n",
+ "3911997 5135320.46 C1487826082 11181782.39 10964438.01 0 \n",
+ "\n",
+ " isFlaggedFraud \n",
+ "5870199 0 \n",
+ "4193018 0 \n",
+ "314193 0 \n",
+ "5790096 0 \n",
+ "3911997 0 "
+ ]
+ },
+ "execution_count": 39,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "n = 100000  # number of rows to draw at random, without replacement\n",
+ "data_sample = data.sample(n=n, axis=0, random_state=0)  # fixed seed: the same rows are drawn on every run\n",
+ "# Preview the sampled rows (the original row labels are kept)\n",
+ "data_sample.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(100000, 11)"
+ ]
+ },
+ "execution_count": 40,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data_sample.shape"
]
},
{
@@ -44,7 +383,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 41,
"metadata": {},
"outputs": [],
"source": [
@@ -60,11 +399,599 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": 42,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "step int64\n",
+ "type object\n",
+ "amount float64\n",
+ "nameOrig object\n",
+ "oldbalanceOrg float64\n",
+ "newbalanceOrig float64\n",
+ "nameDest object\n",
+ "oldbalanceDest float64\n",
+ "newbalanceDest float64\n",
+ "isFraud int64\n",
+ "isFlaggedFraud int64\n",
+ "dtype: object"
+ ]
+ },
+ "execution_count": 42,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Your code here\n",
+ "data_sample.dtypes"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 43,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "step 0\n",
+ "type 0\n",
+ "amount 0\n",
+ "nameOrig 0\n",
+ "oldbalanceOrg 0\n",
+ "newbalanceOrig 0\n",
+ "nameDest 0\n",
+ "oldbalanceDest 0\n",
+ "newbalanceDest 0\n",
+ "isFraud 0\n",
+ "isFlaggedFraud 0\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 43,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data_sample.isna().sum()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 44,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "CASH_OUT 35351\n",
+ "PAYMENT 33775\n",
+ "CASH_IN 21904\n",
+ "TRANSFER 8311\n",
+ "DEBIT 659\n",
+ "Name: type, dtype: int64"
+ ]
+ },
+ "execution_count": 44,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data_sample.loc[:, \"type\"].value_counts()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 45,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dummies = pd.get_dummies(data=data_sample.loc[:, \"type\"])  # one indicator column per transaction type"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 46,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here\n"
+ "new_data = pd.concat(objs=[data_sample, dummies], axis=\"columns\")  # append the indicator columns side by side"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 47,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "new_data = new_data.drop(columns=[\"type\"])  # the categorical column is now covered by the indicator columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 48,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "new_data = new_data.drop(columns=[\"nameOrig\"])  # dropped: string column, not usable as a numeric feature"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 49,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "new_data = new_data.drop(columns=[\"nameDest\"])  # dropped: string column, not usable as a numeric feature"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 50,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "new_data = new_data.drop(columns=[\"isFlaggedFraud\"])  # dropped: presumably all zeros in this sample - TODO confirm with value_counts()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 51,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " step | \n",
+ " amount | \n",
+ " oldbalanceOrg | \n",
+ " newbalanceOrig | \n",
+ " oldbalanceDest | \n",
+ " newbalanceDest | \n",
+ " isFraud | \n",
+ " CASH_IN | \n",
+ " CASH_OUT | \n",
+ " DEBIT | \n",
+ " PAYMENT | \n",
+ " TRANSFER | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 5870199 | \n",
+ " 403 | \n",
+ " 17509.95 | \n",
+ " 303892.08 | \n",
+ " 286382.13 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 4193018 | \n",
+ " 305 | \n",
+ " 164344.44 | \n",
+ " 48329.60 | \n",
+ " 0.00 | \n",
+ " 204610.35 | \n",
+ " 368954.79 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 314193 | \n",
+ " 16 | \n",
+ " 182877.50 | \n",
+ " 3824909.55 | \n",
+ " 4007787.05 | \n",
+ " 3283310.23 | \n",
+ " 2965018.09 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 5790096 | \n",
+ " 401 | \n",
+ " 136487.99 | \n",
+ " 2052.76 | \n",
+ " 0.00 | \n",
+ " 2215463.06 | \n",
+ " 2351951.05 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 3911997 | \n",
+ " 284 | \n",
+ " 217344.37 | \n",
+ " 4917976.08 | \n",
+ " 5135320.46 | \n",
+ " 11181782.39 | \n",
+ " 10964438.01 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " step amount oldbalanceOrg newbalanceOrig oldbalanceDest \\\n",
+ "5870199 403 17509.95 303892.08 286382.13 0.00 \n",
+ "4193018 305 164344.44 48329.60 0.00 204610.35 \n",
+ "314193 16 182877.50 3824909.55 4007787.05 3283310.23 \n",
+ "5790096 401 136487.99 2052.76 0.00 2215463.06 \n",
+ "3911997 284 217344.37 4917976.08 5135320.46 11181782.39 \n",
+ "\n",
+ " newbalanceDest isFraud CASH_IN CASH_OUT DEBIT PAYMENT TRANSFER \n",
+ "5870199 0.00 0 0 0 0 1 0 \n",
+ "4193018 368954.79 0 0 1 0 0 0 \n",
+ "314193 2965018.09 0 1 0 0 0 0 \n",
+ "5790096 2351951.05 0 0 0 0 0 1 \n",
+ "3911997 10964438.01 0 1 0 0 0 0 "
+ ]
+ },
+ "execution_count": 51,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "new_data.head(n=5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 52,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "step int64\n",
+ "amount float64\n",
+ "oldbalanceOrg float64\n",
+ "newbalanceOrig float64\n",
+ "oldbalanceDest float64\n",
+ "newbalanceDest float64\n",
+ "isFraud int64\n",
+ "CASH_IN uint8\n",
+ "CASH_OUT uint8\n",
+ "DEBIT uint8\n",
+ "PAYMENT uint8\n",
+ "TRANSFER uint8\n",
+ "dtype: object"
+ ]
+ },
+ "execution_count": 52,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "new_data.dtypes"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 53,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " step | \n",
+ " amount | \n",
+ " oldbalanceOrg | \n",
+ " newbalanceOrig | \n",
+ " oldbalanceDest | \n",
+ " newbalanceDest | \n",
+ " isFraud | \n",
+ " CASH_IN | \n",
+ " CASH_OUT | \n",
+ " DEBIT | \n",
+ " PAYMENT | \n",
+ " TRANSFER | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | step | \n",
+ " 1.000000 | \n",
+ " 0.022357 | \n",
+ " -0.013393 | \n",
+ " -0.013325 | \n",
+ " 0.026208 | \n",
+ " 0.024380 | \n",
+ " 0.035586 | \n",
+ " 0.007531 | \n",
+ " -0.018334 | \n",
+ " 0.001295 | \n",
+ " 0.008759 | \n",
+ " 0.005080 | \n",
+ "
\n",
+ " \n",
+ " | amount | \n",
+ " 0.022357 | \n",
+ " 1.000000 | \n",
+ " -0.004457 | \n",
+ " -0.009061 | \n",
+ " 0.271424 | \n",
+ " 0.433445 | \n",
+ " 0.073067 | \n",
+ " -0.010418 | \n",
+ " -0.003892 | \n",
+ " -0.022992 | \n",
+ " -0.192782 | \n",
+ " 0.359373 | \n",
+ "
\n",
+ " \n",
+ " | oldbalanceOrg | \n",
+ " -0.013393 | \n",
+ " -0.004457 | \n",
+ " 1.000000 | \n",
+ " 0.998834 | \n",
+ " 0.064978 | \n",
+ " 0.041942 | \n",
+ " 0.007765 | \n",
+ " 0.508080 | \n",
+ " -0.201436 | \n",
+ " -0.021589 | \n",
+ " -0.189429 | \n",
+ " -0.081529 | \n",
+ "
\n",
+ " \n",
+ " | newbalanceOrig | \n",
+ " -0.013325 | \n",
+ " -0.009061 | \n",
+ " 0.998834 | \n",
+ " 1.000000 | \n",
+ " 0.066294 | \n",
+ " 0.041518 | \n",
+ " -0.009859 | \n",
+ " 0.528501 | \n",
+ " -0.211654 | \n",
+ " -0.021998 | \n",
+ " -0.193733 | \n",
+ " -0.086935 | \n",
+ "
\n",
+ " \n",
+ " | oldbalanceDest | \n",
+ " 0.026208 | \n",
+ " 0.271424 | \n",
+ " 0.064978 | \n",
+ " 0.066294 | \n",
+ " 1.000000 | \n",
+ " 0.978410 | \n",
+ " -0.005100 | \n",
+ " 0.071319 | \n",
+ " 0.085454 | \n",
+ " 0.011058 | \n",
+ " -0.224243 | \n",
+ " 0.126101 | \n",
+ "
\n",
+ " \n",
+ " | newbalanceDest | \n",
+ " 0.024380 | \n",
+ " 0.433445 | \n",
+ " 0.041942 | \n",
+ " 0.041518 | \n",
+ " 0.978410 | \n",
+ " 1.000000 | \n",
+ " 0.002068 | \n",
+ " 0.032236 | \n",
+ " 0.093539 | \n",
+ " 0.008347 | \n",
+ " -0.232754 | \n",
+ " 0.186032 | \n",
+ "
\n",
+ " \n",
+ " | isFraud | \n",
+ " 0.035586 | \n",
+ " 0.073067 | \n",
+ " 0.007765 | \n",
+ " -0.009859 | \n",
+ " -0.005100 | \n",
+ " 0.002068 | \n",
+ " 1.000000 | \n",
+ " -0.018736 | \n",
+ " 0.017057 | \n",
+ " -0.002881 | \n",
+ " -0.025265 | \n",
+ " 0.042662 | \n",
+ "
\n",
+ " \n",
+ " | CASH_IN | \n",
+ " 0.007531 | \n",
+ " -0.010418 | \n",
+ " 0.508080 | \n",
+ " 0.528501 | \n",
+ " 0.071319 | \n",
+ " 0.032236 | \n",
+ " -0.018736 | \n",
+ " 1.000000 | \n",
+ " -0.391622 | \n",
+ " -0.043135 | \n",
+ " -0.378211 | \n",
+ " -0.159447 | \n",
+ "
\n",
+ " \n",
+ " | CASH_OUT | \n",
+ " -0.018334 | \n",
+ " -0.003892 | \n",
+ " -0.201436 | \n",
+ " -0.211654 | \n",
+ " 0.085454 | \n",
+ " 0.093539 | \n",
+ " 0.017057 | \n",
+ " -0.391622 | \n",
+ " 1.000000 | \n",
+ " -0.060228 | \n",
+ " -0.528088 | \n",
+ " -0.222632 | \n",
+ "
\n",
+ " \n",
+ " | DEBIT | \n",
+ " 0.001295 | \n",
+ " -0.022992 | \n",
+ " -0.021589 | \n",
+ " -0.021998 | \n",
+ " 0.011058 | \n",
+ " 0.008347 | \n",
+ " -0.002881 | \n",
+ " -0.043135 | \n",
+ " -0.060228 | \n",
+ " 1.000000 | \n",
+ " -0.058165 | \n",
+ " -0.024521 | \n",
+ "
\n",
+ " \n",
+ " | PAYMENT | \n",
+ " 0.008759 | \n",
+ " -0.192782 | \n",
+ " -0.189429 | \n",
+ " -0.193733 | \n",
+ " -0.224243 | \n",
+ " -0.232754 | \n",
+ " -0.025265 | \n",
+ " -0.378211 | \n",
+ " -0.528088 | \n",
+ " -0.058165 | \n",
+ " 1.000000 | \n",
+ " -0.215008 | \n",
+ "
\n",
+ " \n",
+ " | TRANSFER | \n",
+ " 0.005080 | \n",
+ " 0.359373 | \n",
+ " -0.081529 | \n",
+ " -0.086935 | \n",
+ " 0.126101 | \n",
+ " 0.186032 | \n",
+ " 0.042662 | \n",
+ " -0.159447 | \n",
+ " -0.222632 | \n",
+ " -0.024521 | \n",
+ " -0.215008 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " step amount oldbalanceOrg newbalanceOrig \\\n",
+ "step 1.000000 0.022357 -0.013393 -0.013325 \n",
+ "amount 0.022357 1.000000 -0.004457 -0.009061 \n",
+ "oldbalanceOrg -0.013393 -0.004457 1.000000 0.998834 \n",
+ "newbalanceOrig -0.013325 -0.009061 0.998834 1.000000 \n",
+ "oldbalanceDest 0.026208 0.271424 0.064978 0.066294 \n",
+ "newbalanceDest 0.024380 0.433445 0.041942 0.041518 \n",
+ "isFraud 0.035586 0.073067 0.007765 -0.009859 \n",
+ "CASH_IN 0.007531 -0.010418 0.508080 0.528501 \n",
+ "CASH_OUT -0.018334 -0.003892 -0.201436 -0.211654 \n",
+ "DEBIT 0.001295 -0.022992 -0.021589 -0.021998 \n",
+ "PAYMENT 0.008759 -0.192782 -0.189429 -0.193733 \n",
+ "TRANSFER 0.005080 0.359373 -0.081529 -0.086935 \n",
+ "\n",
+ " oldbalanceDest newbalanceDest isFraud CASH_IN CASH_OUT \\\n",
+ "step 0.026208 0.024380 0.035586 0.007531 -0.018334 \n",
+ "amount 0.271424 0.433445 0.073067 -0.010418 -0.003892 \n",
+ "oldbalanceOrg 0.064978 0.041942 0.007765 0.508080 -0.201436 \n",
+ "newbalanceOrig 0.066294 0.041518 -0.009859 0.528501 -0.211654 \n",
+ "oldbalanceDest 1.000000 0.978410 -0.005100 0.071319 0.085454 \n",
+ "newbalanceDest 0.978410 1.000000 0.002068 0.032236 0.093539 \n",
+ "isFraud -0.005100 0.002068 1.000000 -0.018736 0.017057 \n",
+ "CASH_IN 0.071319 0.032236 -0.018736 1.000000 -0.391622 \n",
+ "CASH_OUT 0.085454 0.093539 0.017057 -0.391622 1.000000 \n",
+ "DEBIT 0.011058 0.008347 -0.002881 -0.043135 -0.060228 \n",
+ "PAYMENT -0.224243 -0.232754 -0.025265 -0.378211 -0.528088 \n",
+ "TRANSFER 0.126101 0.186032 0.042662 -0.159447 -0.222632 \n",
+ "\n",
+ " DEBIT PAYMENT TRANSFER \n",
+ "step 0.001295 0.008759 0.005080 \n",
+ "amount -0.022992 -0.192782 0.359373 \n",
+ "oldbalanceOrg -0.021589 -0.189429 -0.081529 \n",
+ "newbalanceOrig -0.021998 -0.193733 -0.086935 \n",
+ "oldbalanceDest 0.011058 -0.224243 0.126101 \n",
+ "newbalanceDest 0.008347 -0.232754 0.186032 \n",
+ "isFraud -0.002881 -0.025265 0.042662 \n",
+ "CASH_IN -0.043135 -0.378211 -0.159447 \n",
+ "CASH_OUT -0.060228 -0.528088 -0.222632 \n",
+ "DEBIT 1.000000 -0.058165 -0.024521 \n",
+ "PAYMENT -0.058165 1.000000 -0.215008 \n",
+ "TRANSFER -0.024521 -0.215008 1.000000 "
+ ]
+ },
+ "execution_count": 53,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "new_data.corr(method=\"pearson\")"
]
},
{
@@ -76,11 +1003,34 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 57,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "0.9985\n",
+ "Training data accuracy was 0.9987\n"
+ ]
+ }
+ ],
"source": [
- "# Your code here"
+ "# Baseline model: logistic regression, evaluated with plain accuracy\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "from sklearn.linear_model import LogisticRegression\n",
+ "from sklearn.metrics import accuracy_score\n",
+ "\n",
+ "features = new_data.drop(\"isFraud\", axis = 1)  # every column except the label\n",
+ "target = new_data[\"isFraud\"]  # 0/1 fraud label\n",
+ "# stratify keeps the fraud share identical in train and test; isFraud is expected to be heavily imbalanced\n",
+ "X_train, X_test, y_train, y_test = train_test_split(features, target, test_size = 0.20, random_state = 0, stratify = target)\n",
+ "\n",
+ "log_reg = LogisticRegression()\n",
+ "log_reg.fit(X_train, y_train)\n",
+ "# score() on a classifier is mean accuracy; first line is the test split, second the training split\n",
+ "print(log_reg.score(X_test, y_test))\n",
+ "print(f\"Training data accuracy was {log_reg.score(X_train, y_train)}\")"
]
},
{
@@ -92,11 +1042,28 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 59,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "0.5383219853506562\n",
+ "Training data accuracy was 0.9989275131167832\n"
+ ]
+ }
+ ],
"source": [
- "# Your code here"
+ "# Second model: Extreme Gradient Boosting (XGBoost)\n",
+ "# isFraud is a 0/1 label, so fit a classifier; XGBRegressor.score() reports R^2, not accuracy\n",
+ "import xgboost\n",
+ "\n",
+ "xgb_reg = xgboost.XGBClassifier(max_depth = 5, n_estimators = 500)  # variable name kept for compatibility\n",
+ "xgb_reg.fit(X_train, y_train)\n",
+ "# classifier score() is mean accuracy, so these figures are comparable with the logistic regression ones\n",
+ "print(xgb_reg.score(X_test, y_test))\n",
+ "print(f\"Training data accuracy was {xgb_reg.score(X_train, y_train)}\")"
]
},
{
@@ -108,11 +1075,11 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
- "# Your response here"
+ "# Your response here: Logistic regression worked better. Extreme Gradient Boosting is overfitting."
]
},
{
@@ -125,7 +1092,7 @@
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3",
+ "display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -139,7 +1106,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.6.8"
+ "version": "3.10.9"
}
},
"nbformat": 4,