diff --git a/your-code/lab_imbalance.ipynb b/your-code/lab_imbalance.ipynb
index a3a5359..21af561 100644
--- a/your-code/lab_imbalance.ipynb
+++ b/your-code/lab_imbalance.ipynb
@@ -28,18 +28,199 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here"
+    "import pandas as pd"
    ]
   },
   {
-   "cell_type": "markdown",
+   "cell_type": "code",
+   "execution_count": 4,
    "metadata": {},
+   "outputs": [],
    "source": [
-    "### What is the distribution of the outcome? "
+    "entire_data=pd.read_csv(\"C:/Users/milena.xavier/Downloads/archive (2)/PS_20174392719_1491204439457_log.csv\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sample=entire_data.sample(n=100000, random_state=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>step</th>\n",
+       "      <th>type</th>\n",
+       "      <th>amount</th>\n",
+       "      <th>nameOrig</th>\n",
+       "      <th>oldbalanceOrg</th>\n",
+       "      <th>newbalanceOrig</th>\n",
+       "      <th>nameDest</th>\n",
+       "      <th>oldbalanceDest</th>\n",
+       "      <th>newbalanceDest</th>\n",
+       "      <th>isFraud</th>\n",
+       "      <th>isFlaggedFraud</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>6322570</th>\n",
+       "      <td>688</td>\n",
+       "      <td>CASH_IN</td>\n",
+       "      <td>23557.12</td>\n",
+       "      <td>C867750533</td>\n",
+       "      <td>8059.00</td>\n",
+       "      <td>31616.12</td>\n",
+       "      <td>C1026934669</td>\n",
+       "      <td>169508.66</td>\n",
+       "      <td>145951.53</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3621196</th>\n",
+       "      <td>274</td>\n",
+       "      <td>PAYMENT</td>\n",
+       "      <td>6236.13</td>\n",
+       "      <td>C601099070</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>M701283411</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1226256</th>\n",
+       "      <td>133</td>\n",
+       "      <td>PAYMENT</td>\n",
+       "      <td>33981.87</td>\n",
+       "      <td>C279540931</td>\n",
+       "      <td>18745.72</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>M577905776</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2803274</th>\n",
+       "      <td>225</td>\n",
+       "      <td>CASH_OUT</td>\n",
+       "      <td>263006.42</td>\n",
+       "      <td>C11675531</td>\n",
+       "      <td>20072.00</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>C529577791</td>\n",
+       "      <td>390253.56</td>\n",
+       "      <td>653259.98</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3201247</th>\n",
+       "      <td>249</td>\n",
+       "      <td>CASH_OUT</td>\n",
+       "      <td>152013.74</td>\n",
+       "      <td>C530649214</td>\n",
+       "      <td>20765.00</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>C1304175579</td>\n",
+       "      <td>252719.19</td>\n",
+       "      <td>404732.93</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "         step      type     amount    nameOrig  oldbalanceOrg  newbalanceOrig  \\\n",
+       "6322570   688   CASH_IN   23557.12  C867750533        8059.00        31616.12   \n",
+       "3621196   274   PAYMENT    6236.13  C601099070           0.00            0.00   \n",
+       "1226256   133   PAYMENT   33981.87  C279540931       18745.72            0.00   \n",
+       "2803274   225  CASH_OUT  263006.42   C11675531       20072.00            0.00   \n",
+       "3201247   249  CASH_OUT  152013.74  C530649214       20765.00            0.00   \n",
+       "\n",
+       "            nameDest  oldbalanceDest  newbalanceDest  isFraud  isFlaggedFraud  \n",
+       "6322570  C1026934669       169508.66       145951.53        0               0  \n",
+       "3621196   M701283411            0.00            0.00        0               0  \n",
+       "1226256   M577905776            0.00            0.00        0               0  \n",
+       "2803274   C529577791       390253.56       653259.98        0               0  \n",
+       "3201247  C1304175579       252719.19       404732.93        0               0  "
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "sample.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "step                int64\n",
+       "type               object\n",
+       "amount            float64\n",
+       "nameOrig           object\n",
+       "oldbalanceOrg     float64\n",
+       "newbalanceOrig    float64\n",
+       "nameDest           object\n",
+       "oldbalanceDest    float64\n",
+       "newbalanceDest    float64\n",
+       "isFraud             int64\n",
+       "isFlaggedFraud      int64\n",
+       "dtype: object"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "sample.dtypes"
    ]
   },
   {
@@ -48,7 +229,559 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your response here"
+    "#there are 3 columns with text. Working on the first one:Type"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "CASH_OUT    35209\n",
+       "PAYMENT     33694\n",
+       "CASH_IN     21987\n",
+       "TRANSFER     8416\n",
+       "DEBIT         694\n",
+       "Name: type, dtype: int64"
+      ]
+     },
+     "execution_count": 16,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "sample[\"type\"].value_counts()\n",
+    "#there are only 5 types, I could create dummies with this column"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sample_dummy=pd.get_dummies(sample, columns=['type'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sample_dummy = sample_dummy.drop(columns=\"type\", errors=\"ignore\")  # get_dummies(columns=['type']) already removed 'type'; errors='ignore' avoids a KeyError on a fresh run"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>step</th>\n",
+       "      <th>amount</th>\n",
+       "      <th>nameOrig</th>\n",
+       "      <th>oldbalanceOrg</th>\n",
+       "      <th>newbalanceOrig</th>\n",
+       "      <th>nameDest</th>\n",
+       "      <th>oldbalanceDest</th>\n",
+       "      <th>newbalanceDest</th>\n",
+       "      <th>isFraud</th>\n",
+       "      <th>isFlaggedFraud</th>\n",
+       "      <th>type_CASH_IN</th>\n",
+       "      <th>type_CASH_OUT</th>\n",
+       "      <th>type_DEBIT</th>\n",
+       "      <th>type_PAYMENT</th>\n",
+       "      <th>type_TRANSFER</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>6322570</th>\n",
+       "      <td>688</td>\n",
+       "      <td>23557.12</td>\n",
+       "      <td>C867750533</td>\n",
+       "      <td>8059.00</td>\n",
+       "      <td>31616.12</td>\n",
+       "      <td>C1026934669</td>\n",
+       "      <td>169508.66</td>\n",
+       "      <td>145951.53</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3621196</th>\n",
+       "      <td>274</td>\n",
+       "      <td>6236.13</td>\n",
+       "      <td>C601099070</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>M701283411</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1226256</th>\n",
+       "      <td>133</td>\n",
+       "      <td>33981.87</td>\n",
+       "      <td>C279540931</td>\n",
+       "      <td>18745.72</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>M577905776</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2803274</th>\n",
+       "      <td>225</td>\n",
+       "      <td>263006.42</td>\n",
+       "      <td>C11675531</td>\n",
+       "      <td>20072.00</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>C529577791</td>\n",
+       "      <td>390253.56</td>\n",
+       "      <td>653259.98</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3201247</th>\n",
+       "      <td>249</td>\n",
+       "      <td>152013.74</td>\n",
+       "      <td>C530649214</td>\n",
+       "      <td>20765.00</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>C1304175579</td>\n",
+       "      <td>252719.19</td>\n",
+       "      <td>404732.93</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "         step     amount    nameOrig  oldbalanceOrg  newbalanceOrig  \\\n",
+       "6322570   688   23557.12  C867750533        8059.00        31616.12   \n",
+       "3621196   274    6236.13  C601099070           0.00            0.00   \n",
+       "1226256   133   33981.87  C279540931       18745.72            0.00   \n",
+       "2803274   225  263006.42   C11675531       20072.00            0.00   \n",
+       "3201247   249  152013.74  C530649214       20765.00            0.00   \n",
+       "\n",
+       "            nameDest  oldbalanceDest  newbalanceDest  isFraud  isFlaggedFraud  \\\n",
+       "6322570  C1026934669       169508.66       145951.53        0               0   \n",
+       "3621196   M701283411            0.00            0.00        0               0   \n",
+       "1226256   M577905776            0.00            0.00        0               0   \n",
+       "2803274   C529577791       390253.56       653259.98        0               0   \n",
+       "3201247  C1304175579       252719.19       404732.93        0               0   \n",
+       "\n",
+       "         type_CASH_IN  type_CASH_OUT  type_DEBIT  type_PAYMENT  type_TRANSFER  \n",
+       "6322570             1              0           0             0              0  \n",
+       "3621196             0              0           0             1              0  \n",
+       "1226256             0              0           0             1              0  \n",
+       "2803274             0              1           0             0              0  \n",
+       "3201247             0              1           0             0              0  "
+      ]
+     },
+     "execution_count": 30,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "sample_dummy.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 34,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<bound method DataFrame.nunique of             nameOrig\n",
+       "6322570   C867750533\n",
+       "3621196   C601099070\n",
+       "1226256   C279540931\n",
+       "2803274    C11675531\n",
+       "3201247   C530649214\n",
+       "...              ...\n",
+       "4225513  C1059072914\n",
+       "4989642  C1543222456\n",
+       "2099701   C171437065\n",
+       "249322   C1831253634\n",
+       "4679267    C41194212\n",
+       "\n",
+       "[100000 rows x 1 columns]>"
+      ]
+     },
+     "execution_count": 34,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "#Now, the columns nameOrig and nameDest: count their distinct values (nunique has to be called with parentheses)\n",
+    "sample_dummy[\"nameOrig\"].nunique()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 35,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<bound method DataFrame.nunique of             nameDest\n",
+       "6322570  C1026934669\n",
+       "3621196   M701283411\n",
+       "1226256   M577905776\n",
+       "2803274   C529577791\n",
+       "3201247  C1304175579\n",
+       "...              ...\n",
+       "4225513   C759673946\n",
+       "4989642   M441713839\n",
+       "2099701  C1175649845\n",
+       "249322    M912660596\n",
+       "4679267   C724844824\n",
+       "\n",
+       "[100000 rows x 1 columns]>"
+      ]
+     },
+     "execution_count": 35,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "sample_dummy[\"nameDest\"].nunique()  # call the method to get the number of distinct nameDest values"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 36,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# nameOrig and nameDest have too many unique values to be turned into dummies, so both columns are discarded\n",
+    "\n",
+    "sample_dummy = sample_dummy.drop(columns=[\"nameOrig\", \"nameDest\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>step</th>\n",
+       "      <th>amount</th>\n",
+       "      <th>oldbalanceOrg</th>\n",
+       "      <th>newbalanceOrig</th>\n",
+       "      <th>oldbalanceDest</th>\n",
+       "      <th>newbalanceDest</th>\n",
+       "      <th>isFraud</th>\n",
+       "      <th>isFlaggedFraud</th>\n",
+       "      <th>type_CASH_IN</th>\n",
+       "      <th>type_CASH_OUT</th>\n",
+       "      <th>type_DEBIT</th>\n",
+       "      <th>type_PAYMENT</th>\n",
+       "      <th>type_TRANSFER</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>6322570</th>\n",
+       "      <td>688</td>\n",
+       "      <td>23557.12</td>\n",
+       "      <td>8059.00</td>\n",
+       "      <td>31616.12</td>\n",
+       "      <td>169508.66</td>\n",
+       "      <td>145951.53</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3621196</th>\n",
+       "      <td>274</td>\n",
+       "      <td>6236.13</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1226256</th>\n",
+       "      <td>133</td>\n",
+       "      <td>33981.87</td>\n",
+       "      <td>18745.72</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2803274</th>\n",
+       "      <td>225</td>\n",
+       "      <td>263006.42</td>\n",
+       "      <td>20072.00</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>390253.56</td>\n",
+       "      <td>653259.98</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3201247</th>\n",
+       "      <td>249</td>\n",
+       "      <td>152013.74</td>\n",
+       "      <td>20765.00</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>252719.19</td>\n",
+       "      <td>404732.93</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "         step     amount  oldbalanceOrg  newbalanceOrig  oldbalanceDest  \\\n",
+       "6322570   688   23557.12        8059.00        31616.12       169508.66   \n",
+       "3621196   274    6236.13           0.00            0.00            0.00   \n",
+       "1226256   133   33981.87       18745.72            0.00            0.00   \n",
+       "2803274   225  263006.42       20072.00            0.00       390253.56   \n",
+       "3201247   249  152013.74       20765.00            0.00       252719.19   \n",
+       "\n",
+       "         newbalanceDest  isFraud  isFlaggedFraud  type_CASH_IN  type_CASH_OUT  \\\n",
+       "6322570       145951.53        0               0             1              0   \n",
+       "3621196            0.00        0               0             0              0   \n",
+       "1226256            0.00        0               0             0              0   \n",
+       "2803274       653259.98        0               0             0              1   \n",
+       "3201247       404732.93        0               0             0              1   \n",
+       "\n",
+       "         type_DEBIT  type_PAYMENT  type_TRANSFER  \n",
+       "6322570           0             0              0  \n",
+       "3621196           0             1              0  \n",
+       "1226256           0             1              0  \n",
+       "2803274           0             0              0  \n",
+       "3201247           0             0              0  "
+      ]
+     },
+     "execution_count": 37,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "sample_dummy.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 38,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0    100000\n",
+       "Name: isFlaggedFraud, dtype: int64"
+      ]
+     },
+     "execution_count": 38,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "sample_dummy[\"isFlaggedFraud\"].value_counts()\n",
+    "#this column holds a single value (0) for every row of the sample, so it carries no information; I am going to remove it as well"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 39,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sample_dummy = sample_dummy.drop(columns=[\"isFlaggedFraud\"])  # constant column (see value_counts in the previous cell)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 41,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "step              0\n",
+       "amount            0\n",
+       "oldbalanceOrg     0\n",
+       "newbalanceOrig    0\n",
+       "oldbalanceDest    0\n",
+       "newbalanceDest    0\n",
+       "isFraud           0\n",
+       "type_CASH_IN      0\n",
+       "type_CASH_OUT     0\n",
+       "type_DEBIT        0\n",
+       "type_PAYMENT      0\n",
+       "type_TRANSFER     0\n",
+       "dtype: int64"
+      ]
+     },
+     "execution_count": 41,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "sample_dummy.isna().sum()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Now, the database is ready to be used :)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### What is the distribution of the outcome? "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 42,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0    99876\n",
+       "1      124\n",
+       "Name: isFraud, dtype: int64"
+      ]
+     },
+     "execution_count": 42,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "sample_dummy[\"isFraud\"].value_counts()\n",
+    "#The outcome is highly imbalanced: 99,876 non-fraud cases versus only 124 fraud cases (about 0.12% of the sample)"
    ]
   },
   {
@@ -64,7 +797,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here\n"
+    "#Dataset already cleaned\n"
    ]
   },
   {
@@ -76,11 +809,65 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 44,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here"
+    "from sklearn.linear_model import LogisticRegression\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "\n",
+    "# target = isFraud, features = every other column; fixed seed keeps the default 75/25 split reproducible\n",
+    "y, X = sample_dummy['isFraud'], sample_dummy.drop(columns='isFraud')\n",
+    "X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 47,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Train accuracy score:  0.9981066666666667\n",
+      "Test accuracy score:  0.99836\n"
+     ]
+    }
+   ],
+   "source": [
+    "# baseline model: logistic regression fitted on the original (imbalanced) training split\n",
+    "LR = LogisticRegression(max_iter=1000).fit(X_train, y_train)\n",
+    "pred = LR.predict(X_test)\n",
+    "\n",
+    "print(\"Train accuracy score: \", LR.score(X_train, y_train))\n",
+    "print(\"Test accuracy score: \", LR.score(X_test, y_test))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 49,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([[24939,    32],\n",
+       "       [    9,    20]], dtype=int64)"
+      ]
+     },
+     "execution_count": 49,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from sklearn.metrics import confusion_matrix\n",
+    "\n",
+    "pred = LR.predict(X_test)\n",
+    "confusion_matrix(y_test, pred)\n",
+    "\n",
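+    "#rows are true classes, columns are predictions: 32 false positives and 9 false negatives, i.e. only 20 of the 29 frauds in the test set are caught. Maybe this could be improved by balancing the data better"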
    ]
   },
   {
@@ -92,11 +879,307 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 53,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.utils import resample"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 51,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(99876, 12)"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/plain": [
+       "(124, 12)"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "#Same model as before, but trained on rebalanced data. Only the training split is resampled,\n",
+    "#so no row of X_test (or a duplicate of it) can leak into the data the model is fitted on.\n",
+    "train_data = sample_dummy.loc[X_train.index]\n",
+    "\n",
+    "# separate majority/minority classes\n",
+    "no_fraud = train_data[train_data['isFraud']==0]\n",
+    "yes_fraud = train_data[train_data['isFraud']==1]\n",
+    "display(no_fraud.shape, yes_fraud.shape)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 54,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your code here"
+    "# oversample the minority (fraud) class until it has as many rows as the majority (non-fraud) class\n",
+    "yes_fraud_oversampled = resample(\n",
+    "    yes_fraud,\n",
+    "    replace=True, n_samples=len(no_fraud),  # sampling with replacement: there are far fewer fraud rows than needed\n",
+    "    random_state=0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 55,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(99876, 12)"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/plain": [
+       "(99876, 12)"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# both sets are now of a reasonable size\n",
+    "display(no_fraud.shape)\n",
+    "display(yes_fraud_oversampled.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 56,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>step</th>\n",
+       "      <th>amount</th>\n",
+       "      <th>oldbalanceOrg</th>\n",
+       "      <th>newbalanceOrig</th>\n",
+       "      <th>oldbalanceDest</th>\n",
+       "      <th>newbalanceDest</th>\n",
+       "      <th>isFraud</th>\n",
+       "      <th>type_CASH_IN</th>\n",
+       "      <th>type_CASH_OUT</th>\n",
+       "      <th>type_DEBIT</th>\n",
+       "      <th>type_PAYMENT</th>\n",
+       "      <th>type_TRANSFER</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>6322570</th>\n",
+       "      <td>688</td>\n",
+       "      <td>23557.12</td>\n",
+       "      <td>8059.00</td>\n",
+       "      <td>31616.12</td>\n",
+       "      <td>169508.66</td>\n",
+       "      <td>145951.53</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3621196</th>\n",
+       "      <td>274</td>\n",
+       "      <td>6236.13</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1226256</th>\n",
+       "      <td>133</td>\n",
+       "      <td>33981.87</td>\n",
+       "      <td>18745.72</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2803274</th>\n",
+       "      <td>225</td>\n",
+       "      <td>263006.42</td>\n",
+       "      <td>20072.00</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>390253.56</td>\n",
+       "      <td>653259.98</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3201247</th>\n",
+       "      <td>249</td>\n",
+       "      <td>152013.74</td>\n",
+       "      <td>20765.00</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>252719.19</td>\n",
+       "      <td>404732.93</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "         step     amount  oldbalanceOrg  newbalanceOrig  oldbalanceDest  \\\n",
+       "6322570   688   23557.12        8059.00        31616.12       169508.66   \n",
+       "3621196   274    6236.13           0.00            0.00            0.00   \n",
+       "1226256   133   33981.87       18745.72            0.00            0.00   \n",
+       "2803274   225  263006.42       20072.00            0.00       390253.56   \n",
+       "3201247   249  152013.74       20765.00            0.00       252719.19   \n",
+       "\n",
+       "         newbalanceDest  isFraud  type_CASH_IN  type_CASH_OUT  type_DEBIT  \\\n",
+       "6322570       145951.53        0             1              0           0   \n",
+       "3621196            0.00        0             0              0           0   \n",
+       "1226256            0.00        0             0              0           0   \n",
+       "2803274       653259.98        0             0              1           0   \n",
+       "3201247       404732.93        0             0              1           0   \n",
+       "\n",
+       "         type_PAYMENT  type_TRANSFER  \n",
+       "6322570             0              0  \n",
+       "3621196             1              0  \n",
+       "1226256             1              0  \n",
+       "2803274             0              0  \n",
+       "3201247             0              0  "
+      ]
+     },
+     "execution_count": 56,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "#applying it into the train data:\n",
+    "train_oversampled = pd.concat([no_fraud,yes_fraud_oversampled])\n",
+    "train_oversampled.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 57,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#Now, naming X and Y:\n",
+    "\n",
+    "y_train_over = train_oversampled['isFraud'].copy()\n",
+    "X_train_over = train_oversampled.drop('isFraud',axis = 1).copy()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 58,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "LR = LogisticRegression(max_iter=1000)\n",
+    "LR.fit(X_train_over, y_train_over)\n",
+    "pred = LR.predict(X_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 59,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Train accuracy score:  0.9580666666666666\n",
+      "Test accuracy score:  0.9566\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(\"Train accuracy score: \", LR.score(X_train, y_train))\n",
+    "print(\"Test accuracy score: \", LR.score(X_test, y_test))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 60,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([[23887,  1084],\n",
+       "       [    1,    28]], dtype=int64)"
+      ]
+     },
+     "execution_count": 60,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "pred = LR.predict(X_test)\n",
+    "confusion_matrix(y_test, pred)"
    ]
   },
   {
@@ -112,7 +1195,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Your response here"
+    "#The model that worked better was the first one without resizing the data. I analyzed the false positives and also the score of the model. Both metrics worked better without reshaping my data."
    ]
   },
   {
@@ -125,7 +1208,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -139,7 +1222,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.8"
+   "version": "3.9.13"
   }
  },
  "nbformat": 4,

	step	type	amount	nameOrig	oldbalanceOrg	newbalanceOrig	nameDest	oldbalanceDest	newbalanceDest
6322570	688	CASH_IN	23557.12	C867750533	8059.00	31616.12	C1026934669	169508.66	145951.53
3621196	274	PAYMENT	6236.13	C601099070	0.00	0.00	M701283411	0.00	0.00
1226256	133	PAYMENT	33981.87	C279540931	18745.72	0.00	M577905776	0.00	0.00
2803274	225	CASH_OUT	263006.42	C11675531	20072.00	0.00	C529577791	390253.56	653259.98
3201247	249	CASH_OUT	152013.74	C530649214	20765.00	0.00	C1304175579	252719.19	404732.93