diff --git a/your-code/lab_imbalance.ipynb b/your-code/lab_imbalance.ipynb
index a3a5359..8516c40 100644
--- a/your-code/lab_imbalance.ipynb
+++ b/your-code/lab_imbalance.ipynb
@@ -30,9 +30,145 @@
"cell_type": "code",
"execution_count": 1,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " step \n",
+ " type \n",
+ " amount \n",
+ " nameOrig \n",
+ " oldbalanceOrg \n",
+ " newbalanceOrig \n",
+ " nameDest \n",
+ " oldbalanceDest \n",
+ " newbalanceDest \n",
+ " isFraud \n",
+ " isFlaggedFraud \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 1 \n",
+ " PAYMENT \n",
+ " 9839.64 \n",
+ " C1231006815 \n",
+ " 170136.0 \n",
+ " 160296.36 \n",
+ " M1979787155 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 0 \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 1 \n",
+ " PAYMENT \n",
+ " 1864.28 \n",
+ " C1666544295 \n",
+ " 21249.0 \n",
+ " 19384.72 \n",
+ " M2044282225 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 0 \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " 1 \n",
+ " TRANSFER \n",
+ " 181.00 \n",
+ " C1305486145 \n",
+ " 181.0 \n",
+ " 0.00 \n",
+ " C553264065 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 1 \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 1 \n",
+ " CASH_OUT \n",
+ " 181.00 \n",
+ " C840083671 \n",
+ " 181.0 \n",
+ " 0.00 \n",
+ " C38997010 \n",
+ " 21182.0 \n",
+ " 0.0 \n",
+ " 1 \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " 1 \n",
+ " PAYMENT \n",
+ " 11668.14 \n",
+ " C2048537720 \n",
+ " 41554.0 \n",
+ " 29885.86 \n",
+ " M1230701703 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 0 \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " step type amount nameOrig oldbalanceOrg newbalanceOrig \\\n",
+ "0 1 PAYMENT 9839.64 C1231006815 170136.0 160296.36 \n",
+ "1 1 PAYMENT 1864.28 C1666544295 21249.0 19384.72 \n",
+ "2 1 TRANSFER 181.00 C1305486145 181.0 0.00 \n",
+ "3 1 CASH_OUT 181.00 C840083671 181.0 0.00 \n",
+ "4 1 PAYMENT 11668.14 C2048537720 41554.0 29885.86 \n",
+ "\n",
+ " nameDest oldbalanceDest newbalanceDest isFraud isFlaggedFraud \n",
+ "0 M1979787155 0.0 0.0 0 0 \n",
+ "1 M2044282225 0.0 0.0 0 0 \n",
+ "2 C553264065 0.0 0.0 1 0 \n",
+ "3 C38997010 21182.0 0.0 1 0 \n",
+ "4 M1230701703 0.0 0.0 0 0 "
+ ]
+ },
+ "execution_count": 1,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here"
+ "import pandas as pd\n",
+ "import numpy as np \n",
+ "\n",
+ "data = pd.read_csv('data.csv', nrows = 100000)\n",
+ "data.head()\n",
+ "#outcome would be isFraud\n",
+ "#features: step, tupe, amount,oldbalance,newbalance,oldbalancedest,newbalancedest, nameDest"
]
},
{
@@ -44,11 +180,34 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 2,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ },
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAkIAAAGrCAYAAAAsBPjXAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy88F64QAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAngUlEQVR4nO3df3RU9Z3/8deQkDFkk2sgZIbZZjVus1loWGWDJwRcoQsELCHHbru4GzstZ9moGyVNSRbhdLciPU3kV+DUyA+tLVTR9OzB7HoEYrLaQ40QwGhag2DPHpEEyRC6DBOIcRLC3T/8cr8OQfzRgZB8no9zco5z73tmPpdTzbOf+YHLtm1bAAAABhox2AsAAAAYLIQQAAAwFiEEAACMRQgBAABjEUIAAMBYhBAAADAWIQQAAIwVO9gLuN5duHBBJ06cUGJiolwu12AvBwAAfA62bevs2bPy+XwaMeLT930Ioc9w4sQJpaWlDfYyAADAl9De3q6vfOUrn3qeEPoMiYmJkj7+g0xKShrk1QAAgM+jq6tLaWlpzu/xT0MIfYaLL4clJSURQgAADDGf9bYW3iwNAACMRQgBAABjEUIAAMBYhBAAADAWIQQAAIxFCAEAAGMRQgAAwFiEEAAAMBYhBAAAjEUIAQAAYxFCAADAWF84hH7zm99o/vz58vl8crlc+s///M+I87Zta8WKFfL5fIqPj9eMGTN06NChiJlwOKzFixcrJSVFCQkJKigo0PHjxyNmgsGg/H6/LMuSZVny+/06c+ZMxExbW5vmz5+vhIQEpaSkqKSkRL29vREzb7/9tqZPn674+Hj96Z/+qVauXCnbtr/oZQMAgGHoC4dQd3e3br31VlVXV1/2/OrVq1VVVaXq6modPHhQXq9Xs2fP1tmzZ52Z0tJS1dbWqqamRo2NjTp37pzy8/PV39/vzBQWFqqlpUV1dXWqq6tTS0uL/H6/c76/v1/z5s1Td3e3GhsbVVNTox07dqisrMyZ6erq0uzZs+Xz+XTw4EE9/vjjWrt2raqqqr7oZQMAgOHI/iNIsmtra53bFy5csL1er/3YY485xz766CPbsix78+bNtm3b9pkzZ+yRI0faNTU1zswHH3xgjxgxwq6rq7Nt27bfeecdW5Ld1NTkzOzbt8+WZB85csS2bdvetWuXPWLECPuDDz5wZp5//nnb7XbboVDItm3b3rhxo21Zlv3RRx85M5WVlbbP57MvXLhw2Wv66KOP7FAo5Py0t7fbkpzHBAAA179QKPS5fn9H9T1CR48eVSAQUF5ennPM7XZr+vTp2rt3rySpublZfX19ETM+n09ZWVnOzL59+2RZlnJycpyZKVOmyLKsiJmsrCz5fD5nZs6cOQqHw2pubnZmpk+fLrfbHTFz4sQJvf/++5e9hsrKSuflOMuylJaW9kf+qQAAgOtVbDQfLBAISJI8Hk/EcY/Ho2PHjjkzcXFxSk5OHjBz8f6BQECpqakDHj81NTVi5tLnSU5OVlxcXMTMzTffPOB5Lp5LT08f8BzLly/XkiVLnNtdXV3GxtDNy3YO9hJwDb3/2LzBXgIAXHNRDaGLXC5XxG3btgccu9SlM5ebj8aM/f/eKP1p63G73RE7SAAAYPiK6ktjXq9X0v/fGbqos7PT2Ynxer3q7e1VMBi84szJkycHPP6pU6ciZi59nmAwqL6+vivOdHZ2Shq4awUAAMwT1RBKT0+X1+tVQ0ODc6y3t1d79uzR1KlTJUnZ2dkaOXJkxExHR4daW1udmdzcXIVCIR04cMCZ2b9/v0KhUMRMa2urOjo6nJn6+nq53W5lZ2c7M7/5zW8iPlJfX18vn8834CUzAABgni8cQufOnVNLS4taWlokffwG6ZaWFrW1tcnlcqm0tFQVFRWqra1Va2urFi5cqFGjRqmwsFCSZFmWFi1apLKyMr3yyit666239J3vfEcTJ07UrFmzJEnjx4/X3LlzVVRUpKamJjU1NamoqEj5+fnKzMyUJOXl5WnChAny+/1666239Morr6i8vFxFRUVKSkqS9PFH8N1utxYuXKjW1lbV1taqoqJCS5Ys+cyX6gAAwPD3hd8j9MYbb+jrX/+6c/viG4u/973vaevWrVq6dKl6enpUXFysYDConJwc1dfXKzEx0bnP+vXrFRsbqwULFqinp0czZ87U1q1bFRMT48xs375dJSUlzqfLCgoKIr67KCYmRjt37lRxcbGmTZum+Ph4FRYWau3atc6MZVlqaGjQgw8+qMmTJys5OVlLliyJeDM0AAAwl8u2+ZrlK+nq6pJlWQqFQs5Okyn41JhZ+NQYgOHk8/7+5u8aAwAAxiKEAACAsQghAABgLEIIAAAYixACAADGIoQAAICxCCEAAGAsQggAABiLEAIAAMYihAAAgLEIIQAAYCxCCAAAGIsQAgAAxiKEAACAsQghAABgLEIIAAAYixACAADGIoQAAICxCCEAAGAsQggAABiLEAIAAMYihAAAgLEIIQAAYCxCCAAAGIsQAgAAxiKEAACAsQghAABgLEIIAAAYixACAADGIoQAAICxCCEAAGAsQggAABiLEAIAAMYihAAAgLEIIQAAYCxCCAAAGIsQAgAAxiKEAACAsQghAABgLEIIAAAYixACAADGIoQAAICxCCEAAGAsQggAABiLEAIAAMYihAAAgLEIIQAAYCxCCAAAGIsQAgAAxiKEAACAsQghAABgLEIIAAAYixACAADGIoQAAICxCCEAAGAsQggAABiLEAIAAMYihAAAgLEIIQAAYKyoh9D58+f1b//2b0pPT1d8fLxuueUWrVy5UhcuXHBmbNvWihUr5PP5FB8frxkzZujQoUMRjxMOh7V48WKlpKQoISFBBQUFOn78eMRMMBiU3++XZVmyLEt+v19nzpyJmGlra9P8+fOVkJCglJQUlZSUqLe3N9qXDQAAhqCoh9CqVau0efNmVVdX6/Dhw1q9erXWrFmjxx9/3JlZvXq1qqqqVF1drYMHD8rr9Wr27Nk6e/asM1NaWqra2lrV1NSosbFR586dU35+vvr7+52ZwsJCtbS0qK6uTnV1dWppaZHf73fO9/f3a968eeru7lZjY6Nqamq0Y8cOlZWVRfuyAQDAEOSybduO5gPm5+fL4/Ho6aefdo5961vf0qhRo/TMM8/Itm35fD6Vlpbq4YcflvTx7o/H49GqVat0//33KxQKaezYsXrmmWd0zz33SJJOnDihtLQ07dq1S3PmzNHhw4c1YcIENTU1KScnR5LU1NSk3NxcHTlyRJmZmdq9e7fy8/PV3t4un88nSaqpqdHChQvV2dmppKSkz7yerq4uWZalUCj0ueaHk5uX7RzsJeAaev+xeYO9BACIms/7+zvqO0J33HGHXnnlFf3+97+XJP32t79VY2OjvvGNb0iSjh49qkAgoLy8POc+brdb06dP1969eyVJzc3N6uvri5jx+XzKyspyZvbt2yfLspwIkqQpU6bIsqyImaysLCeCJGnOnDkKh8Nqbm6+7PrD4bC6uroifgAAwPAUG+0HfPjhhxUKhfSXf/mXiomJUX9/v37yk5/oH//xHyVJgUBAkuTxeCLu5/F4dOzYMWcmLi5OycnJA2Yu3j8QCCg1NXXA86empkbMXPo8ycnJiouLc2YuVVlZqUcfffSLXjYAABiCor4j9Ktf/UrPPvusnnvuOb355pvatm2b1q5dq23btkXMuVyuiNu2bQ84dqlLZy43/2VmPmn58uUKhULOT3t7+xXXBAAAhq6o7wj967/+q5YtW6Z/+Id/kCRNnDhRx44dU2Vlpb73ve/J6/VK+ni3Zty4cc79Ojs7nd0br9er3t5eBYPBiF2hzs5OTZ061Zk5efLkgOc/depUxOPs378/4nwwGFRfX9+AnaKL3G633G73l718AAAwhER9R+jDDz/UiBGRDxsTE+N8fD49PV1er1cNDQ3O+d7eXu3Zs8eJnOzsbI0cOTJipqOjQ62trc5Mbm6uQqGQDhw44Mzs379foVAoYqa1tVUdHR3OTH19vdxut7Kzs6N85QAAYKiJ+o7Q/Pnz9ZOf/ER/9md/pq997Wt66623VFVVpX/6p3+S9PFLVaWlpaqoqFBGRoYyMjJUUVGhUaNGqbCwUJJkWZYWLVqksrIyjRkzRqNHj1Z5ebkmTpyoWbNmSZLGjx+vuXPnqqioSFu2bJEk3XfffcrPz1dmZqYkKS8vTxMmTJDf79eaNWt0+vRplZeXq6ioyLhPgAEAgIGiHkKPP/64/v3f/13FxcXq7OyUz+fT/fffrx/96EfOzNKlS9XT06Pi4mIFg0Hl5OSovr5eiYmJzsz69esVGxurBQsWqKenRzNnztTWrVsVExPjzGzfvl0lJSXOp8sKCgpUXV3tnI+JidHOnTtVXFysadOmKT4+XoWFhVq7dm20LxsAAAxBUf8eoeGG7xGCKfgeIQDDyaB9jxAAAMBQQQgBAABjEUIAAMBYhBAAADAWIQQAAIxFCAEAAGMRQgAAwFiEEAAAMBYhBAAAjEUIAQAAYxFCAADAWIQQAAAwFiEEAACMRQgBAABjEUIAAMBYhBAAADAWIQQAAIxFCAEAAGMRQgAAwFiEEAAAMBYhBAAAjEUIAQAAYxFCAADAWIQQAAAwFiEEAACMRQgBAABjEUIAAMBYhBAAADAWIQQAAIxFCAEAAGMRQgAAwFiEEAAAMBYhBAAAjEUIAQAAYxFCAADAWIQQAAAwFiEEAACMRQgBAABjEUIAAMBYhBAAADAWIQQAAIxFCAEAAGMRQgAAwFiEEAAAMBYhBAAAjEUIAQAAYxFCAADAWIQQAAAwFiEEAACMRQgBAABjEUIAAMBYhBAAADAWIQQAAIxFCAEAAGMRQgAAwFiEEAAAMBYhBAAAjEUIAQAAYxFCAADAWIQQAAAw1lUJoQ8++EDf+c53NGbMGI0aNUq33XabmpubnfO2bWvFihXy+XyKj4/XjBkzdOjQoYjHCIfDWrx4sVJSUpSQkKCCggIdP348YiYYDMrv98uyLFmWJb/frzNnzkTMtLW1af78+UpISFBKSopKSkrU29t7NS4bAAAMMVEPoWAwqGnTpmnkyJHavXu33nnnHa1bt0433nijM7N69WpVVVWpurpaBw8elNfr1ezZs3X27FlnprS0VLW1taqpqVFjY6POnTun/Px89ff3OzOFhYVqaWlRXV2d6urq1NLSIr/f75zv7+/XvHnz1N3drcbGRtXU1GjHjh0qKyuL9mUDAIAhyGXbth3NB1y2bJlef/11vfbaa5c9b9u2fD6fSktL9fDDD0v6ePfH4/Fo1apVuv/++xUKhTR27Fg988wzuueeeyRJJ06cUFpamnbt2qU5c+bo8OHDmjBhgpqampSTkyNJampqUm5uro4cOaLMzEzt3r1b+fn5am9vl8/nkyTV1NRo4cKF6uzsVFJS0oD1hcNhhcNh53ZXV5fS0tIUCoUuOz+c3bxs52AvAdfQ+4/NG+wlAEDUdHV1ybKsz/z9HfUdoRdffFGTJ0/W3//93ys1NVWTJk3SU0895Zw/evSoAoGA8vLynGNut1vTp0/X3r17JUnNzc3q6+uLmPH5fMrKynJm9u3bJ8uynAiSpClTpsiyrIiZrKwsJ4Ikac6cOQqHwxEv1X1SZWWl81KbZVlKS0uLwp8KAAC4HkU9hN577z1t2rRJGRkZevnll/XAAw+opKREv/zlLyVJgUBAkuTxeCLu5/F4nHOBQEBxcXFKTk6+4kxqauqA509NTY2YufR5kpOTFRcX58xcavny5QqFQs5Pe3v7F/0jAAAAQ0RstB/wwoULmjx5sioqKiRJkyZN0qFDh7Rp0yZ997vfdeZcLlfE/WzbHnDsUpfOXG7+y8x8ktvtltvtvuI6AADA8BD1HaFx48ZpwoQJEcfGjx+vtrY2SZLX65WkATsynZ2dzu6N1+tVb2+vgsHgFWdOnjw54PlPnToVMXPp8wSDQfX19Q3YKQIAAOaJeghNmzZN7777bsSx3//+97rpppskSenp6fJ6vWpoaHDO9/b2as+ePZo6daokKTs7WyNHjoyY6ejoUGtrqzOTm5urUCikAwcOODP79+9XKBSKmGltbVVHR4czU19fL7fbrezs7ChfOQAAGGqi/tLYD37wA02dOlUVFRVasGCBDhw4oCeffFJPPvmkpI9fqiotLVVFRYUyMjKUkZGhiooKjRo1SoWFhZIky7K0aNEilZWVacyYMRo9erTKy8s1ceJEzZo1S9LHu0xz585VUVGRtmzZIkm67777lJ+fr8zMTElSXl6eJkyYIL/frzVr1uj06dMqLy9XUVGRcZ8AAwAAA0U9hG6//XbV1tZq+fLlWrlypdLT07Vhwwbde++9zszSpUvV09Oj4uJiBYNB5eTkqL6+XomJic7M+vXrFRsbqwULFqinp0czZ87U1q1bFRMT48xs375dJSUlzqfLCgoKVF1d7ZyPiYnRzp07VVxcrGnTpik+Pl6FhYVau3ZttC8bAAAMQVH/HqHh5vN+D8FwxPcImYXvEQIwnAza9wgBAAAMFYQQAAAwFiEEAACMRQgBAABjEUIAAMBYhBAAADAWIQQAAIxFCAEAAGMRQgAAwFiEEAAAMBYhBAAAjEUIAQAAYxFCAADAWIQQAAAwFiEEAACMRQgBAABjEUIAAMBYhBAAADAWIQQAAIxFCAEAAGMRQgAAwFiEEAAAMBYhBAAAjEUIAQAAYxFCAADAWIQQAAAwFiEEAACMRQgBAABjEUIAAMBYhBAAADAWIQQAAIxFCAEAAGMRQgAAwFiEEAAAMBYhBAAAjEUIAQAAYxFCAADAWIQQAAAwFiEEAACMRQgBAABjEUIAAMBYhBAAADAWIQQAAIxFCAEAAGMRQgAAwFiEEAAAMBYhBAAAjEUIAQAAYxFCAADAWIQQAAAwFiEEAACMRQgBAABjEUIAAMBYhBAAADAWIQQAAIxFCAEAAGMRQgAAwFiEEAAAMBYhBAAAjHXVQ6iyslIul0ulpaXOMdu2tWLFCvl8PsXHx2vGjBk6dOhQxP3C4bAWL16slJQUJSQkqKCgQMePH4+YCQaD8vv9sixLlmXJ7/frzJkzETNtbW2aP3++EhISlJKSopKSEvX29l6tywUAAEPIVQ2hgwcP6sknn9Rf/dVfRRxfvXq1qqqqVF1drYMHD8rr9Wr27Nk6e/asM1NaWqra2lrV1NSosbFR586dU35+vvr7+52ZwsJCtbS0qK6uTnV1dWppaZHf73fO9/f3a968eeru7lZjY6Nqamq0Y8cOlZWVXc3LBgAAQ8RVC6Fz587p3nvv1VNPPaXk5GTnuG3b2rBhg374wx/q7/7u75SVlaVt27bpww8/1HPPPSdJCoVCevrpp7Vu3TrNmjVLkyZN0rPPPqu3335b//3f/y1JOnz4sOrq6vSzn/1Mubm5ys3N1VNPPaWXXnpJ7777riSpvr5e77zzjp599llNmjRJs2bN0rp16/TUU0+pq6vral06AAAYIq5aCD344IOaN2+eZs2aFXH86NGjCgQCysvLc4653W5Nnz5de/fulSQ1Nzerr68vYsbn8ykrK8uZ2bdvnyzLUk5OjjMzZcoUWZYVMZOVlSWfz+fMzJkzR+FwWM3NzZdddzgcVldXV8QPAAAYnmKvxoPW1NTozTff1MGDBwecCwQCkiSPxxNx3OPx6NixY85MXFxcxE7SxZmL9w8EAkpNTR3w+KmpqREzlz5PcnKy4uLinJlLVVZW6tFHH/08lwkAAIa4qO8Itbe36/vf/76effZZ3XDDDZ8653K5Im7btj3g2KUunbnc/JeZ+aTly5crFAo5P+3t7VdcEwAAGLqiHkLNzc3q7OxUdna2YmNjFRsbqz179uinP/2pYmNjnR2aS3dkOjs7nXNer1e9vb0KBoNXnDl58uSA5z916lTEzKXPEwwG1dfXN2Cn6CK3262kpKSIHwAAMDxFPYRmzpypt99+Wy0tLc7P5MmTde+996qlpUW33HKLvF6vGhoanPv09vZqz549mjp1qiQpOztbI0eOjJjp6OhQa2urM5Obm6tQKKQDBw44M/v371coFIqYaW1tVUdHhzNTX18vt9ut7OzsaF86AAAYYqL+HqHExERlZWVFHEtISNCYMWOc46WlpaqoqFBGRoYyMjJUUVGhUaNGqbCwUJJkWZYWLVqksrIyjRkzRqNHj1Z5ebkmTpzovPl6/Pjxmjt3roqKirRlyxZJ0n333af8/HxlZmZKkvLy8jRhwgT5/X6tWbNGp0+fVnl5uYqKitjpAQAAV+fN0p9l6dKl6unpUXFxsYLBoHJyclRfX6/ExERnZv369YqNjdWCBQvU09OjmTNnauvWrYqJiXFmtm/frpKSEufTZQUFBaqurnbOx8TEaOfOnSouLta0adMUHx+vwsJCrV279tpdLAAAuG65bNu2B3sR17Ouri5ZlqVQKGTcLtLNy3YO9hJwDb3/2LzBXgIARM3n/f3N3zUGAACMRQgBAABjEUIAAMBYhBAAADAWIQQAAIxFCAEAAGMRQgAAwFiEEAAAMBYhBAAAjEUIAQAAYxFCAADAWIQQAAAwFiEEAACMRQgBAABjEUIAAMBYhBAAADAWIQQAAIxFCAEAAGMRQgAAwFiEEAAAMBYhBAAAjEUIAQAAYxFCAADAWIQQAAAwFiEEAACMRQgBAABjEUIAAMBYhBAAADAWIQQAAIxFCAEAAGMRQgAAwFiEEAAAMBYhBAAAjEUIAQAAYxFCAADAWIQQAAAwFiEEAACMRQgBAABjEUIAAMBYhBAAADAWIQQAAIxFCAEAAGMRQgAAwFiEEAAAMBYhBAAAjEUIAQAAYxFCAADAWIQQAAAwFiEEAACMRQgBAABjEUIAAMBYhBAAADAWIQQAAIxFCAEAAGMRQgAAwFiEEAAAMBYhBAAAjEUIAQAAYxFCAADAWIQQAAAwVtRDqLKyUrfffrsSExOVmpqqu+++W++++27EjG3bWrFihXw+n+Lj4zVjxgwdOnQoYiYcDmvx4sVKSUlRQkKCCgoKdPz48YiZYDAov98vy7JkWZb8fr/OnDkTMdPW1qb58+crISFBKSkpKikpUW9vb7QvGwAADEFRD6E9e/bowQcfVFNTkxoaGnT+/Hnl5eWpu7vbmVm9erWqqqpUXV2tgwcPyuv1avbs2Tp79qwzU1paqtraWtXU1KixsVHnzp1Tfn6++vv7nZnCwkK1tLSorq5OdXV1amlpkd/vd8739/dr3rx56u7uVmNjo2pqarRjxw6VlZVF+7IBAMAQ5LJt276aT3Dq1CmlpqZqz549uvPOO2Xbtnw+n0pLS/Xwww9L+nj3x+PxaNWqVbr//vsVCoU0duxYPfPMM7rnnnskSSdOnFBaWpp27dqlOXPm6PDhw5owYYKampqUk5MjSWpqalJubq6OHDmizMxM7d69W/n5+Wpvb5fP55Mk1dTUaOHChers7FRSUtKA9YbDYYXDYed2V1eX0tLSFAqFLjs/nN28bOdgLwHX0PuPzRvsJQBA1HR1dcmyrM/8/X3V3yMUCoUkSaNHj5YkHT16VIFAQHl5ec6M2+3W9OnTtXfvXklSc3Oz+vr6ImZ8Pp+ysrKcmX379smyLCeCJGnKlCmyLCtiJisry4kgSZozZ47C4bCam5svu97KykrnpTbLspSWlhaNPwYAAHAduqohZNu2lixZojvuuENZWVmSpEAgIEnyeDwRsx6PxzkXCAQUFxen5OTkK86kpqYOeM7U1NSImUufJzk5WXFxcc7MpZYvX65QKOT8tLe3f9HLBgAAQ0Ts1Xzwhx56SL/73e/U2Ng44JzL5Yq4bdv2gGOXunTmcvNfZuaT3G633G73FdcBAACGh6u2I7R48WK9+OKL+vWvf62vfOUrznGv1ytJA3ZkOjs7nd0br9er3t5eBYPBK86cPHlywPOeOnUqYubS5wkGg+rr6xuwUwQAAMwT9RCybVsPPfSQXnjhBb366qtKT0+POJ+eni6v16uGhgbnWG9vr/bs2aOpU6dKkrKzszVy5MiImY6ODrW2tjozubm5CoVCOnDggDOzf/9+hUKhiJnW1lZ1dHQ4M/X19XK73crOzo72pQMAgCEm6i+NPfjgg3ruuef0X//1X0pMTHR2ZCzLUnx8vFwul0pLS1VRUaGMjAxlZGSooqJCo0aNUmFhoTO7aNEilZWVacyYMRo9erTKy8s1ceJEzZo1S5I0fvx4zZ07V0VFRdqyZYsk6b777lN+fr4yMzMlSXl5eZowYYL8fr/WrFmj06dPq7y8XEVFRcZ9AgwAAAwU9RDatGmTJGnGjBkRx3/xi19o4cKFkqSlS5eqp6dHxcXFCgaDysnJUX19vRITE5359evXKzY2VgsWLFBPT49mzpyprVu3KiYmxpnZvn27SkpKnE+XFRQUqLq62jkfExOjnTt3qri4WNOmTVN8fLwKCwu1du3aaF82AAAYgq769wgNdZ/3ewiGI75HyCx8jxCA4eS6+R4hAACA6xUhBAAAjEUIAQAAYxFCAADAWIQQAAAwFiEEAACMRQgBAABjEUIAAMBYhBAAADAWIQQAAIxFCAEAAGMRQgAAwFiEEAAAMBYhBAAAjEUIAQAAYxFCAADAWIQQAAAwFiEEAACMRQgBAABjEUIAAMBYhBAAADAWIQQAAIxFCAEAAGMRQgAAwFiEEAAAMBYhBAAAjEUIAQAAYxFCAADAWIQQAAAwFiEEAACMRQgBAABjEUIAAMBYhBAAADAWIQQAAIxFCAEAAGMRQgAAwFiEEAAAMBYhBAAAjEUIAQAAYxFCAADAWIQQAAAwFiEEAACMRQgBAABjEUIAAMBYhBAAADAWIQQAAIxFCAEAAGMRQgAAwFiEEAAAMBYhBAAAjEUIAQAAYxFCAADAWIQQAAAwFiEEAACMRQgBAABjEUIAAMBYhBAAADAWIQQAAIxFCAEAAGMZEUIbN25Uenq6brjhBmVnZ+u1114b7CUBAIDrwLAPoV/96lcqLS3VD3/4Q7311lv6m7/5G911111qa2sb7KUBAIBBNuxDqKqqSosWLdI///M/a/z48dqwYYPS0tK0adOmwV4aAAAYZLGDvYCrqbe3V83NzVq2bFnE8by8PO3du/ey9wmHwwqHw87tUCgkSerq6rp6C71OXQh/ONhLwDVk4v/GTZb1yMuDvQRcQ62PzhnsJVxzF/+bZtv2FeeGdQj94Q9/UH9/vzweT8Rxj8ejQCBw2ftUVlbq0UcfHXA8LS3tqqwRuF5YGwZ7BQCuFpP//T579qwsy/rU88M6hC5yuVwRt23bHnDsouXLl2vJkiXO7QsXLuj06dMaM2bMp94Hw0dXV5fS0tLU3t6upKSkwV4OgCji32+z2Lats2fPyufzXXFuWIdQSkqKYmJiBuz+dHZ2Dtglusjtdsvtdkccu/HGG6/WEnGdSkpK4j+UwDDFv9/muNJO0EXD+s3ScXFxys7OVkNDQ8TxhoYGTZ06dZBWBQAArhfDekdIkpYsWSK/36/JkycrNzdXTz75pNra2vTAAw8M9tIAAMAgG/YhdM899+h///d/tXLlSnV0dCgrK0u7du3STTfdNNhLw3XI7XbrkUceGfDyKIChj3+/cTku+7M+VwYAADBMDev3CAEAAFwJIQQAAIxFCAEAAGMRQgAAwFiEEAAAMNaw//g8cCXHjx/Xpk2btHfvXgUCAblcLnk8Hk2dOlUPPPAAf8ccAAxzfHwexmpsbNRdd92ltLQ05eXlyePxyLZtdXZ2qqGhQe3t7dq9e7emTZs22EsFcBW0t7frkUce0c9//vPBXgoGESEEY91+++264447tH79+sue/8EPfqDGxkYdPHjwGq8MwLXw29/+Vn/913+t/v7+wV4KBhEhBGPFx8erpaVFmZmZlz1/5MgRTZo0ST09Pdd4ZQCi4cUXX7zi+ffee09lZWWEkOF4jxCMNW7cOO3du/dTQ2jfvn0aN27cNV4VgGi5++675XK5dKX/v+9yua7hinA9IoRgrPLycj3wwANqbm7W7Nmz5fF45HK5FAgE1NDQoJ/97GfasGHDYC8TwJc0btw4PfHEE7r77rsve76lpUXZ2dnXdlG47hBCMFZxcbHGjBmj9evXa8uWLc72eExMjLKzs/XLX/5SCxYsGORVAviysrOz9eabb35qCH3WbhHMwHuEAEl9fX36wx/+IElKSUnRyJEjB3lFAP5Yr732mrq7uzV37tzLnu/u7tYbb7yh6dOnX+OV4XpCCAEAAGPxzdIAAMBYhBAAADAWIQQAAIxFCAEAAGMRQgCuazNmzFBpaelgL+MLWbhw4ad+ZBvA9YUQAnBde+GFF/TjH//4M+cWLlwol8s14Od//ud/rsEqAQxVfKEigOva6NGjP/fs3Llz9Ytf/CLi2NixYwfM9fb2Ki4u7o9eG4Chjx0hANe1T740tnHjRmVkZOiGG26Qx+PRt7/97YhZt9str9cb8RMTE6MZM2booYce0pIlS5SSkqLZs2dLkqqqqjRx4kQlJCQoLS1NxcXFOnfunPN4K1as0G233RbxHBs2bNDNN9/s3O7v79eSJUt04403asyYMVq6dCnfVgwMIYQQgCHhjTfeUElJiVauXKl3331XdXV1uvPOOz/3/bdt26bY2Fi9/vrr2rJliyRpxIgR+ulPf6rW1lZt27ZNr776qpYuXfqF1rVu3Tr9/Oc/19NPP63GxkadPn1atbW1X+gxAAweXhoDMCS0tbUpISFB+fn5SkxM1E033aRJkyZFzLz00kv6kz/5E+f2XXfdpf/4j/+QJH31q1/V6tWrI+Y/+Sbs9PR0/fjHP9a//Mu/aOPGjZ97XRs2bNDy5cv1rW99S5K0efNmvfzyy1/08gAMEkIIwJAwe/Zs3XTTTbrllls0d+5czZ07V9/85jc1atQoZ+brX/+6Nm3a5NxOSEhw/nny5MkDHvPXv/61Kioq9M4776irq0vnz5/XRx99pO7u7oj7fppQKKSOjg7l5uY6x2JjYzV58mReHgOGCF4aAzAkJCYm6s0339Tzzz+vcePG6Uc/+pFuvfVWnTlzxplJSEjQV7/6Vedn3LhxEec+6dixY/rGN76hrKws7dixQ83NzXriiSckffyX8Eofv3R2adBcPAdgeCCEAAwZsbGxmjVrllavXq3f/e53ev/99/Xqq69+qcd64403dP78ea1bt05TpkzRX/zFX+jEiRMRM2PHjlUgEIiIoZaWFuefLcvSuHHj1NTU5Bw7f/68mpubv9SaAFx7vDQGYEh46aWX9N577+nOO+9UcnKydu3apQsXLigzM/NLPd6f//mf6/z583r88cc1f/58vf7669q8eXPEzIwZM3Tq1CmtXr1a3/72t1VXV6fdu3crKSnJmfn+97+vxx57TBkZGRo/fryqqqoidqkAXN/YEQIwJNx444164YUX9Ld/+7caP368Nm/erOeff15f+9rXvtTj3XbbbaqqqtKqVauUlZWl7du3q7KyMmJm/Pjx2rhxo5544gndeuutOnDggMrLyyNmysrK9N3vflcLFy5Ubm6uEhMT9c1vfvNLXyeAa8tl844+AABgKHaEAACAsQghAABgLEIIAAAYixACAADGIoQAAICxCCEAAGAsQggAABiLEAIAAMYihAAAgLEIIQAAYCxCCAAAGOv/AChEP1C9SaciAAAAAElFTkSuQmCC\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
"source": [
- "# Your response here"
+ "outcome = data['isFraud'].value_counts()\n",
+ "\n",
+ "outcome.plot(kind = 'bar', xlabel='isFraud')"
]
},
{
@@ -60,11 +219,348 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here\n"
+ "# the nameOrig is the target person to act and there's not a name ith more than 1 in count, so i believe it'll not\n",
+ "# be useful to predict since there's not a pattern or high occurence there, so i'll drop \n",
+ "\n",
+ "# also, isFlaggedFraud doesnt flag really anything, so doest look that'll add anything to the forecasting\n",
+ "\n",
+ "# to end, the nameDest has some values that call attention to but ill drop it for 2 reason: 1 doesnt mean that just\n",
+ "# have many transactions that arent necesarily fraudulent, then it'll be plenty of work and would add a lot of \n",
+ "# column if used get dummies. probably we'll loose some info, but i prefer to drop it\n",
+ "\n",
+ "data.drop(['nameOrig','nameDest','isFlaggedFraud'], axis = 1, inplace = True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " step \n",
+ " amount \n",
+ " oldbalanceOrg \n",
+ " newbalanceOrig \n",
+ " oldbalanceDest \n",
+ " newbalanceDest \n",
+ " isFraud \n",
+ " _CASH_IN \n",
+ " _CASH_OUT \n",
+ " _DEBIT \n",
+ " _PAYMENT \n",
+ " _TRANSFER \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 1 \n",
+ " 9839.64 \n",
+ " 170136.0 \n",
+ " 160296.36 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 1 \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 1 \n",
+ " 1864.28 \n",
+ " 21249.0 \n",
+ " 19384.72 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 1 \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " 1 \n",
+ " 181.00 \n",
+ " 181.0 \n",
+ " 0.00 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 1 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 1 \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 1 \n",
+ " 181.00 \n",
+ " 181.0 \n",
+ " 0.00 \n",
+ " 21182.0 \n",
+ " 0.0 \n",
+ " 1 \n",
+ " 0 \n",
+ " 1 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " 1 \n",
+ " 11668.14 \n",
+ " 41554.0 \n",
+ " 29885.86 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 1 \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " step amount oldbalanceOrg newbalanceOrig oldbalanceDest \\\n",
+ "0 1 9839.64 170136.0 160296.36 0.0 \n",
+ "1 1 1864.28 21249.0 19384.72 0.0 \n",
+ "2 1 181.00 181.0 0.00 0.0 \n",
+ "3 1 181.00 181.0 0.00 21182.0 \n",
+ "4 1 11668.14 41554.0 29885.86 0.0 \n",
+ "\n",
+ " newbalanceDest isFraud _CASH_IN _CASH_OUT _DEBIT _PAYMENT _TRANSFER \n",
+ "0 0.0 0 0 0 0 1 0 \n",
+ "1 0.0 0 0 0 0 1 0 \n",
+ "2 0.0 1 0 0 0 0 1 \n",
+ "3 0.0 1 0 1 0 0 0 \n",
+ "4 0.0 0 0 0 0 1 0 "
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# lets deal with object variables: type and name dest\n",
+ "\n",
+ "#type has 6 values. i'll create dummies for each and drop the original columns\n",
+ "typedummies = pd.get_dummies(data['type'], prefix = '').astype(int)\n",
+ "data = pd.concat([data, typedummies], axis = 1)\n",
+ "data.drop('type', axis = 1, inplace = True)\n",
+ "data.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "# deal with step\n",
+ "# 1 = 1 hour of time, 9 = 9 hours of time\n",
+ "# since time is a continuous variable ill normalize it\n",
+ "from sklearn.preprocessing import MinMaxScaler\n",
+ "\n",
+ "scaler = MinMaxScaler()\n",
+ "data['step_normalized'] = scaler.fit_transform(data[['step']])\n",
+ "data.drop('step',axis=1,inplace=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " amount \n",
+ " oldbalanceOrg \n",
+ " newbalanceOrig \n",
+ " oldbalanceDest \n",
+ " newbalanceDest \n",
+ " isFraud \n",
+ " _CASH_IN \n",
+ " _CASH_OUT \n",
+ " _DEBIT \n",
+ " _PAYMENT \n",
+ " _TRANSFER \n",
+ " step_normalized \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 9839.64 \n",
+ " 170136.0 \n",
+ " 160296.36 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 1 \n",
+ " 0 \n",
+ " 0.0 \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 1864.28 \n",
+ " 21249.0 \n",
+ " 19384.72 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 1 \n",
+ " 0 \n",
+ " 0.0 \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " 11668.14 \n",
+ " 41554.0 \n",
+ " 29885.86 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 1 \n",
+ " 0 \n",
+ " 0.0 \n",
+ " \n",
+ " \n",
+ " 5 \n",
+ " 7817.71 \n",
+ " 53860.0 \n",
+ " 46042.29 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 1 \n",
+ " 0 \n",
+ " 0.0 \n",
+ " \n",
+ " \n",
+ " 6 \n",
+ " 7107.77 \n",
+ " 183195.0 \n",
+ " 176087.23 \n",
+ " 0.0 \n",
+ " 0.0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 1 \n",
+ " 0 \n",
+ " 0.0 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " amount oldbalanceOrg newbalanceOrig oldbalanceDest newbalanceDest \\\n",
+ "0 9839.64 170136.0 160296.36 0.0 0.0 \n",
+ "1 1864.28 21249.0 19384.72 0.0 0.0 \n",
+ "4 11668.14 41554.0 29885.86 0.0 0.0 \n",
+ "5 7817.71 53860.0 46042.29 0.0 0.0 \n",
+ "6 7107.77 183195.0 176087.23 0.0 0.0 \n",
+ "\n",
+ " isFraud _CASH_IN _CASH_OUT _DEBIT _PAYMENT _TRANSFER step_normalized \n",
+ "0 0 0 0 0 1 0 0.0 \n",
+ "1 0 0 0 0 1 0 0.0 \n",
+ "4 0 0 0 0 1 0 0.0 \n",
+ "5 0 0 0 0 1 0 0.0 \n",
+ "6 0 0 0 0 1 0 0.0 "
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# choose to oversample to increase samples of the minorty class\n",
+ "from sklearn.utils import resample\n",
+ "\n",
+ "nofraud = data[data['isFraud'] == 0]\n",
+ "fraud = data[data['isFraud'] == 1]\n",
+ "\n",
+ "fraud_oversampled = resample(data,\n",
+ " replace = True,\n",
+ " n_samples = len(nofraud),\n",
+ " random_state = 0)\n",
+ "\n",
+ "fraud = pd.concat([nofraud, fraud_oversampled])\n",
+ "fraud.head()"
]
},
{
@@ -76,11 +572,42 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here"
+ "# setting it\n",
+ "from sklearn.linear_model import LogisticRegression\n",
+ "from sklearn.preprocessing import StandardScaler\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "\n",
+ "feat = fraud.drop('isFraud', axis=1)\n",
+ "target = fraud['isFraud']\n",
+ "\n",
+ "x_train, x_test, y_train, y_test = train_test_split(feat, target, random_state=1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.9997196748227944"
+ ]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#training and scoring it\n",
+ "logreg = LogisticRegression()\n",
+ "logreg.fit(x_train,y_train)\n",
+ "logreg.score(x_test,y_test)"
]
},
{
@@ -92,11 +619,26 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 25,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.9993592567378159"
+ ]
+ },
+ "execution_count": 25,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here"
+ "from sklearn.neighbors import KNeighborsClassifier\n",
+ "\n",
+ "knn = KNeighborsClassifier(n_neighbors = 5)\n",
+ "knn.fit(x_train, y_train)\n",
+ "knn.score(x_test, y_test)"
]
},
{
@@ -108,11 +650,11 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
- "# Your response here"
+ "# not much of a difference but logistic is still a very very little better"
]
},
{
@@ -125,7 +667,7 @@
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3",
+ "display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -139,7 +681,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.6.8"
+ "version": "3.10.9"
}
},
"nbformat": 4,