From 70794872b4fa78dbee99bfcec348730a96f24e7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Cdanielmdepaoli=E2=80=9D?= <“danielmdepaoli@gmail.com”> Date: Mon, 28 Aug 2023 11:45:16 +0100 Subject: [PATCH] Lab Done --- your-code/lab_imbalance.ipynb | 616 +++++++++++++++++++++++++++++++++- 1 file changed, 599 insertions(+), 17 deletions(-) diff --git a/your-code/lab_imbalance.ipynb b/your-code/lab_imbalance.ipynb index a3a5359..b388991 100644 --- a/your-code/lab_imbalance.ipynb +++ b/your-code/lab_imbalance.ipynb @@ -28,11 +28,285 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ - "# Your code here" + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.preprocessing import LabelEncoder\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.tree import DecisionTreeClassifier\n", + "from sklearn.preprocessing import RobustScaler, StandardScaler, PolynomialFeatures, MinMaxScaler\n", + "import seaborn as sns\n", + "\n", + "import pandas as pd\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.metrics import accuracy_score" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "financial = pd.read_csv(\"PS_20174392719_1491204439457_log.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
steptypeamountnameOrigoldbalanceOrgnewbalanceOrignameDestoldbalanceDestnewbalanceDestisFraudisFlaggedFraud
4116394302PAYMENT10713.74C21087473110.000.00M17414140560.000.0000
1658484158PAYMENT5572.77C873991644672938.43667365.66M13767362090.000.0000
5160837357PAYMENT10284.63C25213165720619.0010334.37M9934272120.000.0000
93576743CASH_OUT189390.35C16590276390.000.00C8089043486444744.366634134.7100
4961379351CASH_OUT76731.72C9548294580.000.00C1896395958838426.87915158.5900
....................................
5618022395TRANSFER1409050.12C11285639080.000.00C16275511816310069.867719119.9800
4792371345CASH_OUT191997.11C10972592060.000.00C2120756952541763.972733761.0800
5719272398PAYMENT53549.35C17831149190.000.00M968349270.000.0000
5469004379CASH_OUT212481.51C947804496271.000.00C20062785789831.21302312.7200
4620152329PAYMENT22589.07C1127337520.000.00M19503554070.000.0000
\n", + "

100000 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " step type amount nameOrig oldbalanceOrg \\\n", + "4116394 302 PAYMENT 10713.74 C2108747311 0.00 \n", + "1658484 158 PAYMENT 5572.77 C873991644 672938.43 \n", + "5160837 357 PAYMENT 10284.63 C252131657 20619.00 \n", + "935767 43 CASH_OUT 189390.35 C1659027639 0.00 \n", + "4961379 351 CASH_OUT 76731.72 C954829458 0.00 \n", + "... ... ... ... ... ... \n", + "5618022 395 TRANSFER 1409050.12 C1128563908 0.00 \n", + "4792371 345 CASH_OUT 191997.11 C1097259206 0.00 \n", + "5719272 398 PAYMENT 53549.35 C1783114919 0.00 \n", + "5469004 379 CASH_OUT 212481.51 C947804496 271.00 \n", + "4620152 329 PAYMENT 22589.07 C112733752 0.00 \n", + "\n", + " newbalanceOrig nameDest oldbalanceDest newbalanceDest isFraud \\\n", + "4116394 0.00 M1741414056 0.00 0.00 0 \n", + "1658484 667365.66 M1376736209 0.00 0.00 0 \n", + "5160837 10334.37 M993427212 0.00 0.00 0 \n", + "935767 0.00 C808904348 6444744.36 6634134.71 0 \n", + "4961379 0.00 C1896395958 838426.87 915158.59 0 \n", + "... ... ... ... ... ... \n", + "5618022 0.00 C1627551181 6310069.86 7719119.98 0 \n", + "4792371 0.00 C212075695 2541763.97 2733761.08 0 \n", + "5719272 0.00 M96834927 0.00 0.00 0 \n", + "5469004 0.00 C200627857 89831.21 302312.72 0 \n", + "4620152 0.00 M1950355407 0.00 0.00 0 \n", + "\n", + " isFlaggedFraud \n", + "4116394 0 \n", + "1658484 0 \n", + "5160837 0 \n", + "935767 0 \n", + "4961379 0 \n", + "... ... \n", + "5618022 0 \n", + "4792371 0 \n", + "5719272 0 \n", + "5469004 0 \n", + "4620152 0 \n", + "\n", + "[100000 rows x 11 columns]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sample_financial = financial.sample(n=100000)\n", + "sample_financial" ] }, { @@ -44,11 +318,33 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "step int64\n", + "type object\n", + "amount float64\n", + "nameOrig object\n", + "oldbalanceOrg float64\n", + "newbalanceOrig float64\n", + "nameDest object\n", + "oldbalanceDest float64\n", + "newbalanceDest float64\n", + "isFraud int64\n", + "isFlaggedFraud int64\n", + "dtype: object" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your response here" + "sample_financial.dtypes" ] }, { @@ -60,27 +356,293 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 17, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0 99868\n", + "1 132\n", + "Name: isFraud, dtype: int64" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sample_financial.isFraud.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAkIAAAGYCAYAAACu6o3UAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy88F64QAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAjSElEQVR4nO3db3BU5d3/8c+akDVkkmMgZpetuTXOZFLS0BajEwK20AEClpBx2im20R2Z0ogTJY0kRRjbis6YKCAwNRVBbfEPGh/QtE6BNKntoCkEYnStQdAHogmSJViWDcS4ieHcD/xxfl2CiN4LIbner5l9sOd8d/c6jpi3V3YXl23btgAAAAx02XAvAAAAYLgQQgAAwFiEEAAAMBYhBAAAjEUIAQAAYxFCAADAWIQQAAAwFiEEAACMFT/cC7jUnTp1SocPH1ZycrJcLtdwLwcAAJwH27Z14sQJ+Xw+XXbZF+/7EEJf4vDhw8rIyBjuZQAAgK+hs7NTV1111ReeJ4S+RHJysqTP/0GmpKQM82oAAMD56OnpUUZGhvNz/IsQQl/i9K/DUlJSCCEAAEaYL3tbC2+WBgAAxiKEAACAsQghAABgLEIIAAAYixACAADGIoQAAICxCCEAAGAsQggAABiLEAIAAMYihAAAgLG+cgi9+uqrmj9/vnw+n1wul/785z9HnbdtWytXrpTP51NiYqJmzJihffv2Rc1EIhEtWbJEaWlpSkpKUnFxsQ4dOhQ1EwqF5Pf7ZVmWLMuS3+/X8ePHo2Y6Ojo0f/58JSUlKS0tTeXl5erv74+aefvttzV9+nQlJibqG9/4hh588EHZtv1VLxsAAIxCXzmEent79Z3vfEe1tbVnPb9q1SqtXbtWtbW1am1tldfr1ezZs3XixAlnpqKiQvX19aqrq1Nzc7NOnjypoqIiDQ4OOjMlJSUKBAJqaGhQQ0ODAoGA/H6/c35wcFDz5s1Tb2+vmpubVVdXp61bt6qystKZ6enp0ezZs+Xz+dTa2qrHHntMa9as0dq1a7/qZQMAgNHI/j+QZNfX1zv3T506ZXu9Xvvhhx92jn366ae2ZVn2E088Ydu2bR8/ftweM2aMXVdX58x89NFH9mWXXWY3NDTYtm3b77zzji3JbmlpcWZ2795tS7IPHDhg27Ztb9++3b7sssvsjz76yJl58cUXbbfbbYfDYdu2bfvxxx+3LcuyP/30U2empqbG9vl89qlTp87rGsPhsC3JeU4AAHDpO9+f3zF9j9DBgwcVDAZVWFjoHHO73Zo+fbp27dolSWpra9PAwEDUjM/nU25urjOze/duWZal/Px8Z2bKlCmyLCtqJjc3Vz6fz5mZM2eOIpGI2tranJnp06fL7XZHzRw+fFgffPDBWa8hEomop6cn6gYAAEan+Fg+WTAYlCR5PJ6o4x6PRx9++KEzk5CQoNTU1CEzpx8fDAaVnp4+5PnT09OjZs58ndTUVCUkJETNXHPNNUNe5/S5zMzMIa9RU1OjBx544Lyud7S7Zvm24V4CLqIPHp433EsAgIvugnxqzOVyRd23bXvIsTOdOXO2+VjM2P/vjdJftJ4VK1YoHA47t87OznOuGwAAjFwxDSGv1yvp/+8Mndbd3e3sxHi9XvX39ysUCp1z5siRI0Oe/+jRo1EzZ75OKBTSwMDAOWe6u7slDd21Os3tdislJSXqBgAARqeYhlBmZqa8Xq+ampqcY/39/dq5c6emTp0qScrLy9OYMWOiZrq6utTe3u7MFBQUKBwOa+/evc7Mnj17FA6Ho2ba29vV1dXlzDQ2NsrtdisvL8+ZefXVV6M+Ut/Y2CifzzfkV2YAAMA8XzmETp48qUAgoEAgIOnzN0gHAgF1dHTI5XKpoqJC1dXVqq+vV3t7uxYuXKixY8eqpKREkmRZlhYtWqTKykq98sorevPNN3Xbbbdp0qRJmjVrliRp4sSJmjt3rkpLS9XS0qKWlhaVlpaqqKhI2dnZkqTCwkLl5OTI7/frzTff1CuvvKKqqiqVlpY6uzglJSVyu91auHCh2tvbVV9fr+rqai1duvRLf1UHAABGv6/8ZunXX39dP/jBD5z7S5culSTdfvvt2rx5s5YtW6a+vj6VlZUpFAopPz9fjY2NSk5Odh6zbt06xcfHa8GCBerr69PMmTO1efNmxcXFOTNbtmxReXm58+my4uLiqO8uiouL07Zt21RWVqZp06YpMTFRJSUlWrNmjTNjWZaampp011136frrr1dqaqqWLl3qrBkAAJjNZdt8zfK59PT0yLIshcNh494vxKfGzMKnxgCMJuf785u/awwAABiLEAIAAMYihAAAgLEIIQAAYCxCCAAAGIsQAgAAxiKEAACAsQghAABgLEIIAAAYixACAADGIoQAAICxCCEAAGAsQggAABiLEAIAAMYihAAAgLEIIQAAYCxCCAAAGIsQAgAAxiKEAACAsQghAABgLEIIAAAYixACAADGIoQAAICxCCEAAGAsQggAABiLEAIAAMYihAAAgLEIIQAAYCxCCAAAGIsQAgAAxiKEAACAsQghAABgLEIIAAAYixACAADGIoQAAICxCCEAAGAsQggAABiLEAIAAMYihAAAgLEIIQAAYCxCCAAAGIsQAgAAxiKEAACAsQghAABgLEIIAAAYixACAADGIoQAAICxCCEAAGAsQggAABiLEAIAAMYihAAAgLEIIQAAYCxCCAAAGIsQAgAAxiKEAACAsQghAABgLEIIAAAYixACAADGIoQAAICxYh5Cn332mX79618rMzNTiYmJuvbaa/Xggw/q1KlTzoxt21q5cqV8Pp8SExM1Y8YM7du3L+p5IpGIlixZorS0NCUlJam4uFiHDh2KmgmFQvL7/bIsS5Zlye/36/jx41EzHR0dmj9/vpKSkpSWlqby8nL19/fH+rIBAMAIFPMQeuSRR/TEE0+otrZW+/fv16pVq7R69Wo99thjzsyqVau0du1a1dbWqrW1VV6vV7Nnz9aJEyecmYqKCtXX16uurk7Nzc06efKkioqKNDg46MyUlJQoEAiooaFBDQ0NCgQC8vv9zvnBwUHNmzdPvb29am5uVl1dnbZu3arKyspYXzYAABiBXLZt27F8wqKiInk8Hj399NPOsR//+McaO3asnnvuOdm2LZ/Pp4qKCt17772SPt/98Xg8euSRR7R48WKFw2FdeeWVeu6553TLLbdIkg4fPqyMjAxt375dc+bM0f79+5WTk6OWlhbl5+dLklpaWlRQUKADBw4oOztbO3bsUFFRkTo7O+Xz+SRJdXV1Wrhwobq7u5WSkvKl19PT0yPLshQOh89rfjS5Zvm24V4CLqIPHp433EsAgJg535/fMd8RuvHGG/XKK6/ovffekyS99dZbam5u1g9/+ENJ0sGDBxUMBlVYWOg8xu12a/r06dq1a5ckqa2tTQMDA1EzPp9Pubm5zszu3btlWZYTQZI0ZcoUWZYVNZObm+tEkCTNmTNHkUhEbW1tZ11/JBJRT09P1A0AAIxO8bF+wnvvvVfhcFjf/OY3FRcXp8HBQT300EP62c9+JkkKBoOSJI/HE/U4j8ejDz/80JlJSEhQamrqkJnTjw8Gg0pPTx/y+unp6VEzZ75OamqqEhISnJkz1dTU6IEHHviqlw0AAEagmO8IvfTSS3r++ef1wgsv6I033tAzzzyjNWvW6Jlnnomac7lcUfdt2x5y7Exnzpxt/uvM/LcVK1YoHA47t87OznOuCQAAjFwx3xH61a9+peXLl+unP/2pJGnSpEn68MMPVVNTo9tvv11er1fS57s1EyZMcB7X3d3t7N54vV719/crFApF7Qp1d3dr6tSpzsyRI0eGvP7Ro0ejnmfPnj1R50OhkAYGBobsFJ3mdrvldru/7uUDAIARJOY7Qp988okuuyz6aePi4pyPz2dmZsrr9aqpqck539/fr507dzqRk5eXpzFjxkTNdHV1qb293ZkpKChQOBzW3r17nZk9e/YoHA5HzbS3t6urq8uZaWxslNvtVl5eXoyvHAAAjDQx3xGaP3++HnroIf3P//yPvvWtb+nNN9/U2rVr9fOf/1zS57+qqqioUHV1tbKyspSVlaXq6mqNHTtWJSUlkiTLsrRo0SJVVlZq/PjxGjdunKqqqjRp0iTNmjVLkjRx4kTNnTtXpaWl2rhxoyTpjjvuUFFRkbKzsyVJhYWFysnJkd/v1+rVq3Xs2DFVVVWptLTUuE+AAQCAoWIeQo899ph+85vfqKysTN3d3fL5fFq8eLF++9vfOjPLli1TX1+fysrKFAqFlJ+fr8bGRiUnJzsz69atU3x8vBYsWKC+vj7NnDlTmzdvVlxcnDOzZcsWlZeXO58uKy4uVm1trXM+Li5O27ZtU1lZmaZNm6bExESVlJRozZo1sb5sAAAwAsX8e4RGG75HCKbge4QAjCbD9j1CAAAAIwUhBAAAjEUIAQAAYxFCAADAWIQQAAAwFiEEAACMRQgBAABjEUIAAMBYhBAAADAWIQQAAIxFCAEAAGMRQgAAwFiEEAAAMBYhBAAAjEUIAQAAYxFCAADAWIQQAAAwFiEEAACMRQgBAABjEUIAAMBYhBAAADAWIQQAAIxFCAEAAGMRQgAAwFiEEAAAMBYhBAAAjEUIAQAAYxFCAADAWIQQAAAwFiEEAACMRQgBAABjEUIAAMBYhBAAADAWIQQAAIxFCAEAAGMRQgAAwFiEEAAAMBYhBAAAjEUIAQAAYxFCAADAWIQQAAAwFiEEAACMRQgBAABjEUIAAMBYhBAAADAWIQQAAIxFCAEAAGMRQgAAwFiEEAAAMBYhBAAAjEUIAQAAYxFCAADAWIQQAAAwFiEEAACMRQgBAABjEUIAAMBYhBAAADAWIQQAAIxFCAEAAGMRQgAAwFgXJIQ++ugj3XbbbRo/frzGjh2r7373u2pra3PO27atlStXyufzKTExUTNmzNC+ffuiniMSiWjJkiVKS0tTUlKSiouLdejQoaiZUCgkv98vy7JkWZb8fr+OHz8eNdPR0aH58+crKSlJaWlpKi8vV39//4W4bAAAMMLEPIRCoZCmTZumMWPGaMeOHXrnnXf06KOP6oorrnBmVq1apbVr16q2tlatra3yer2aPXu2Tpw44cxUVFSovr5edXV1am5u1smTJ1VUVKTBwUFnpqSkRIFAQA0NDWpoaFAgEJDf73fODw4Oat68eert7VVzc7Pq6uq0detWVVZWxvqyAQDACOSybduO5RMuX75c//rXv/Taa6+d9bxt2/L5fKqoqNC9994r6fPdH4/Ho0ceeUSLFy9WOBzWlVdeqeeee0633HKLJOnw4cPKyMjQ9u3bNWfOHO3fv185OTlqaWlRfn6+JKmlpUUFBQU6cOCAsrOztWPHDhUVFamzs1M+n0+SVFdXp4ULF6q7u1spKSlfej09PT2yLEvhcPi85keTa5ZvG+4l4CL64OF5w70EAIiZ8/35HfMdoZdfflnXX3+9fvKTnyg9PV2TJ0/Wk08+6Zw/ePCggsGgCgsLnWNut1vTp0/Xrl27JEltbW0aGBiImvH5fMrNzXVmdu/eLcuynAiSpClTpsiyrKiZ3NxcJ4Ikac6cOYpEIlG/qvtvkUhEPT09UTcAADA6xTyE3n//fW3YsEFZWVn629/+pjvvvFPl5eV69tlnJUnBYFCS5PF4oh7n8Xicc8FgUAkJCUpNTT3nTHp6+pDXT09Pj5o583VSU1OVkJDgzJyppqbGec+RZVnKyMj4qv8IAADACBHzEDp16pSuu+46VVdXa/LkyVq8eLFKS0u1YcOGqDmXyxV137btIcfOdObM2ea/zsx/W7FihcLhsHPr7Ow855oAAMDIFfMQmjBhgnJycqKOTZw4UR0dHZIkr9crSUN2ZLq7u53dG6/Xq/7+foVCoXPOHDlyZMjrHz16NGrmzNcJhUIaGBgYslN0mtvtVkpKStQNAACMTjEPoWnTpundd9+NOvbee+/p6quvliRlZmbK6/WqqanJOd/f36+dO3dq6tSpkqS8vDyNGTMmaqarq0vt7e3OTEFBgcLhsPbu3evM7NmzR+FwOGqmvb1dXV1dzkxjY6Pcbrfy8vJifOUAAGCkiY/1E95zzz2aOnWqqqurtWDBAu3du1ebNm3Spk2bJH3+q6qKigpVV1crKytLWVlZqq6u1tixY1VSUiJJsixLixYtUmVlpcaPH69x48apqqpKkyZN0qxZsyR9vss0d+5clZaWauPGjZKkO+64Q0VFRcrOzpYkFRYWKicnR36/X6tXr9axY8dUVVWl0tJSdnoAAEDsQ+iGG25QfX29VqxYoQcffFCZmZlav369br31Vmdm2bJl6uvrU1lZmUKhkPLz89XY2Kjk5GRnZt26dYqPj9eCBQvU19enmTNnavPmzYqLi3NmtmzZovLycufTZcXFxaqtrXXOx8XFadu2bSorK9O0adOUmJiokpISrVmzJtaXDQAARqCYf4/QaMP3CMEUfI8QgNFk2L5HCAAAYKQghAAAgLEIIQAAYCxCCAAAGIsQAgAAxiKEAACAsQghAABgLEIIAAAYixACAADGIoQAAICxCCEAAGAsQggAABiLEAIAAMYihAAAgLEIIQAAYCxCCAAAGIsQAgAAxiKEAACAsQghAABgLEIIAAAYixACAADGIoQAAICxCCEAAGAsQggAABiLEAIAAMYihAAAgLEIIQAAYCxCCAAAGIsQAgAAxiKEAACAsQghAABgLEIIAAAYixACAADGIoQAAICxCCEAAGAsQggAABiLEAIAAMYihAAAgLEIIQAAYCxCCAAAGIsQAgAAxiKEAACAsQghAABgLEIIAAAYixACAADGIoQAAICxCCEAAGAsQggAABiLEAIAAMYihAAAgLEIIQAAYCxCCAAAGIsQAgAAxiKEAACAsQghAABgLEIIAAAYixACAADGIoQAAICxCCEAAGCsCx5CNTU1crlcqqiocI7Ztq2VK1fK5/MpMTFRM2bM0L59+6IeF4lEtGTJEqWlpSkpKUnFxcU6dOhQ1EwoFJLf75dlWbIsS36/X8ePH4+a6ejo0Pz585WUlKS0tDSVl5erv7//Ql0uAAAYQS5oCLW2tmrTpk369re/HXV81apVWrt2rWpra9Xa2iqv16vZs2frxIkTzkxFRYXq6+tVV1en5uZmnTx5UkVFRRocHHRmSkpKFAgE1NDQoIaGBgUCAfn9fuf84OCg5s2bp97eXjU3N6uurk5bt25VZWXlhbxsAAAwQlywEDp58qRuvfVWPfnkk0pNTXWO27at9evX67777tOPfvQj5ebm6plnntEnn3yiF154QZIUDof19NNP69FHH9WsWbM0efJkPf/883r77bf197//XZK0f/9+NTQ06KmnnlJBQYEKCgr05JNP6q9//aveffddSVJjY6PeeecdPf/885o8ebJmzZqlRx99VE8++aR6enou1KUDAIAR4oKF0F133aV58+Zp1qxZUccPHjyoYDCowsJC55jb7db06dO1a9cuSVJbW5sGBgaiZnw+n3Jzc52Z3bt3y7Is5efnOzNTpkyRZVlRM7m5ufL5fM7MnDlzFIlE1NbWdtZ1RyIR9fT0RN0AAMDoFH8hnrSurk5vvPGGWltbh5wLBoOSJI/HE3Xc4/Howw8/dGYSEhKidpJOz5x+fDAYVHp6+pDnT09Pj5o583VSU1OVkJDgzJyppqZGDzzwwPlcJgAAGOFiviPU2dmpX/7yl3r++ed1+eWXf+Gcy+WKum/b9pBjZzpz5mzzX2fmv61YsULhcNi5dXZ2nnNNAABg5Ip5CLW1tam7u1t5eXmKj49XfHy8du7cqd/97neKj493dmjO3JHp7u52znm9XvX39ysUCp1z5siRI0Ne/+jRo1EzZ75OKBTSwMDAkJ2i09xut1JSUqJuAABgdIp5CM2cOVNvv/22AoGAc7v++ut16623KhAI6Nprr5XX61VTU5PzmP7+fu3cuVNTp06VJOXl5WnMmDFRM11dXWpvb3dmCgoKFA6HtXfvXmdmz549CofDUTPt7e3q6upyZhobG+V2u5WXlxfrSwcAACNMzN8jlJycrNzc3KhjSUlJGj9+vHO8oqJC1dXVysrKUlZWlqqrqzV27FiVlJRIkizL0qJFi1RZWanx48dr3Lhxqqqq0qRJk5w3X0+cOFFz585VaWmpNm7cKEm64447VFRUpOzsbElSYWGhcnJy5Pf7tXr1ah07dkxVVVUqLS1lpwcAAFyYN0t/mWXLlqmvr09lZWUKhULKz89XY2OjkpOTnZl169YpPj5eCxYsUF9fn2bOnKnNmzcrLi7OmdmyZYvKy8udT5cVFxertrbWOR8XF6dt27aprKxM06ZNU2JiokpKSrRmzZqLd7EAAOCS5bJt2x7uRVzKenp6ZFmWwuGwcbtI1yzfNtxLwEX0wcPzhnsJABAz5/vzm79rDAAAGIsQAgAAxiKEAACAsQghAABgLEIIAAAYixACAADGIoQAAICxCCEAAGAsQggAABiLEAIAAMYihAAAgLEIIQAAYCxCCAAAGIsQAgAAxiKEAACAsQghAABgLEIIAAAYixACAADGIoQAAICxCCEAAGAsQggAABiLEAIAAMYihAAAgLEIIQAAYCxCCAAAGIsQAgAAxiKEAACAsQghAABgLEIIAAAYixACAADGIoQAAICxCCEAAGAsQggAABiLEAIAAMYihAAAgLEIIQAAYCxCCAAAGIsQAgAAxiKEAACAsQghAABgLEIIAAAYixACAADGIoQAAICxCCEAAGAsQggAABiLEAIAAMYihAAAgLEIIQAAYCxCCAAAGIsQAgAAxiKEAACAsQghAABgLEIIAAAYixACAADGIoQAAICxCCEAAGAsQggAABiLEAIAAMYihAAAgLEIIQAAYKyYh1BNTY1uuOEGJScnKz09XTfffLPefffdqBnbtrVy5Ur5fD4lJiZqxowZ2rdvX9RMJBLRkiVLlJaWpqSkJBUXF+vQoUNRM6FQSH6/X5ZlybIs+f1+HT9+PGqmo6ND8+fPV1JSktLS0lReXq7+/v5YXzYAABiBYh5CO3fu1F133aWWlhY1NTXps88+U2FhoXp7e52ZVatWae3ataqtrVVra6u8Xq9mz56tEydOODMVFRWqr69XXV2dmpubdfLkSRUVFWlwcNCZKSkpUSAQUENDgxoaGhQIBOT3+53zg4ODmjdvnnp7e9Xc3Ky6ujpt3bpVlZWVsb5sAAAwArls27Yv5AscPXpU6enp2rlzp77//e/Ltm35fD5VVFTo3nvvlfT57o/H49EjjzyixYsXKxwO68orr9Rzzz2nW265RZJ0+PBhZWRkaPv27ZozZ47279+vnJwctbS0KD8/X5LU0tKigoICHThwQNnZ2dqxY4eKiorU2dkpn88nSaqrq9PChQvV3d2tlJSUL11/T0+PLMtSOBw+r/nR5Jrl24Z7CbiIPnh43nAvAQBi5nx/fl/w9wiFw2FJ0rhx4yRJBw8eVDAYVGFhoTPjdrs1ffp07dq1S5LU1tamgYGBqBmfz6fc3FxnZvfu3bIsy4kgSZoyZYosy4qayc3NdSJIkubMmaNIJKK2trazrjcSiainpyfqBgAARqcLGkK2bWvp0qW68cYblZubK0kKBoOSJI/HEzXr8Xicc8FgUAkJCUpNTT3nTHp6+pDXTE9Pj5o583VSU1OVkJDgzJyppqbGec+RZVnKyMj4qpcNAABGiAsaQnfffbf+/e9/68UXXxxyzuVyRd23bXvIsTOdOXO2+a8z899WrFihcDjs3Do7O8+5JgAAMHJdsBBasmSJXn75Zf3zn//UVVdd5Rz3er2SNGRHpru729m98Xq96u/vVygUOufMkSNHhrzu0aNHo2bOfJ1QKKSBgYEhO0Wnud1upaSkRN0AAMDoFPMQsm1bd999t/70pz/pH//4hzIzM6POZ2Zmyuv1qqmpyTnW39+vnTt3aurUqZKkvLw8jRkzJmqmq6tL7e3tzkxBQYHC4bD27t3rzOzZs0fhcDhqpr29XV1dXc5MY2Oj3G638vLyYn3pAABghImP9RPeddddeuGFF/SXv/xFycnJzo6MZVlKTEyUy+VSRUWFqqurlZWVpaysLFVXV2vs2LEqKSlxZhctWqTKykqNHz9e48aNU1VVlSZNmqRZs2ZJkiZOnKi5c+eqtLRUGzdulCTdcccdKioqUnZ2tiSpsLBQOTk58vv9Wr16tY4dO6aqqiqVlpay0wMAAGIfQhs2bJAkzZgxI+r4H//4Ry1cuFCStGzZMvX19amsrEyhUEj5+flqbGxUcnKyM79u3TrFx8drwYIF6uvr08yZM7V582bFxcU5M1u2bFF5ebnz6bLi4mLV1tY65+Pi4rRt2zaVlZVp2rRpSkxMVElJidasWRPrywYAACPQBf8eoZGO7xGCKfgeIQCjySXzPUIAAACXKkIIAAAYixACAADGIoQAAICxCCEAAGAsQggAABiLEAIAAMYihAAAgLEIIQAAYCxCCAAAGIsQAgAAxiKEAACAsQghAABgLEIIAAAYixACAADGIoQAAICxCCEAAGAsQggAABiLEAIAAMYihAAAgLEIIQAAYCxCCAAAGIsQAgAAxiKEAACAsQghAABgLEIIAAAYixACAADGIoQAAICxCCEAAGAsQggAABiLEAIAAMYihAAAgLEIIQAAYCxCCAAAGIsQAgAAxiKEAACAsQghAABgLEIIAAAYixACAADGIoQAAICxCCEAAGAsQggAABiLEAIAAMYihAAAgLEIIQAAYCxCCAAAGIsQAgAAxiKEAACAsQghAABgLEIIAAAYixACAADGIoQAAICxCCEAAGAsQggAABiLEAIAAMYihAAAgLEIIQAAYCxCCAAAGIsQAgAAxjIihB5//HFlZmbq8ssvV15enl577bXhXhIAALgEjPoQeumll1RRUaH77rtPb775pr73ve/ppptuUkdHx3AvDQAADLNRH0Jr167VokWL9Itf/EITJ07U+vXrlZGRoQ0bNgz30gAAwDCLH+4FXEj9/f1qa2vT8uXLo44XFhZq165dZ31MJBJRJBJx7ofDYUlST0/PhVvoJepU5JPhXgIuIhP/HQcwep3+b5pt2+ecG9Uh9PHHH2twcFAejyfquMfjUTAYPOtjampq9MADDww5npGRcUHWCFwqrPXDvQIAiL0TJ07IsqwvPD+qQ+g0l8sVdd+27SHHTluxYoWWLl3q3D916pSOHTum8ePHf+FjMHr09PQoIyNDnZ2dSklJGe7lAIgh/nybxbZtnThxQj6f75xzozqE0tLSFBcXN2T3p7u7e8gu0Wlut1tutzvq2BVXXHGhlohLVEpKCv+hBEYp/nyb41w7QaeN6jdLJyQkKC8vT01NTVHHm5qaNHXq1GFaFQAAuFSM6h0hSVq6dKn8fr+uv/56FRQUaNOmTero6NCdd9453EsDAADDbNSH0C233KL//Oc/evDBB9XV1aXc3Fxt375dV1999XAvDZcgt9ut+++/f8ivRwGMfPz5xtm47C/7XBkAAMAoNarfIwQAAHAuhBAAADAWIQQAAIxFCAEAAGMRQgAAwFij/uPzwLkcOnRIGzZs0K5duxQMBuVyueTxeDR16lTdeeed/B1zADDK8fF5GKu5uVk33XSTMjIyVFhYKI/HI9u21d3draamJnV2dmrHjh2aNm3acC8VwAXQ2dmp+++/X3/4wx+GeykYRoQQjHXDDTfoxhtv1Lp16856/p577lFzc7NaW1sv8soAXAxvvfWWrrvuOg0ODg73UjCMCCEYKzExUYFAQNnZ2Wc9f+DAAU2ePFl9fX0XeWUAYuHll18+5/n3339flZWVhJDheI8QjDVhwgTt2rXrC0No9+7dmjBhwkVeFYBYufnmm+VyuXSu/993uVwXcUW4FBFCMFZVVZXuvPNOtbW1afbs2fJ4PHK5XAoGg2pqatJTTz2l9evXD/cyAXxNEyZM0O9//3vdfPPNZz0fCASUl5d3cReFSw4hBGOVlZVp/PjxWrdunTZu3Ohsj8fFxSkvL0/PPvusFixYMMyrBPB15eXl6Y033vjCEPqy3SKYgfcIAZIGBgb08ccfS5LS0tI0ZsyYYV4RgP+r1157Tb29vZo7d+5Zz/f29ur111/X9OnTL/LKcCkhhAAAgLH4ZmkAAGAsQggAABiLEAIAAMYihAAAgLEIIQAAYCxCCAAAGIsQAgAAxiKEAACAsf4XKb9AFHhw3MkAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "pd.value_counts(sample_financial['isFraud']).plot(kind = 'bar')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "CASH_OUT 35055\n", + "PAYMENT 33830\n", + "CASH_IN 22089\n", + "TRANSFER 8384\n", + "DEBIT 642\n", + "Name: type, dtype: int64" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here\n" + "sample_financial[\"type\"].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
stepamountoldbalanceOrgnewbalanceOrigoldbalanceDestnewbalanceDestisFraudisFlaggedFraudtype_CASH_INtype_CASH_OUTtype_DEBITtype_PAYMENTtype_TRANSFER
411639430210713.740.000.000.000.000000010
16584841585572.77672938.43667365.660.000.000000010
516083735710284.6320619.0010334.370.000.000000010
93576743189390.350.000.006444744.366634134.710001000
496137935176731.720.000.00838426.87915158.590001000
\n", + "
" + ], + "text/plain": [ + " step amount oldbalanceOrg newbalanceOrig oldbalanceDest \\\n", + "4116394 302 10713.74 0.00 0.00 0.00 \n", + "1658484 158 5572.77 672938.43 667365.66 0.00 \n", + "5160837 357 10284.63 20619.00 10334.37 0.00 \n", + "935767 43 189390.35 0.00 0.00 6444744.36 \n", + "4961379 351 76731.72 0.00 0.00 838426.87 \n", + "\n", + " newbalanceDest isFraud isFlaggedFraud type_CASH_IN type_CASH_OUT \\\n", + "4116394 0.00 0 0 0 0 \n", + "1658484 0.00 0 0 0 0 \n", + "5160837 0.00 0 0 0 0 \n", + "935767 6634134.71 0 0 0 1 \n", + "4961379 915158.59 0 0 0 1 \n", + "\n", + " type_DEBIT type_PAYMENT type_TRANSFER \n", + "4116394 0 1 0 \n", + "1658484 0 1 0 \n", + "5160837 0 1 0 \n", + "935767 0 0 0 \n", + "4961379 0 0 0 " + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sample_financial = pd.get_dummies(sample_financial, columns=['type'])\n", + "sample_financial.drop(['nameOrig','nameDest'], axis = 1, inplace=True)\n", + "sample_financial.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Run a logisitc regression classifier and evaluate its accuracy." + "### Run a logistic regression classifier and evaluate its accuracy." ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 27, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0.99935" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here" + "from sklearn.model_selection import train_test_split\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.metrics import classification_report, confusion_matrix\n", + "\n", + "features = sample_financial.drop(labels='isFraud', axis=1)\n", + "target = sample_financial['isFraud']\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(features, target, test_size= 0.2)\n", + "\n", + "model = LogisticRegression()\n", + "model.fit(X_train,y_train)\n", + "model.score(X_test,y_test)\n", + "pred = model.predict(X_test)\n", + "\n", + "model.score(X_train,y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[19963, 8],\n", + " [ 4, 25]])" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pred = model.predict(X_test)\n", + "confusion_matrix(y_test, pred)" ] }, { @@ -92,11 +654,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 38, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0.5306674456025794" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here" + "from sklearn.ensemble import BaggingRegressor\n", + "\n", + "bagging_reg = BaggingRegressor(\n", + " DecisionTreeRegressor(max_depth=5), \n", + " n_estimators=100, \n", + " max_samples=5000, \n", + " bootstrap = True) \n", + "\n", + "bagging_reg.fit(X_train, y_train)\n", + "bagging_reg.score(X_test,y_test)" ] }, { @@ -125,7 +707,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -139,7 +721,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.8" + "version": "3.10.9" } }, "nbformat": 4,