diff --git a/your-code/lab_imbalance.ipynb b/your-code/lab_imbalance.ipynb
index a3a5359..b388991 100644
--- a/your-code/lab_imbalance.ipynb
+++ b/your-code/lab_imbalance.ipynb
@@ -28,11 +28,285 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
- "# Your code here"
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "import matplotlib.pyplot as plt\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "from sklearn.preprocessing import LabelEncoder\n",
+ "from sklearn.linear_model import LogisticRegression\n",
+ "from sklearn.tree import DecisionTreeClassifier\n",
+ "from sklearn.preprocessing import RobustScaler, StandardScaler, PolynomialFeatures, MinMaxScaler\n",
+ "import seaborn as sns\n",
+ "\n",
+ "import pandas as pd\n",
+ "from sklearn.linear_model import LogisticRegression\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "from sklearn.metrics import accuracy_score"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "financial = pd.read_csv(\"PS_20174392719_1491204439457_log.csv\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " step | \n",
+ " type | \n",
+ " amount | \n",
+ " nameOrig | \n",
+ " oldbalanceOrg | \n",
+ " newbalanceOrig | \n",
+ " nameDest | \n",
+ " oldbalanceDest | \n",
+ " newbalanceDest | \n",
+ " isFraud | \n",
+ " isFlaggedFraud | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 4116394 | \n",
+ " 302 | \n",
+ " PAYMENT | \n",
+ " 10713.74 | \n",
+ " C2108747311 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " M1741414056 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 1658484 | \n",
+ " 158 | \n",
+ " PAYMENT | \n",
+ " 5572.77 | \n",
+ " C873991644 | \n",
+ " 672938.43 | \n",
+ " 667365.66 | \n",
+ " M1376736209 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 5160837 | \n",
+ " 357 | \n",
+ " PAYMENT | \n",
+ " 10284.63 | \n",
+ " C252131657 | \n",
+ " 20619.00 | \n",
+ " 10334.37 | \n",
+ " M993427212 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 935767 | \n",
+ " 43 | \n",
+ " CASH_OUT | \n",
+ " 189390.35 | \n",
+ " C1659027639 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " C808904348 | \n",
+ " 6444744.36 | \n",
+ " 6634134.71 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 4961379 | \n",
+ " 351 | \n",
+ " CASH_OUT | \n",
+ " 76731.72 | \n",
+ " C954829458 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " C1896395958 | \n",
+ " 838426.87 | \n",
+ " 915158.59 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 5618022 | \n",
+ " 395 | \n",
+ " TRANSFER | \n",
+ " 1409050.12 | \n",
+ " C1128563908 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " C1627551181 | \n",
+ " 6310069.86 | \n",
+ " 7719119.98 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 4792371 | \n",
+ " 345 | \n",
+ " CASH_OUT | \n",
+ " 191997.11 | \n",
+ " C1097259206 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " C212075695 | \n",
+ " 2541763.97 | \n",
+ " 2733761.08 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 5719272 | \n",
+ " 398 | \n",
+ " PAYMENT | \n",
+ " 53549.35 | \n",
+ " C1783114919 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " M96834927 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 5469004 | \n",
+ " 379 | \n",
+ " CASH_OUT | \n",
+ " 212481.51 | \n",
+ " C947804496 | \n",
+ " 271.00 | \n",
+ " 0.00 | \n",
+ " C200627857 | \n",
+ " 89831.21 | \n",
+ " 302312.72 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 4620152 | \n",
+ " 329 | \n",
+ " PAYMENT | \n",
+ " 22589.07 | \n",
+ " C112733752 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " M1950355407 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
100000 rows × 11 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " step type amount nameOrig oldbalanceOrg \\\n",
+ "4116394 302 PAYMENT 10713.74 C2108747311 0.00 \n",
+ "1658484 158 PAYMENT 5572.77 C873991644 672938.43 \n",
+ "5160837 357 PAYMENT 10284.63 C252131657 20619.00 \n",
+ "935767 43 CASH_OUT 189390.35 C1659027639 0.00 \n",
+ "4961379 351 CASH_OUT 76731.72 C954829458 0.00 \n",
+ "... ... ... ... ... ... \n",
+ "5618022 395 TRANSFER 1409050.12 C1128563908 0.00 \n",
+ "4792371 345 CASH_OUT 191997.11 C1097259206 0.00 \n",
+ "5719272 398 PAYMENT 53549.35 C1783114919 0.00 \n",
+ "5469004 379 CASH_OUT 212481.51 C947804496 271.00 \n",
+ "4620152 329 PAYMENT 22589.07 C112733752 0.00 \n",
+ "\n",
+ " newbalanceOrig nameDest oldbalanceDest newbalanceDest isFraud \\\n",
+ "4116394 0.00 M1741414056 0.00 0.00 0 \n",
+ "1658484 667365.66 M1376736209 0.00 0.00 0 \n",
+ "5160837 10334.37 M993427212 0.00 0.00 0 \n",
+ "935767 0.00 C808904348 6444744.36 6634134.71 0 \n",
+ "4961379 0.00 C1896395958 838426.87 915158.59 0 \n",
+ "... ... ... ... ... ... \n",
+ "5618022 0.00 C1627551181 6310069.86 7719119.98 0 \n",
+ "4792371 0.00 C212075695 2541763.97 2733761.08 0 \n",
+ "5719272 0.00 M96834927 0.00 0.00 0 \n",
+ "5469004 0.00 C200627857 89831.21 302312.72 0 \n",
+ "4620152 0.00 M1950355407 0.00 0.00 0 \n",
+ "\n",
+ " isFlaggedFraud \n",
+ "4116394 0 \n",
+ "1658484 0 \n",
+ "5160837 0 \n",
+ "935767 0 \n",
+ "4961379 0 \n",
+ "... ... \n",
+ "5618022 0 \n",
+ "4792371 0 \n",
+ "5719272 0 \n",
+ "5469004 0 \n",
+ "4620152 0 \n",
+ "\n",
+ "[100000 rows x 11 columns]"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "sample_financial = financial.sample(n=100000)\n",
+ "sample_financial"
]
},
{
@@ -44,11 +318,33 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 9,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "step int64\n",
+ "type object\n",
+ "amount float64\n",
+ "nameOrig object\n",
+ "oldbalanceOrg float64\n",
+ "newbalanceOrig float64\n",
+ "nameDest object\n",
+ "oldbalanceDest float64\n",
+ "newbalanceDest float64\n",
+ "isFraud int64\n",
+ "isFlaggedFraud int64\n",
+ "dtype: object"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your response here"
+ "sample_financial.dtypes"
]
},
{
@@ -60,27 +356,293 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": 17,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0 99868\n",
+ "1 132\n",
+ "Name: isFraud, dtype: int64"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "sample_financial.isFraud.value_counts()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAkIAAAGYCAYAAACu6o3UAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy88F64QAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAjSElEQVR4nO3db3BU5d3/8c+akDVkkmMgZpetuTXOZFLS0BajEwK20AEClpBx2im20R2Z0ogTJY0kRRjbis6YKCAwNRVBbfEPGh/QtE6BNKntoCkEYnStQdAHogmSJViWDcS4ieHcD/xxfl2CiN4LIbner5l9sOd8d/c6jpi3V3YXl23btgAAAAx02XAvAAAAYLgQQgAAwFiEEAAAMBYhBAAAjEUIAQAAYxFCAADAWIQQAAAwFiEEAACMFT/cC7jUnTp1SocPH1ZycrJcLtdwLwcAAJwH27Z14sQJ+Xw+XXbZF+/7EEJf4vDhw8rIyBjuZQAAgK+hs7NTV1111ReeJ4S+RHJysqTP/0GmpKQM82oAAMD56OnpUUZGhvNz/IsQQl/i9K/DUlJSCCEAAEaYL3tbC2+WBgAAxiKEAACAsQghAABgLEIIAAAYixACAADGIoQAAICxCCEAAGAsQggAABiLEAIAAMYihAAAgLG+cgi9+uqrmj9/vnw+n1wul/785z9HnbdtWytXrpTP51NiYqJmzJihffv2Rc1EIhEtWbJEaWlpSkpKUnFxsQ4dOhQ1EwqF5Pf7ZVmWLMuS3+/X8ePHo2Y6Ojo0f/58JSUlKS0tTeXl5erv74+aefvttzV9+nQlJibqG9/4hh588EHZtv1VLxsAAIxCXzmEent79Z3vfEe1tbVnPb9q1SqtXbtWtbW1am1tldfr1ezZs3XixAlnpqKiQvX19aqrq1Nzc7NOnjypoqIiDQ4OOjMlJSUKBAJqaGhQQ0ODAoGA/H6/c35wcFDz5s1Tb2+vmpubVVdXp61bt6qystKZ6enp0ezZs+Xz+dTa2qrHHntMa9as0dq1a7/qZQMAgNHI/j+QZNfX1zv3T506ZXu9Xvvhhx92jn366ae2ZVn2E088Ydu2bR8/ftweM2aMXVdX58x89NFH9mWXXWY3NDTYtm3b77zzji3JbmlpcWZ2795tS7IPHDhg27Ztb9++3b7sssvsjz76yJl58cUXbbfbbYfDYdu2bfvxxx+3LcuyP/30U2empqbG9vl89qlTp87rGsPhsC3JeU4AAHDpO9+f3zF9j9DBgwcVDAZVWFjoHHO73Zo+fbp27dolSWpra9PAwEDUjM/nU25urjOze/duWZal/Px8Z2bKlCmyLCtqJjc3Vz6fz5mZM2eOIpGI2tranJnp06fL7XZHzRw+fFgffPDBWa8hEomop6cn6gYAAEan+Fg+WTAYlCR5PJ6o4x6PRx9++KEzk5CQoNTU1CEzpx8fDAaVnp4+5PnT09OjZs58ndTUVCUkJETNXHPNNUNe5/S5zMzMIa9RU1OjBx544Lyud7S7Zvm24V4CLqIPHp433EsAgIvugnxqzOVyRd23bXvIsTOdOXO2+VjM2P/vjdJftJ4VK1YoHA47t87OznOuGwAAjFwxDSGv1yvp/+8Mndbd3e3sxHi9XvX39ysUCp1z5siRI0Oe/+jRo1EzZ75OKBTSwMDAOWe6u7slDd21Os3tdislJSXqBgAARqeYhlBmZqa8Xq+ampqcY/39/dq5c6emTp0qScrLy9OYMWOiZrq6utTe3u7MFBQUKBwOa+/evc7Mnj17FA6Ho2ba29vV1dXlzDQ2NsrtdisvL8+ZefXVV6M+Ut/Y2CifzzfkV2YAAMA8XzmETp48qUAgoEAgIOnzN0gHAgF1dHTI5XKpoqJC1dXVqq+vV3t7uxYuXKixY8eqpKREkmRZlhYtWqTKykq98sorevPNN3Xbbbdp0qRJmjVrliRp4sSJmjt3rkpLS9XS0qKWlhaVlpaqqKhI2dnZkqTCwkLl5OTI7/frzTff1CuvvKKqqiqVlpY6uzglJSVyu91auHCh2tvbVV9fr+rqai1duvRLf1UHAABGv6/8ZunXX39dP/jBD5z7S5culSTdfvvt2rx5s5YtW6a+vj6VlZUpFAopPz9fjY2NSk5Odh6zbt06xcfHa8GCBerr69PMmTO1efNmxcXFOTNbtmxReXm58+my4uLiqO8uiouL07Zt21RWVqZp06YpMTFRJSUlWrNmjTNjWZaampp011136frrr1dqaqqWLl3qrBkAAJjNZdt8zfK59PT0yLIshcNh494vxKfGzMKnxgCMJuf785u/awwAABiLEAIAAMYihAAAgLEIIQAAYCxCCAAAGIsQAgAAxiKEAACAsQghAABgLEIIAAAYixACAADGIoQAAICxCCEAAGAsQggAABiLEAIAAMYihAAAgLEIIQAAYCxCCAAAGIsQAgAAxiKEAACAsQghAABgLEIIAAAYixACAADGIoQAAICxCCEAAGAsQggAABiLEAIAAMYihAAAgLEIIQAAYCxCCAAAGIsQAgAAxiKEAACAsQghAABgLEIIAAAYixACAADGIoQAAICxCCEAAGAsQggAABiLEAIAAMYihAAAgLEIIQAAYCxCCAAAGIsQAgAAxiKEAACAsQghAABgLEIIAAAYixACAADGIoQAAICxCCEAAGAsQggAABiLEAIAAMYihAAAgLEIIQAAYCxCCAAAGIsQAgAAxiKEAACAsQghAABgLEIIAAAYixACAADGIoQAAICxYh5Cn332mX79618rMzNTiYmJuvbaa/Xggw/q1KlTzoxt21q5cqV8Pp8SExM1Y8YM7du3L+p5IpGIlixZorS0NCUlJam4uFiHDh2KmgmFQvL7/bIsS5Zlye/36/jx41EzHR0dmj9/vpKSkpSWlqby8nL19/fH+rIBAMAIFPMQeuSRR/TEE0+otrZW+/fv16pVq7R69Wo99thjzsyqVau0du1a1dbWqrW1VV6vV7Nnz9aJEyecmYqKCtXX16uurk7Nzc06efKkioqKNDg46MyUlJQoEAiooaFBDQ0NCgQC8vv9zvnBwUHNmzdPvb29am5uVl1dnbZu3arKyspYXzYAABiBXLZt27F8wqKiInk8Hj399NPOsR//+McaO3asnnvuOdm2LZ/Pp4qKCt17772SPt/98Xg8euSRR7R48WKFw2FdeeWVeu6553TLLbdIkg4fPqyMjAxt375dc+bM0f79+5WTk6OWlhbl5+dLklpaWlRQUKADBw4oOztbO3bsUFFRkTo7O+Xz+SRJdXV1Wrhwobq7u5WSkvKl19PT0yPLshQOh89rfjS5Zvm24V4CLqIPHp433EsAgJg535/fMd8RuvHGG/XKK6/ovffekyS99dZbam5u1g9/+ENJ0sGDBxUMBlVYWOg8xu12a/r06dq1a5ckqa2tTQMDA1EzPp9Pubm5zszu3btlWZYTQZI0ZcoUWZYVNZObm+tEkCTNmTNHkUhEbW1tZ11/JBJRT09P1A0AAIxO8bF+wnvvvVfhcFjf/OY3FRcXp8HBQT300EP62c9+JkkKBoOSJI/HE/U4j8ejDz/80JlJSEhQamrqkJnTjw8Gg0pPTx/y+unp6VEzZ75OamqqEhISnJkz1dTU6IEHHviqlw0AAEagmO8IvfTSS3r++ef1wgsv6I033tAzzzyjNWvW6Jlnnomac7lcUfdt2x5y7Exnzpxt/uvM/LcVK1YoHA47t87OznOuCQAAjFwx3xH61a9+peXLl+unP/2pJGnSpEn68MMPVVNTo9tvv11er1fS57s1EyZMcB7X3d3t7N54vV719/crFApF7Qp1d3dr6tSpzsyRI0eGvP7Ro0ejnmfPnj1R50OhkAYGBobsFJ3mdrvldru/7uUDAIARJOY7Qp988okuuyz6aePi4pyPz2dmZsrr9aqpqck539/fr507dzqRk5eXpzFjxkTNdHV1qb293ZkpKChQOBzW3r17nZk9e/YoHA5HzbS3t6urq8uZaWxslNvtVl5eXoyvHAAAjDQx3xGaP3++HnroIf3P//yPvvWtb+nNN9/U2rVr9fOf/1zS57+qqqioUHV1tbKyspSVlaXq6mqNHTtWJSUlkiTLsrRo0SJVVlZq/PjxGjdunKqqqjRp0iTNmjVLkjRx4kTNnTtXpaWl2rhxoyTpjjvuUFFRkbKzsyVJhYWFysnJkd/v1+rVq3Xs2DFVVVWptLTUuE+AAQCAoWIeQo899ph+85vfqKysTN3d3fL5fFq8eLF++9vfOjPLli1TX1+fysrKFAqFlJ+fr8bGRiUnJzsz69atU3x8vBYsWKC+vj7NnDlTmzdvVlxcnDOzZcsWlZeXO58uKy4uVm1trXM+Li5O27ZtU1lZmaZNm6bExESVlJRozZo1sb5sAAAwAsX8e4RGG75HCKbge4QAjCbD9j1CAAAAIwUhBAAAjEUIAQAAYxFCAADAWIQQAAAwFiEEAACMRQgBAABjEUIAAMBYhBAAADAWIQQAAIxFCAEAAGMRQgAAwFiEEAAAMBYhBAAAjEUIAQAAYxFCAADAWIQQAAAwFiEEAACMRQgBAABjEUIAAMBYhBAAADAWIQQAAIxFCAEAAGMRQgAAwFiEEAAAMBYhBAAAjEUIAQAAYxFCAADAWIQQAAAwFiEEAACMRQgBAABjEUIAAMBYhBAAADAWIQQAAIxFCAEAAGMRQgAAwFiEEAAAMBYhBAAAjEUIAQAAYxFCAADAWIQQAAAwFiEEAACMRQgBAABjEUIAAMBYhBAAADAWIQQAAIxFCAEAAGMRQgAAwFiEEAAAMBYhBAAAjEUIAQAAYxFCAADAWIQQAAAwFiEEAACMRQgBAABjEUIAAMBYhBAAADAWIQQAAIxFCAEAAGMRQgAAwFgXJIQ++ugj3XbbbRo/frzGjh2r7373u2pra3PO27atlStXyufzKTExUTNmzNC+ffuiniMSiWjJkiVKS0tTUlKSiouLdejQoaiZUCgkv98vy7JkWZb8fr+OHz8eNdPR0aH58+crKSlJaWlpKi8vV39//4W4bAAAMMLEPIRCoZCmTZumMWPGaMeOHXrnnXf06KOP6oorrnBmVq1apbVr16q2tlatra3yer2aPXu2Tpw44cxUVFSovr5edXV1am5u1smTJ1VUVKTBwUFnpqSkRIFAQA0NDWpoaFAgEJDf73fODw4Oat68eert7VVzc7Pq6uq0detWVVZWxvqyAQDACOSybduO5RMuX75c//rXv/Taa6+d9bxt2/L5fKqoqNC9994r6fPdH4/Ho0ceeUSLFy9WOBzWlVdeqeeee0633HKLJOnw4cPKyMjQ9u3bNWfOHO3fv185OTlqaWlRfn6+JKmlpUUFBQU6cOCAsrOztWPHDhUVFamzs1M+n0+SVFdXp4ULF6q7u1spKSlfej09PT2yLEvhcPi85keTa5ZvG+4l4CL64OF5w70EAIiZ8/35HfMdoZdfflnXX3+9fvKTnyg9PV2TJ0/Wk08+6Zw/ePCggsGgCgsLnWNut1vTp0/Xrl27JEltbW0aGBiImvH5fMrNzXVmdu/eLcuynAiSpClTpsiyrKiZ3NxcJ4Ikac6cOYpEIlG/qvtvkUhEPT09UTcAADA6xTyE3n//fW3YsEFZWVn629/+pjvvvFPl5eV69tlnJUnBYFCS5PF4oh7n8Xicc8FgUAkJCUpNTT3nTHp6+pDXT09Pj5o583VSU1OVkJDgzJyppqbGec+RZVnKyMj4qv8IAADACBHzEDp16pSuu+46VVdXa/LkyVq8eLFKS0u1YcOGqDmXyxV137btIcfOdObM2ea/zsx/W7FihcLhsHPr7Ow855oAAMDIFfMQmjBhgnJycqKOTZw4UR0dHZIkr9crSUN2ZLq7u53dG6/Xq/7+foVCoXPOHDlyZMjrHz16NGrmzNcJhUIaGBgYslN0mtvtVkpKStQNAACMTjEPoWnTpundd9+NOvbee+/p6quvliRlZmbK6/WqqanJOd/f36+dO3dq6tSpkqS8vDyNGTMmaqarq0vt7e3OTEFBgcLhsPbu3evM7NmzR+FwOGqmvb1dXV1dzkxjY6Pcbrfy8vJifOUAAGCkiY/1E95zzz2aOnWqqqurtWDBAu3du1ebNm3Spk2bJH3+q6qKigpVV1crKytLWVlZqq6u1tixY1VSUiJJsixLixYtUmVlpcaPH69x48apqqpKkyZN0qxZsyR9vss0d+5clZaWauPGjZKkO+64Q0VFRcrOzpYkFRYWKicnR36/X6tXr9axY8dUVVWl0tJSdnoAAEDsQ+iGG25QfX29VqxYoQcffFCZmZlav369br31Vmdm2bJl6uvrU1lZmUKhkPLz89XY2Kjk5GRnZt26dYqPj9eCBQvU19enmTNnavPmzYqLi3NmtmzZovLycufTZcXFxaqtrXXOx8XFadu2bSorK9O0adOUmJiokpISrVmzJtaXDQAARqCYf4/QaMP3CMEUfI8QgNFk2L5HCAAAYKQghAAAgLEIIQAAYCxCCAAAGIsQAgAAxiKEAACAsQghAABgLEIIAAAYixACAADGIoQAAICxCCEAAGAsQggAABiLEAIAAMYihAAAgLEIIQAAYCxCCAAAGIsQAgAAxiKEAACAsQghAABgLEIIAAAYixACAADGIoQAAICxCCEAAGAsQggAABiLEAIAAMYihAAAgLEIIQAAYCxCCAAAGIsQAgAAxiKEAACAsQghAABgLEIIAAAYixACAADGIoQAAICxCCEAAGAsQggAABiLEAIAAMYihAAAgLEIIQAAYCxCCAAAGIsQAgAAxiKEAACAsQghAABgLEIIAAAYixACAADGIoQAAICxCCEAAGAsQggAABiLEAIAAMYihAAAgLEIIQAAYCxCCAAAGIsQAgAAxiKEAACAsQghAABgLEIIAAAYixACAADGIoQAAICxCCEAAGCsCx5CNTU1crlcqqiocI7Ztq2VK1fK5/MpMTFRM2bM0L59+6IeF4lEtGTJEqWlpSkpKUnFxcU6dOhQ1EwoFJLf75dlWbIsS36/X8ePH4+a6ejo0Pz585WUlKS0tDSVl5erv7//Ql0uAAAYQS5oCLW2tmrTpk369re/HXV81apVWrt2rWpra9Xa2iqv16vZs2frxIkTzkxFRYXq6+tVV1en5uZmnTx5UkVFRRocHHRmSkpKFAgE1NDQoIaGBgUCAfn9fuf84OCg5s2bp97eXjU3N6uurk5bt25VZWXlhbxsAAAwQlywEDp58qRuvfVWPfnkk0pNTXWO27at9evX67777tOPfvQj5ebm6plnntEnn3yiF154QZIUDof19NNP69FHH9WsWbM0efJkPf/883r77bf197//XZK0f/9+NTQ06KmnnlJBQYEKCgr05JNP6q9//aveffddSVJjY6PeeecdPf/885o8ebJmzZqlRx99VE8++aR6enou1KUDAIAR4oKF0F133aV58+Zp1qxZUccPHjyoYDCowsJC55jb7db06dO1a9cuSVJbW5sGBgaiZnw+n3Jzc52Z3bt3y7Is5efnOzNTpkyRZVlRM7m5ufL5fM7MnDlzFIlE1NbWdtZ1RyIR9fT0RN0AAMDoFH8hnrSurk5vvPGGWltbh5wLBoOSJI/HE3Xc4/Howw8/dGYSEhKidpJOz5x+fDAYVHp6+pDnT09Pj5o583VSU1OVkJDgzJyppqZGDzzwwPlcJgAAGOFiviPU2dmpX/7yl3r++ed1+eWXf+Gcy+WKum/b9pBjZzpz5mzzX2fmv61YsULhcNi5dXZ2nnNNAABg5Ip5CLW1tam7u1t5eXmKj49XfHy8du7cqd/97neKj493dmjO3JHp7u52znm9XvX39ysUCp1z5siRI0Ne/+jRo1EzZ75OKBTSwMDAkJ2i09xut1JSUqJuAABgdIp5CM2cOVNvv/22AoGAc7v++ut16623KhAI6Nprr5XX61VTU5PzmP7+fu3cuVNTp06VJOXl5WnMmDFRM11dXWpvb3dmCgoKFA6HtXfvXmdmz549CofDUTPt7e3q6upyZhobG+V2u5WXlxfrSwcAACNMzN8jlJycrNzc3KhjSUlJGj9+vHO8oqJC1dXVysrKUlZWlqqrqzV27FiVlJRIkizL0qJFi1RZWanx48dr3Lhxqqqq0qRJk5w3X0+cOFFz585VaWmpNm7cKEm64447VFRUpOzsbElSYWGhcnJy5Pf7tXr1ah07dkxVVVUqLS1lpwcAAFyYN0t/mWXLlqmvr09lZWUKhULKz89XY2OjkpOTnZl169YpPj5eCxYsUF9fn2bOnKnNmzcrLi7OmdmyZYvKy8udT5cVFxertrbWOR8XF6dt27aprKxM06ZNU2JiokpKSrRmzZqLd7EAAOCS5bJt2x7uRVzKenp6ZFmWwuGwcbtI1yzfNtxLwEX0wcPzhnsJABAz5/vzm79rDAAAGIsQAgAAxiKEAACAsQghAABgLEIIAAAYixACAADGIoQAAICxCCEAAGAsQggAABiLEAIAAMYihAAAgLEIIQAAYCxCCAAAGIsQAgAAxiKEAACAsQghAABgLEIIAAAYixACAADGIoQAAICxCCEAAGAsQggAABiLEAIAAMYihAAAgLEIIQAAYCxCCAAAGIsQAgAAxiKEAACAsQghAABgLEIIAAAYixACAADGIoQAAICxCCEAAGAsQggAABiLEAIAAMYihAAAgLEIIQAAYCxCCAAAGIsQAgAAxiKEAACAsQghAABgLEIIAAAYixACAADGIoQAAICxCCEAAGAsQggAABiLEAIAAMYihAAAgLEIIQAAYCxCCAAAGIsQAgAAxiKEAACAsQghAABgLEIIAAAYixACAADGIoQAAICxCCEAAGAsQggAABiLEAIAAMYihAAAgLEIIQAAYKyYh1BNTY1uuOEGJScnKz09XTfffLPefffdqBnbtrVy5Ur5fD4lJiZqxowZ2rdvX9RMJBLRkiVLlJaWpqSkJBUXF+vQoUNRM6FQSH6/X5ZlybIs+f1+HT9+PGqmo6ND8+fPV1JSktLS0lReXq7+/v5YXzYAABiBYh5CO3fu1F133aWWlhY1NTXps88+U2FhoXp7e52ZVatWae3ataqtrVVra6u8Xq9mz56tEydOODMVFRWqr69XXV2dmpubdfLkSRUVFWlwcNCZKSkpUSAQUENDgxoaGhQIBOT3+53zg4ODmjdvnnp7e9Xc3Ky6ujpt3bpVlZWVsb5sAAAwArls27Yv5AscPXpU6enp2rlzp77//e/Ltm35fD5VVFTo3nvvlfT57o/H49EjjzyixYsXKxwO68orr9Rzzz2nW265RZJ0+PBhZWRkaPv27ZozZ47279+vnJwctbS0KD8/X5LU0tKigoICHThwQNnZ2dqxY4eKiorU2dkpn88nSaqrq9PChQvV3d2tlJSUL11/T0+PLMtSOBw+r/nR5Jrl24Z7CbiIPnh43nAvAQBi5nx/fl/w9wiFw2FJ0rhx4yRJBw8eVDAYVGFhoTPjdrs1ffp07dq1S5LU1tamgYGBqBmfz6fc3FxnZvfu3bIsy4kgSZoyZYosy4qayc3NdSJIkubMmaNIJKK2trazrjcSiainpyfqBgAARqcLGkK2bWvp0qW68cYblZubK0kKBoOSJI/HEzXr8Xicc8FgUAkJCUpNTT3nTHp6+pDXTE9Pj5o583VSU1OVkJDgzJyppqbGec+RZVnKyMj4qpcNAABGiAsaQnfffbf+/e9/68UXXxxyzuVyRd23bXvIsTOdOXO2+a8z899WrFihcDjs3Do7O8+5JgAAMHJdsBBasmSJXn75Zf3zn//UVVdd5Rz3er2SNGRHpru729m98Xq96u/vVygUOufMkSNHhrzu0aNHo2bOfJ1QKKSBgYEhO0Wnud1upaSkRN0AAMDoFPMQsm1bd999t/70pz/pH//4hzIzM6POZ2Zmyuv1qqmpyTnW39+vnTt3aurUqZKkvLw8jRkzJmqmq6tL7e3tzkxBQYHC4bD27t3rzOzZs0fhcDhqpr29XV1dXc5MY2Oj3G638vLyYn3pAABghImP9RPeddddeuGFF/SXv/xFycnJzo6MZVlKTEyUy+VSRUWFqqurlZWVpaysLFVXV2vs2LEqKSlxZhctWqTKykqNHz9e48aNU1VVlSZNmqRZs2ZJkiZOnKi5c+eqtLRUGzdulCTdcccdKioqUnZ2tiSpsLBQOTk58vv9Wr16tY4dO6aqqiqVlpay0wMAAGIfQhs2bJAkzZgxI+r4H//4Ry1cuFCStGzZMvX19amsrEyhUEj5+flqbGxUcnKyM79u3TrFx8drwYIF6uvr08yZM7V582bFxcU5M1u2bFF5ebnz6bLi4mLV1tY65+Pi4rRt2zaVlZVp2rRpSkxMVElJidasWRPrywYAACPQBf8eoZGO7xGCKfgeIQCjySXzPUIAAACXKkIIAAAYixACAADGIoQAAICxCCEAAGAsQggAABiLEAIAAMYihAAAgLEIIQAAYCxCCAAAGIsQAgAAxiKEAACAsQghAABgLEIIAAAYixACAADGIoQAAICxCCEAAGAsQggAABiLEAIAAMYihAAAgLEIIQAAYCxCCAAAGIsQAgAAxiKEAACAsQghAABgLEIIAAAYixACAADGIoQAAICxCCEAAGAsQggAABiLEAIAAMYihAAAgLEIIQAAYCxCCAAAGIsQAgAAxiKEAACAsQghAABgLEIIAAAYixACAADGIoQAAICxCCEAAGAsQggAABiLEAIAAMYihAAAgLEIIQAAYCxCCAAAGIsQAgAAxiKEAACAsQghAABgLEIIAAAYixACAADGIoQAAICxCCEAAGAsQggAABiLEAIAAMYihAAAgLEIIQAAYCxCCAAAGIsQAgAAxjIihB5//HFlZmbq8ssvV15enl577bXhXhIAALgEjPoQeumll1RRUaH77rtPb775pr73ve/ppptuUkdHx3AvDQAADLNRH0Jr167VokWL9Itf/EITJ07U+vXrlZGRoQ0bNgz30gAAwDCLH+4FXEj9/f1qa2vT8uXLo44XFhZq165dZ31MJBJRJBJx7ofDYUlST0/PhVvoJepU5JPhXgIuIhP/HQcwep3+b5pt2+ecG9Uh9PHHH2twcFAejyfquMfjUTAYPOtjampq9MADDww5npGRcUHWCFwqrPXDvQIAiL0TJ07IsqwvPD+qQ+g0l8sVdd+27SHHTluxYoWWLl3q3D916pSOHTum8ePHf+FjMHr09PQoIyNDnZ2dSklJGe7lAIgh/nybxbZtnThxQj6f75xzozqE0tLSFBcXN2T3p7u7e8gu0Wlut1tutzvq2BVXXHGhlohLVEpKCv+hBEYp/nyb41w7QaeN6jdLJyQkKC8vT01NTVHHm5qaNHXq1GFaFQAAuFSM6h0hSVq6dKn8fr+uv/56FRQUaNOmTero6NCdd9453EsDAADDbNSH0C233KL//Oc/evDBB9XV1aXc3Fxt375dV1999XAvDZcgt9ut+++/f8ivRwGMfPz5xtm47C/7XBkAAMAoNarfIwQAAHAuhBAAADAWIQQAAIxFCAEAAGMRQgAAwFij/uPzwLkcOnRIGzZs0K5duxQMBuVyueTxeDR16lTdeeed/B1zADDK8fF5GKu5uVk33XSTMjIyVFhYKI/HI9u21d3draamJnV2dmrHjh2aNm3acC8VwAXQ2dmp+++/X3/4wx+GeykYRoQQjHXDDTfoxhtv1Lp16856/p577lFzc7NaW1sv8soAXAxvvfWWrrvuOg0ODg73UjCMCCEYKzExUYFAQNnZ2Wc9f+DAAU2ePFl9fX0XeWUAYuHll18+5/n3339flZWVhJDheI8QjDVhwgTt2rXrC0No9+7dmjBhwkVeFYBYufnmm+VyuXSu/993uVwXcUW4FBFCMFZVVZXuvPNOtbW1afbs2fJ4PHK5XAoGg2pqatJTTz2l9evXD/cyAXxNEyZM0O9//3vdfPPNZz0fCASUl5d3cReFSw4hBGOVlZVp/PjxWrdunTZu3Ohsj8fFxSkvL0/PPvusFixYMMyrBPB15eXl6Y033vjCEPqy3SKYgfcIAZIGBgb08ccfS5LS0tI0ZsyYYV4RgP+r1157Tb29vZo7d+5Zz/f29ur111/X9OnTL/LKcCkhhAAAgLH4ZmkAAGAsQggAABiLEAIAAMYihAAAgLEIIQAAYCxCCAAAGIsQAgAAxiKEAACAsf4XKb9AFHhw3MkAAAAASUVORK5CYII=\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "pd.value_counts(sample_financial['isFraud']).plot(kind = 'bar')\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "CASH_OUT 35055\n",
+ "PAYMENT 33830\n",
+ "CASH_IN 22089\n",
+ "TRANSFER 8384\n",
+ "DEBIT 642\n",
+ "Name: type, dtype: int64"
+ ]
+ },
+ "execution_count": 20,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here\n"
+ "sample_financial[\"type\"].value_counts()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " step | \n",
+ " amount | \n",
+ " oldbalanceOrg | \n",
+ " newbalanceOrig | \n",
+ " oldbalanceDest | \n",
+ " newbalanceDest | \n",
+ " isFraud | \n",
+ " isFlaggedFraud | \n",
+ " type_CASH_IN | \n",
+ " type_CASH_OUT | \n",
+ " type_DEBIT | \n",
+ " type_PAYMENT | \n",
+ " type_TRANSFER | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 4116394 | \n",
+ " 302 | \n",
+ " 10713.74 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 1658484 | \n",
+ " 158 | \n",
+ " 5572.77 | \n",
+ " 672938.43 | \n",
+ " 667365.66 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 5160837 | \n",
+ " 357 | \n",
+ " 10284.63 | \n",
+ " 20619.00 | \n",
+ " 10334.37 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 935767 | \n",
+ " 43 | \n",
+ " 189390.35 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 6444744.36 | \n",
+ " 6634134.71 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 4961379 | \n",
+ " 351 | \n",
+ " 76731.72 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 838426.87 | \n",
+ " 915158.59 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " step amount oldbalanceOrg newbalanceOrig oldbalanceDest \\\n",
+ "4116394 302 10713.74 0.00 0.00 0.00 \n",
+ "1658484 158 5572.77 672938.43 667365.66 0.00 \n",
+ "5160837 357 10284.63 20619.00 10334.37 0.00 \n",
+ "935767 43 189390.35 0.00 0.00 6444744.36 \n",
+ "4961379 351 76731.72 0.00 0.00 838426.87 \n",
+ "\n",
+ " newbalanceDest isFraud isFlaggedFraud type_CASH_IN type_CASH_OUT \\\n",
+ "4116394 0.00 0 0 0 0 \n",
+ "1658484 0.00 0 0 0 0 \n",
+ "5160837 0.00 0 0 0 0 \n",
+ "935767 6634134.71 0 0 0 1 \n",
+ "4961379 915158.59 0 0 0 1 \n",
+ "\n",
+ " type_DEBIT type_PAYMENT type_TRANSFER \n",
+ "4116394 0 1 0 \n",
+ "1658484 0 1 0 \n",
+ "5160837 0 1 0 \n",
+ "935767 0 0 0 \n",
+ "4961379 0 0 0 "
+ ]
+ },
+ "execution_count": 24,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "sample_financial = pd.get_dummies(sample_financial, columns=['type'])\n",
+ "sample_financial.drop(['nameOrig','nameDest'], axis = 1, inplace=True)\n",
+ "sample_financial.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "### Run a logisitc regression classifier and evaluate its accuracy."
+ "### Run a logistic regression classifier and evaluate its accuracy."
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 27,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.99935"
+ ]
+ },
+ "execution_count": 27,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here"
+ "from sklearn.model_selection import train_test_split\n",
+ "from sklearn.linear_model import LogisticRegression\n",
+ "from sklearn.metrics import classification_report, confusion_matrix\n",
+ "\n",
+ "features = sample_financial.drop(labels='isFraud', axis=1)\n",
+ "target = sample_financial['isFraud']\n",
+ "\n",
+ "X_train, X_test, y_train, y_test = train_test_split(features, target, test_size= 0.2)\n",
+ "\n",
+ "model = LogisticRegression()\n",
+ "model.fit(X_train,y_train)\n",
+ "model.score(X_test,y_test)\n",
+ "pred = model.predict(X_test)\n",
+ "\n",
+ "model.score(X_train,y_train)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([[19963, 8],\n",
+ " [ 4, 25]])"
+ ]
+ },
+ "execution_count": 28,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pred = model.predict(X_test)\n",
+ "confusion_matrix(y_test, pred)"
]
},
{
@@ -92,11 +654,31 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 38,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.5306674456025794"
+ ]
+ },
+ "execution_count": 38,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "# Your code here"
+ "from sklearn.ensemble import BaggingRegressor\n",
+ "\n",
+ "bagging_reg = BaggingRegressor(\n",
+ " DecisionTreeRegressor(max_depth=5), \n",
+ " n_estimators=100, \n",
+ " max_samples=5000, \n",
+ " bootstrap = True) \n",
+ "\n",
+ "bagging_reg.fit(X_train, y_train)\n",
+ "bagging_reg.score(X_test,y_test)"
]
},
{
@@ -125,7 +707,7 @@
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3",
+ "display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -139,7 +721,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.6.8"
+ "version": "3.10.9"
}
},
"nbformat": 4,