From 54b0c931a2eb35be2886f049a1f414633b8fc457 Mon Sep 17 00:00:00 2001
From: Leonardo Esteban Pagliacci
 <110601781+leonardo-pagliacci@users.noreply.github.com>
Date: Sun, 15 Oct 2023 15:22:52 +0100
Subject: [PATCH] lab 5 solved

---
 ..._analysis_round5] leonardo_pagliacci.ipynb | 784 ++++++++++++++++++
 1 file changed, 784 insertions(+)
 create mode 100644 [lab_customer_analysis_round5] leonardo_pagliacci.ipynb
diff --git a/[lab_customer_analysis_round5] leonardo_pagliacci.ipynb b/[lab_customer_analysis_round5] leonardo_pagliacci.ipynb
new file mode 100644
index 0000000..ae9b5f8
--- /dev/null
+++ b/[lab_customer_analysis_round5] leonardo_pagliacci.ipynb	
@@ -0,0 +1,784 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "bc9713bb",
+   "metadata": {},
+   "source": [
+    "# Lab | Customer Analysis Round 5\n",
+    "\n",
+    "For this lab, we still keep using the `marketing_customer_analysis.csv` file that you can find in the `files_for_lab` folder.\n",
+    "\n",
+    "### Get the data\n",
+    "\n",
+    "We are using the `marketing_customer_analysis.csv` file.\n",
+    "\n",
+    "### Dealing with the data\n",
+    "\n",
+    "Already done in round 2.\n",
+    "\n",
+    "### Explore the data\n",
+    "\n",
+    "Done in round 3.\n",
+    "\n",
+    "### Processing Data\n",
+    "\n",
+    "(_Further processing..._)\n",
+    "\n",
+    "- X-y split.\n",
+    "- Normalize (numerical).\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 41,
+   "id": "332f3c58",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Importing the libraries\n",
+    "\n",
+    "import pandas as pd\n",
+    "\n",
+    "import numpy as np\n",
+    "\n",
+    "import warnings\n",
+    "warnings.filterwarnings('ignore')\n",
+    "\n",
+    "import matplotlib.pyplot as plt\n",
+    "%matplotlib inline\n",
+    "\n",
+    "import seaborn as sns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 42,
+   "id": "0112f86c",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>customer</th>\n",
+       "      <th>state</th>\n",
+       "      <th>customer_lifetime_value</th>\n",
+       "      <th>response</th>\n",
+       "      <th>coverage</th>\n",
+       "      <th>education</th>\n",
+       "      <th>effective_to_date</th>\n",
+       "      <th>employmentstatus</th>\n",
+       "      <th>gender</th>\n",
+       "      <th>income</th>\n",
+       "      <th>...</th>\n",
+       "      <th>months_since_policy_inception</th>\n",
+       "      <th>number_of_open_complaints</th>\n",
+       "      <th>number_of_policies</th>\n",
+       "      <th>policy_type</th>\n",
+       "      <th>policy</th>\n",
+       "      <th>renew_offer_type</th>\n",
+       "      <th>sales_channel</th>\n",
+       "      <th>total_claim_amount</th>\n",
+       "      <th>vehicle_class</th>\n",
+       "      <th>vehicle_size</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>DK49336</td>\n",
+       "      <td>Arizona</td>\n",
+       "      <td>4809.216960</td>\n",
+       "      <td>No</td>\n",
+       "      <td>Basic</td>\n",
+       "      <td>College</td>\n",
+       "      <td>2/18/11</td>\n",
+       "      <td>Employed</td>\n",
+       "      <td>M</td>\n",
+       "      <td>48029</td>\n",
+       "      <td>...</td>\n",
+       "      <td>52</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>9</td>\n",
+       "      <td>Corporate Auto</td>\n",
+       "      <td>Corporate L3</td>\n",
+       "      <td>Offer3</td>\n",
+       "      <td>Agent</td>\n",
+       "      <td>292.800000</td>\n",
+       "      <td>Four-Door Car</td>\n",
+       "      <td>Medsize</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>KX64629</td>\n",
+       "      <td>California</td>\n",
+       "      <td>2228.525238</td>\n",
+       "      <td>No</td>\n",
+       "      <td>Basic</td>\n",
+       "      <td>College</td>\n",
+       "      <td>1/18/11</td>\n",
+       "      <td>Unemployed</td>\n",
+       "      <td>F</td>\n",
+       "      <td>0</td>\n",
+       "      <td>...</td>\n",
+       "      <td>26</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>Personal Auto</td>\n",
+       "      <td>Personal L3</td>\n",
+       "      <td>Offer4</td>\n",
+       "      <td>Call Center</td>\n",
+       "      <td>744.924331</td>\n",
+       "      <td>Four-Door Car</td>\n",
+       "      <td>Medsize</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>LZ68649</td>\n",
+       "      <td>Washington</td>\n",
+       "      <td>14947.917300</td>\n",
+       "      <td>No</td>\n",
+       "      <td>Basic</td>\n",
+       "      <td>Bachelor</td>\n",
+       "      <td>2/10/11</td>\n",
+       "      <td>Employed</td>\n",
+       "      <td>M</td>\n",
+       "      <td>22139</td>\n",
+       "      <td>...</td>\n",
+       "      <td>31</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>Personal Auto</td>\n",
+       "      <td>Personal L3</td>\n",
+       "      <td>Offer3</td>\n",
+       "      <td>Call Center</td>\n",
+       "      <td>480.000000</td>\n",
+       "      <td>SUV</td>\n",
+       "      <td>Medsize</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>XL78013</td>\n",
+       "      <td>Oregon</td>\n",
+       "      <td>22332.439460</td>\n",
+       "      <td>Yes</td>\n",
+       "      <td>Extended</td>\n",
+       "      <td>College</td>\n",
+       "      <td>1/11/11</td>\n",
+       "      <td>Employed</td>\n",
+       "      <td>M</td>\n",
+       "      <td>49078</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>Corporate Auto</td>\n",
+       "      <td>Corporate L3</td>\n",
+       "      <td>Offer2</td>\n",
+       "      <td>Branch</td>\n",
+       "      <td>484.013411</td>\n",
+       "      <td>Four-Door Car</td>\n",
+       "      <td>Medsize</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>QA50777</td>\n",
+       "      <td>Oregon</td>\n",
+       "      <td>9025.067525</td>\n",
+       "      <td>No</td>\n",
+       "      <td>Premium</td>\n",
+       "      <td>Bachelor</td>\n",
+       "      <td>1/17/11</td>\n",
+       "      <td>Medical Leave</td>\n",
+       "      <td>F</td>\n",
+       "      <td>23675</td>\n",
+       "      <td>...</td>\n",
+       "      <td>31</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>7</td>\n",
+       "      <td>Personal Auto</td>\n",
+       "      <td>Personal L2</td>\n",
+       "      <td>Offer1</td>\n",
+       "      <td>Branch</td>\n",
+       "      <td>707.925645</td>\n",
+       "      <td>Four-Door Car</td>\n",
+       "      <td>Medsize</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10905</th>\n",
+       "      <td>FE99816</td>\n",
+       "      <td>Nevada</td>\n",
+       "      <td>15563.369440</td>\n",
+       "      <td>No</td>\n",
+       "      <td>Premium</td>\n",
+       "      <td>Bachelor</td>\n",
+       "      <td>1/19/11</td>\n",
+       "      <td>Unemployed</td>\n",
+       "      <td>F</td>\n",
+       "      <td>0</td>\n",
+       "      <td>...</td>\n",
+       "      <td>40</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>7</td>\n",
+       "      <td>Personal Auto</td>\n",
+       "      <td>Personal L1</td>\n",
+       "      <td>Offer3</td>\n",
+       "      <td>Web</td>\n",
+       "      <td>1214.400000</td>\n",
+       "      <td>Luxury Car</td>\n",
+       "      <td>Medsize</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10906</th>\n",
+       "      <td>KX53892</td>\n",
+       "      <td>Oregon</td>\n",
+       "      <td>5259.444853</td>\n",
+       "      <td>No</td>\n",
+       "      <td>Basic</td>\n",
+       "      <td>College</td>\n",
+       "      <td>1/6/11</td>\n",
+       "      <td>Employed</td>\n",
+       "      <td>F</td>\n",
+       "      <td>61146</td>\n",
+       "      <td>...</td>\n",
+       "      <td>68</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>6</td>\n",
+       "      <td>Personal Auto</td>\n",
+       "      <td>Personal L3</td>\n",
+       "      <td>Offer2</td>\n",
+       "      <td>Branch</td>\n",
+       "      <td>273.018929</td>\n",
+       "      <td>Four-Door Car</td>\n",
+       "      <td>Medsize</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10907</th>\n",
+       "      <td>TL39050</td>\n",
+       "      <td>Arizona</td>\n",
+       "      <td>23893.304100</td>\n",
+       "      <td>No</td>\n",
+       "      <td>Extended</td>\n",
+       "      <td>Bachelor</td>\n",
+       "      <td>2/6/11</td>\n",
+       "      <td>Employed</td>\n",
+       "      <td>F</td>\n",
+       "      <td>39837</td>\n",
+       "      <td>...</td>\n",
+       "      <td>63</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>Corporate Auto</td>\n",
+       "      <td>Corporate L3</td>\n",
+       "      <td>Offer1</td>\n",
+       "      <td>Web</td>\n",
+       "      <td>381.306996</td>\n",
+       "      <td>Luxury SUV</td>\n",
+       "      <td>Medsize</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10908</th>\n",
+       "      <td>WA60547</td>\n",
+       "      <td>California</td>\n",
+       "      <td>11971.977650</td>\n",
+       "      <td>No</td>\n",
+       "      <td>Premium</td>\n",
+       "      <td>College</td>\n",
+       "      <td>2/13/11</td>\n",
+       "      <td>Employed</td>\n",
+       "      <td>F</td>\n",
+       "      <td>64195</td>\n",
+       "      <td>...</td>\n",
+       "      <td>27</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>6</td>\n",
+       "      <td>Personal Auto</td>\n",
+       "      <td>Personal L1</td>\n",
+       "      <td>Offer1</td>\n",
+       "      <td>Branch</td>\n",
+       "      <td>618.288849</td>\n",
+       "      <td>SUV</td>\n",
+       "      <td>Medsize</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10909</th>\n",
+       "      <td>IV32877</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>6857.519928</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>Basic</td>\n",
+       "      <td>Bachelor</td>\n",
+       "      <td>1/8/11</td>\n",
+       "      <td>Unemployed</td>\n",
+       "      <td>M</td>\n",
+       "      <td>0</td>\n",
+       "      <td>...</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>3</td>\n",
+       "      <td>Personal Auto</td>\n",
+       "      <td>Personal L1</td>\n",
+       "      <td>Offer4</td>\n",
+       "      <td>Web</td>\n",
+       "      <td>1021.719397</td>\n",
+       "      <td>SUV</td>\n",
+       "      <td>Medsize</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>10910 rows × 24 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "      customer       state  customer_lifetime_value response  coverage  \\\n",
+       "0      DK49336     Arizona              4809.216960       No     Basic   \n",
+       "1      KX64629  California              2228.525238       No     Basic   \n",
+       "2      LZ68649  Washington             14947.917300       No     Basic   \n",
+       "3      XL78013      Oregon             22332.439460      Yes  Extended   \n",
+       "4      QA50777      Oregon              9025.067525       No   Premium   \n",
+       "...        ...         ...                      ...      ...       ...   \n",
+       "10905  FE99816      Nevada             15563.369440       No   Premium   \n",
+       "10906  KX53892      Oregon              5259.444853       No     Basic   \n",
+       "10907  TL39050     Arizona             23893.304100       No  Extended   \n",
+       "10908  WA60547  California             11971.977650       No   Premium   \n",
+       "10909  IV32877         NaN              6857.519928      NaN     Basic   \n",
+       "\n",
+       "      education effective_to_date employmentstatus gender  income  ...  \\\n",
+       "0       College           2/18/11         Employed      M   48029  ...   \n",
+       "1       College           1/18/11       Unemployed      F       0  ...   \n",
+       "2      Bachelor           2/10/11         Employed      M   22139  ...   \n",
+       "3       College           1/11/11         Employed      M   49078  ...   \n",
+       "4      Bachelor           1/17/11    Medical Leave      F   23675  ...   \n",
+       "...         ...               ...              ...    ...     ...  ...   \n",
+       "10905  Bachelor           1/19/11       Unemployed      F       0  ...   \n",
+       "10906   College            1/6/11         Employed      F   61146  ...   \n",
+       "10907  Bachelor            2/6/11         Employed      F   39837  ...   \n",
+       "10908   College           2/13/11         Employed      F   64195  ...   \n",
+       "10909  Bachelor            1/8/11       Unemployed      M       0  ...   \n",
+       "\n",
+       "      months_since_policy_inception number_of_open_complaints  \\\n",
+       "0                                52                       0.0   \n",
+       "1                                26                       0.0   \n",
+       "2                                31                       0.0   \n",
+       "3                                 3                       0.0   \n",
+       "4                                31                       NaN   \n",
+       "...                             ...                       ...   \n",
+       "10905                            40                       NaN   \n",
+       "10906                            68                       0.0   \n",
+       "10907                            63                       0.0   \n",
+       "10908                            27                       4.0   \n",
+       "10909                             1                       0.0   \n",
+       "\n",
+       "       number_of_policies     policy_type        policy  renew_offer_type  \\\n",
+       "0                       9  Corporate Auto  Corporate L3            Offer3   \n",
+       "1                       1   Personal Auto   Personal L3            Offer4   \n",
+       "2                       2   Personal Auto   Personal L3            Offer3   \n",
+       "3                       2  Corporate Auto  Corporate L3            Offer2   \n",
+       "4                       7   Personal Auto   Personal L2            Offer1   \n",
+       "...                   ...             ...           ...               ...   \n",
+       "10905                   7   Personal Auto   Personal L1            Offer3   \n",
+       "10906                   6   Personal Auto   Personal L3            Offer2   \n",
+       "10907                   2  Corporate Auto  Corporate L3            Offer1   \n",
+       "10908                   6   Personal Auto   Personal L1            Offer1   \n",
+       "10909                   3   Personal Auto   Personal L1            Offer4   \n",
+       "\n",
+       "       sales_channel total_claim_amount  vehicle_class vehicle_size  \n",
+       "0              Agent         292.800000  Four-Door Car      Medsize  \n",
+       "1        Call Center         744.924331  Four-Door Car      Medsize  \n",
+       "2        Call Center         480.000000            SUV      Medsize  \n",
+       "3             Branch         484.013411  Four-Door Car      Medsize  \n",
+       "4             Branch         707.925645  Four-Door Car      Medsize  \n",
+       "...              ...                ...            ...          ...  \n",
+       "10905            Web        1214.400000     Luxury Car      Medsize  \n",
+       "10906         Branch         273.018929  Four-Door Car      Medsize  \n",
+       "10907            Web         381.306996     Luxury SUV      Medsize  \n",
+       "10908         Branch         618.288849            SUV      Medsize  \n",
+       "10909            Web        1021.719397            SUV      Medsize  \n",
+       "\n",
+       "[10910 rows x 24 columns]"
+      ]
+     },
+     "execution_count": 42,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Importing the dataframe\n",
+    "\n",
+    "# Relative path (instead of an absolute /Users/... one) so the notebook runs on any machine.\n",
+    "# It assumes the lab's files_for_lab folder sits next to this notebook; adjust DATA_PATH otherwise.\n",
+    "\n",
+    "DATA_PATH = \"files_for_lab/csv_files/marketing_customer_analysis.csv\"\n",
+    "\n",
+    "customer_df = pd.read_csv(DATA_PATH)\n",
+    "\n",
+    "# Cleaning operations\n",
+    "\n",
+    "# Standardizing the headers: lower case, with underscores instead of spaces\n",
+    "\n",
+    "customer_df.columns = [name.lower().replace(' ', '_') for name in customer_df.columns]\n",
+    "\n",
+    "# Dropping the column 'unnamed:_0'\n",
+    "\n",
+    "customer_df = customer_df.drop(['unnamed:_0'], axis = 1)\n",
+    "\n",
+    "# The only values are NaN or vehicle class 'A', so the column is dropped as well.\n",
+    "\n",
+    "customer_df = customer_df.drop(['vehicle_type'], axis = 1)\n",
+    "\n",
+    "customer_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 43,
+   "id": "958cdaf7",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0"
+      ]
+     },
+     "execution_count": 43,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# This is for predictions\n",
+    "from sklearn import linear_model\n",
+    "from sklearn.metrics import mean_squared_error, r2_score\n",
+    "\n",
+    "# These libs are for stats -> these ones are for description\n",
+    "import statsmodels.api as sm\n",
+    "from statsmodels.formula.api import ols\n",
+    "\n",
+    "# Creating a dataframe with only numerical values (an independent copy of those columns)\n",
+    "\n",
+    "num_cust = customer_df.select_dtypes(include = np.number).copy()\n",
+    "\n",
+    "# Checking for null values: two columns have NaN values,\n",
+    "# 'months_since_last_claim' and 'number_of_open_complaints'\n",
+    "\n",
+    "# I chose to fill the NaN values with the means of the two columns.\n",
+    "# The filled column is assigned back to num_cust: calling fillna(..., inplace = True)\n",
+    "# on num_cust['col'] only updates a temporary Series when pandas copy-on-write is active.\n",
+    "\n",
+    "mean_months = num_cust['months_since_last_claim'].mean()\n",
+    "num_cust['months_since_last_claim'] = num_cust['months_since_last_claim'].fillna(mean_months)\n",
+    "\n",
+    "mean_complaints = num_cust['number_of_open_complaints'].mean()\n",
+    "num_cust['number_of_open_complaints'] = num_cust['number_of_open_complaints'].fillna(mean_complaints)\n",
+    "\n",
+    "# Final check on the whole frame\n",
+    "num_cust.isnull().sum().sum() # There are no nan values\n",
+    "\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 44,
+   "id": "95c51d22",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>customer_lifetime_value</th>\n",
+       "      <th>income</th>\n",
+       "      <th>monthly_premium_auto</th>\n",
+       "      <th>months_since_last_claim</th>\n",
+       "      <th>months_since_policy_inception</th>\n",
+       "      <th>number_of_open_complaints</th>\n",
+       "      <th>number_of_policies</th>\n",
+       "      <th>total_claim_amount</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>0.035752</td>\n",
+       "      <td>0.480381</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.200000</td>\n",
+       "      <td>0.525253</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>1.000</td>\n",
+       "      <td>0.101171</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>0.004059</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.012658</td>\n",
+       "      <td>0.085714</td>\n",
+       "      <td>0.262626</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.000</td>\n",
+       "      <td>0.257445</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>0.160264</td>\n",
+       "      <td>0.221432</td>\n",
+       "      <td>0.164557</td>\n",
+       "      <td>0.971429</td>\n",
+       "      <td>0.313131</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.125</td>\n",
+       "      <td>0.165875</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>0.250953</td>\n",
+       "      <td>0.490873</td>\n",
+       "      <td>0.151899</td>\n",
+       "      <td>0.285714</td>\n",
+       "      <td>0.030303</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.125</td>\n",
+       "      <td>0.167263</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>0.087527</td>\n",
+       "      <td>0.236795</td>\n",
+       "      <td>0.236287</td>\n",
+       "      <td>0.432831</td>\n",
+       "      <td>0.313131</td>\n",
+       "      <td>0.076851</td>\n",
+       "      <td>0.750</td>\n",
+       "      <td>0.244657</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10905</th>\n",
+       "      <td>0.167823</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.810127</td>\n",
+       "      <td>0.432831</td>\n",
+       "      <td>0.404040</td>\n",
+       "      <td>0.076851</td>\n",
+       "      <td>0.750</td>\n",
+       "      <td>0.419717</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10906</th>\n",
+       "      <td>0.041281</td>\n",
+       "      <td>0.611576</td>\n",
+       "      <td>0.016878</td>\n",
+       "      <td>0.200000</td>\n",
+       "      <td>0.686869</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.625</td>\n",
+       "      <td>0.094333</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10907</th>\n",
+       "      <td>0.270122</td>\n",
+       "      <td>0.398446</td>\n",
+       "      <td>0.590717</td>\n",
+       "      <td>0.314286</td>\n",
+       "      <td>0.636364</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.125</td>\n",
+       "      <td>0.131763</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10908</th>\n",
+       "      <td>0.123717</td>\n",
+       "      <td>0.642072</td>\n",
+       "      <td>0.409283</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.272727</td>\n",
+       "      <td>0.800000</td>\n",
+       "      <td>0.625</td>\n",
+       "      <td>0.213674</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10909</th>\n",
+       "      <td>0.060907</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.168776</td>\n",
+       "      <td>0.885714</td>\n",
+       "      <td>0.010101</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.250</td>\n",
+       "      <td>0.353118</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>10910 rows × 8 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "       customer_lifetime_value    income  monthly_premium_auto  \\\n",
+       "0                     0.035752  0.480381              0.000000   \n",
+       "1                     0.004059  0.000000              0.012658   \n",
+       "2                     0.160264  0.221432              0.164557   \n",
+       "3                     0.250953  0.490873              0.151899   \n",
+       "4                     0.087527  0.236795              0.236287   \n",
+       "...                        ...       ...                   ...   \n",
+       "10905                 0.167823  0.000000              0.810127   \n",
+       "10906                 0.041281  0.611576              0.016878   \n",
+       "10907                 0.270122  0.398446              0.590717   \n",
+       "10908                 0.123717  0.642072              0.409283   \n",
+       "10909                 0.060907  0.000000              0.168776   \n",
+       "\n",
+       "       months_since_last_claim  months_since_policy_inception  \\\n",
+       "0                     0.200000                       0.525253   \n",
+       "1                     0.085714                       0.262626   \n",
+       "2                     0.971429                       0.313131   \n",
+       "3                     0.285714                       0.030303   \n",
+       "4                     0.432831                       0.313131   \n",
+       "...                        ...                            ...   \n",
+       "10905                 0.432831                       0.404040   \n",
+       "10906                 0.200000                       0.686869   \n",
+       "10907                 0.314286                       0.636364   \n",
+       "10908                 0.000000                       0.272727   \n",
+       "10909                 0.885714                       0.010101   \n",
+       "\n",
+       "       number_of_open_complaints  number_of_policies  total_claim_amount  \n",
+       "0                       0.000000               1.000            0.101171  \n",
+       "1                       0.000000               0.000            0.257445  \n",
+       "2                       0.000000               0.125            0.165875  \n",
+       "3                       0.000000               0.125            0.167263  \n",
+       "4                       0.076851               0.750            0.244657  \n",
+       "...                          ...                 ...                 ...  \n",
+       "10905                   0.076851               0.750            0.419717  \n",
+       "10906                   0.000000               0.625            0.094333  \n",
+       "10907                   0.000000               0.125            0.131763  \n",
+       "10908                   0.800000               0.625            0.213674  \n",
+       "10909                   0.000000               0.250            0.353118  \n",
+       "\n",
+       "[10910 rows x 8 columns]"
+      ]
+     },
+     "execution_count": 44,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# X-y split.\n",
+    "\n",
+    "Y = customer_df['total_claim_amount']\n",
+    "X = customer_df.drop(['total_claim_amount'], axis = 1)\n",
+    "\n",
+    "# Normalizing the numerical features only: the target 'total_claim_amount' is not scaled with them\n",
+    "\n",
+    "from sklearn.preprocessing import MinMaxScaler\n",
+    "\n",
+    "scaler = MinMaxScaler() # This is the normalization process\n",
+    "features_num = num_cust.drop(['total_claim_amount'], axis = 1) # Imputed numerical columns, target excluded\n",
+    "normalized_data = scaler.fit_transform(features_num) # Fit it to the data\n",
+    "normalized_data = pd.DataFrame(normalized_data, columns = features_num.columns, index = features_num.index)\n",
+    "\n",
+    "normalized_data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0ccdae9d",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

	customer	state	customer_lifetime_value	response	coverage	education	effective_to_date	employmentstatus	gender	income	...	months_since_policy_inception	number_of_open_complaints	number_of_policies	policy_type	policy	renew_offer_type	sales_channel	total_claim_amount	vehicle_class	vehicle_size
0	DK49336	Arizona	4809.216960	No	Basic	College	2/18/11	Employed	M	48029	...	52	0.0	9	Corporate Auto	Corporate L3	Offer3	Agent	292.800000	Four-Door Car	Medsize
1	KX64629	California	2228.525238	No	Basic	College	1/18/11	Unemployed	F	0	...	26	0.0	1	Personal Auto	Personal L3	Offer4	Call Center	744.924331	Four-Door Car	Medsize
2	LZ68649	Washington	14947.917300	No	Basic	Bachelor	2/10/11	Employed	M	22139	...	31	0.0	2	Personal Auto	Personal L3	Offer3	Call Center	480.000000	SUV	Medsize
3	XL78013	Oregon	22332.439460	Yes	Extended	College	1/11/11	Employed	M	49078	...	3	0.0	2	Corporate Auto	Corporate L3	Offer2	Branch	484.013411	Four-Door Car	Medsize
4	QA50777	Oregon	9025.067525	No	Premium	Bachelor	1/17/11	Medical Leave	F	23675	...	31	NaN	7	Personal Auto	Personal L2	Offer1	Branch	707.925645	Four-Door Car	Medsize
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
10905	FE99816	Nevada	15563.369440	No	Premium	Bachelor	1/19/11	Unemployed	F	0	...	40	NaN	7	Personal Auto	Personal L1	Offer3	Web	1214.400000	Luxury Car	Medsize
10906	KX53892	Oregon	5259.444853	No	Basic	College	1/6/11	Employed	F	61146	...	68	0.0	6	Personal Auto	Personal L3	Offer2	Branch	273.018929	Four-Door Car	Medsize
10907	TL39050	Arizona	23893.304100	No	Extended	Bachelor	2/6/11	Employed	F	39837	...	63	0.0	2	Corporate Auto	Corporate L3	Offer1	Web	381.306996	Luxury SUV	Medsize
10908	WA60547	California	11971.977650	No	Premium	College	2/13/11	Employed	F	64195	...	27	4.0	6	Personal Auto	Personal L1	Offer1	Branch	618.288849	SUV	Medsize
10909	IV32877	NaN	6857.519928	NaN	Basic	Bachelor	1/8/11	Unemployed	M	0	...	1	0.0	3	Personal Auto	Personal L1	Offer4	Web	1021.719397	SUV	Medsize
	customer_lifetime_value	income	monthly_premium_auto	months_since_last_claim	months_since_policy_inception	number_of_open_complaints	number_of_policies	total_claim_amount
0	0.035752	0.480381	0.000000	0.200000	0.525253	0.000000	1.000	0.101171
1	0.004059	0.000000	0.012658	0.085714	0.262626	0.000000	0.000	0.257445
2	0.160264	0.221432	0.164557	0.971429	0.313131	0.000000	0.125	0.165875
3	0.250953	0.490873	0.151899	0.285714	0.030303	0.000000	0.125	0.167263
4	0.087527	0.236795	0.236287	0.432831	0.313131	0.076851	0.750	0.244657
...	...	...	...	...	...	...	...	...
10905	0.167823	0.000000	0.810127	0.432831	0.404040	0.076851	0.750	0.419717
10906	0.041281	0.611576	0.016878	0.200000	0.686869	0.000000	0.625	0.094333
10907	0.270122	0.398446	0.590717	0.314286	0.636364	0.000000	0.125	0.131763
10908	0.123717	0.642072	0.409283	0.000000	0.272727	0.800000	0.625	0.213674
10909	0.060907	0.000000	0.168776	0.885714	0.010101	0.000000	0.250	0.353118