diff --git a/.ipynb_checkpoints/Lab_Customer_Analysis_Round_5_Joao-checkpoint.ipynb b/.ipynb_checkpoints/Lab_Customer_Analysis_Round_5_Joao-checkpoint.ipynb new file mode 100644 index 0000000..64f5032 --- /dev/null +++ b/.ipynb_checkpoints/Lab_Customer_Analysis_Round_5_Joao-checkpoint.ipynb @@ -0,0 +1,2250 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "f428a16d", + "metadata": {}, + "outputs": [], + "source": [ + "# Importing libraries\n", + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "from sklearn.preprocessing import MinMaxScaler\n", + "from sklearn.model_selection import train_test_split" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "190485ab", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CustomerStateCustomer Lifetime ValueResponseCoverageEducationEffective To DateEmploymentStatusGenderIncome...Months Since Policy InceptionNumber of Open ComplaintsNumber of PoliciesPolicy TypePolicyRenew Offer TypeSales ChannelTotal Claim AmountVehicle ClassVehicle Size
0BU79786Washington2763.519279NoBasicBachelor2/24/11EmployedF56274...501Corporate AutoCorporate L3Offer1Agent384.811147Two-Door CarMedsize
1QZ44356Arizona6979.535903NoExtendedBachelor1/31/11UnemployedF0...4208Personal AutoPersonal L3Offer3Agent1131.464935Four-Door CarMedsize
2AI49188Nevada12887.431650NoPremiumBachelor2/19/11EmployedF48767...3802Personal AutoPersonal L3Offer1Agent566.472247Two-Door CarMedsize
3WW63253California7645.861827NoBasicBachelor1/20/11UnemployedM0...6507Corporate AutoCorporate L2Offer1Call Center529.881344SUVMedsize
4HB64268Washington2813.692575NoBasicBachelor2/3/11EmployedM43836...4401Personal AutoPersonal L1Offer1Agent138.130879Four-Door CarMedsize
..................................................................
9129LA72316California23405.987980NoBasicBachelor2/10/11EmployedM71941...8902Personal AutoPersonal L1Offer2Web198.234764Four-Door CarMedsize
9130PK87824California3096.511217YesExtendedCollege2/12/11EmployedF21604...2801Corporate AutoCorporate L3Offer1Branch379.200000Four-Door CarMedsize
9131TD14365California8163.890428NoExtendedBachelor2/6/11UnemployedM0...3732Corporate AutoCorporate L2Offer1Branch790.784983Four-Door CarMedsize
9132UP19263California7524.442436NoExtendedCollege2/3/11EmployedM21941...303Personal AutoPersonal L2Offer3Branch691.200000Four-Door CarLarge
9133Y167826California2611.836866NoExtendedCollege2/14/11UnemployedM0...9001Corporate AutoCorporate L3Offer4Call Center369.600000Two-Door CarMedsize
\n", + "

9134 rows × 24 columns

\n", + "
" + ], + "text/plain": [ + " Customer State Customer Lifetime Value Response Coverage \\\n", + "0 BU79786 Washington 2763.519279 No Basic \n", + "1 QZ44356 Arizona 6979.535903 No Extended \n", + "2 AI49188 Nevada 12887.431650 No Premium \n", + "3 WW63253 California 7645.861827 No Basic \n", + "4 HB64268 Washington 2813.692575 No Basic \n", + "... ... ... ... ... ... \n", + "9129 LA72316 California 23405.987980 No Basic \n", + "9130 PK87824 California 3096.511217 Yes Extended \n", + "9131 TD14365 California 8163.890428 No Extended \n", + "9132 UP19263 California 7524.442436 No Extended \n", + "9133 Y167826 California 2611.836866 No Extended \n", + "\n", + " Education Effective To Date EmploymentStatus Gender Income ... \\\n", + "0 Bachelor 2/24/11 Employed F 56274 ... \n", + "1 Bachelor 1/31/11 Unemployed F 0 ... \n", + "2 Bachelor 2/19/11 Employed F 48767 ... \n", + "3 Bachelor 1/20/11 Unemployed M 0 ... \n", + "4 Bachelor 2/3/11 Employed M 43836 ... \n", + "... ... ... ... ... ... ... \n", + "9129 Bachelor 2/10/11 Employed M 71941 ... \n", + "9130 College 2/12/11 Employed F 21604 ... \n", + "9131 Bachelor 2/6/11 Unemployed M 0 ... \n", + "9132 College 2/3/11 Employed M 21941 ... \n", + "9133 College 2/14/11 Unemployed M 0 ... \n", + "\n", + " Months Since Policy Inception Number of Open Complaints \\\n", + "0 5 0 \n", + "1 42 0 \n", + "2 38 0 \n", + "3 65 0 \n", + "4 44 0 \n", + "... ... ... \n", + "9129 89 0 \n", + "9130 28 0 \n", + "9131 37 3 \n", + "9132 3 0 \n", + "9133 90 0 \n", + "\n", + " Number of Policies Policy Type Policy Renew Offer Type \\\n", + "0 1 Corporate Auto Corporate L3 Offer1 \n", + "1 8 Personal Auto Personal L3 Offer3 \n", + "2 2 Personal Auto Personal L3 Offer1 \n", + "3 7 Corporate Auto Corporate L2 Offer1 \n", + "4 1 Personal Auto Personal L1 Offer1 \n", + "... ... ... ... ... 
\n", + "9129 2 Personal Auto Personal L1 Offer2 \n", + "9130 1 Corporate Auto Corporate L3 Offer1 \n", + "9131 2 Corporate Auto Corporate L2 Offer1 \n", + "9132 3 Personal Auto Personal L2 Offer3 \n", + "9133 1 Corporate Auto Corporate L3 Offer4 \n", + "\n", + " Sales Channel Total Claim Amount Vehicle Class Vehicle Size \n", + "0 Agent 384.811147 Two-Door Car Medsize \n", + "1 Agent 1131.464935 Four-Door Car Medsize \n", + "2 Agent 566.472247 Two-Door Car Medsize \n", + "3 Call Center 529.881344 SUV Medsize \n", + "4 Agent 138.130879 Four-Door Car Medsize \n", + "... ... ... ... ... \n", + "9129 Web 198.234764 Four-Door Car Medsize \n", + "9130 Branch 379.200000 Four-Door Car Medsize \n", + "9131 Branch 790.784983 Four-Door Car Medsize \n", + "9132 Branch 691.200000 Four-Door Car Large \n", + "9133 Call Center 369.600000 Two-Door Car Medsize \n", + "\n", + "[9134 rows x 24 columns]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Importing file\n", + "\n", + "df = pd.read_csv(r\"C:\\Users\\joaoa\\Desktop\\Ironhack\\Labs\\lab-customer-analysis-round-5\\files_for_lab\\csv_files\\marketing_customer_analysis.csv\")\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "10c11a0c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstatecustomer_lifetime_valueresponsecoverageeducationeffective_to_dateemploymentstatusgenderincome...months_since_policy_inceptionnumber_of_open_complaintsnumber_of_policiespolicy_typepolicyrenew_offer_typesales_channeltotal_claim_amountvehicle_classvehicle_size
0BU79786Washington2763.519279NoBasicBachelor2/24/11EmployedF56274...501Corporate AutoCorporate L3Offer1Agent384.811147Two-Door CarMedsize
1QZ44356Arizona6979.535903NoExtendedBachelor1/31/11UnemployedF0...4208Personal AutoPersonal L3Offer3Agent1131.464935Four-Door CarMedsize
2AI49188Nevada12887.431650NoPremiumBachelor2/19/11EmployedF48767...3802Personal AutoPersonal L3Offer1Agent566.472247Two-Door CarMedsize
3WW63253California7645.861827NoBasicBachelor1/20/11UnemployedM0...6507Corporate AutoCorporate L2Offer1Call Center529.881344SUVMedsize
4HB64268Washington2813.692575NoBasicBachelor2/3/11EmployedM43836...4401Personal AutoPersonal L1Offer1Agent138.130879Four-Door CarMedsize
..................................................................
9129LA72316California23405.987980NoBasicBachelor2/10/11EmployedM71941...8902Personal AutoPersonal L1Offer2Web198.234764Four-Door CarMedsize
9130PK87824California3096.511217YesExtendedCollege2/12/11EmployedF21604...2801Corporate AutoCorporate L3Offer1Branch379.200000Four-Door CarMedsize
9131TD14365California8163.890428NoExtendedBachelor2/6/11UnemployedM0...3732Corporate AutoCorporate L2Offer1Branch790.784983Four-Door CarMedsize
9132UP19263California7524.442436NoExtendedCollege2/3/11EmployedM21941...303Personal AutoPersonal L2Offer3Branch691.200000Four-Door CarLarge
9133Y167826California2611.836866NoExtendedCollege2/14/11UnemployedM0...9001Corporate AutoCorporate L3Offer4Call Center369.600000Two-Door CarMedsize
\n", + "

9134 rows × 24 columns

\n", + "
" + ], + "text/plain": [ + " customer state customer_lifetime_value response coverage \\\n", + "0 BU79786 Washington 2763.519279 No Basic \n", + "1 QZ44356 Arizona 6979.535903 No Extended \n", + "2 AI49188 Nevada 12887.431650 No Premium \n", + "3 WW63253 California 7645.861827 No Basic \n", + "4 HB64268 Washington 2813.692575 No Basic \n", + "... ... ... ... ... ... \n", + "9129 LA72316 California 23405.987980 No Basic \n", + "9130 PK87824 California 3096.511217 Yes Extended \n", + "9131 TD14365 California 8163.890428 No Extended \n", + "9132 UP19263 California 7524.442436 No Extended \n", + "9133 Y167826 California 2611.836866 No Extended \n", + "\n", + " education effective_to_date employmentstatus gender income ... \\\n", + "0 Bachelor 2/24/11 Employed F 56274 ... \n", + "1 Bachelor 1/31/11 Unemployed F 0 ... \n", + "2 Bachelor 2/19/11 Employed F 48767 ... \n", + "3 Bachelor 1/20/11 Unemployed M 0 ... \n", + "4 Bachelor 2/3/11 Employed M 43836 ... \n", + "... ... ... ... ... ... ... \n", + "9129 Bachelor 2/10/11 Employed M 71941 ... \n", + "9130 College 2/12/11 Employed F 21604 ... \n", + "9131 Bachelor 2/6/11 Unemployed M 0 ... \n", + "9132 College 2/3/11 Employed M 21941 ... \n", + "9133 College 2/14/11 Unemployed M 0 ... \n", + "\n", + " months_since_policy_inception number_of_open_complaints \\\n", + "0 5 0 \n", + "1 42 0 \n", + "2 38 0 \n", + "3 65 0 \n", + "4 44 0 \n", + "... ... ... \n", + "9129 89 0 \n", + "9130 28 0 \n", + "9131 37 3 \n", + "9132 3 0 \n", + "9133 90 0 \n", + "\n", + " number_of_policies policy_type policy renew_offer_type \\\n", + "0 1 Corporate Auto Corporate L3 Offer1 \n", + "1 8 Personal Auto Personal L3 Offer3 \n", + "2 2 Personal Auto Personal L3 Offer1 \n", + "3 7 Corporate Auto Corporate L2 Offer1 \n", + "4 1 Personal Auto Personal L1 Offer1 \n", + "... ... ... ... ... 
\n", + "9129 2 Personal Auto Personal L1 Offer2 \n", + "9130 1 Corporate Auto Corporate L3 Offer1 \n", + "9131 2 Corporate Auto Corporate L2 Offer1 \n", + "9132 3 Personal Auto Personal L2 Offer3 \n", + "9133 1 Corporate Auto Corporate L3 Offer4 \n", + "\n", + " sales_channel total_claim_amount vehicle_class vehicle_size \n", + "0 Agent 384.811147 Two-Door Car Medsize \n", + "1 Agent 1131.464935 Four-Door Car Medsize \n", + "2 Agent 566.472247 Two-Door Car Medsize \n", + "3 Call Center 529.881344 SUV Medsize \n", + "4 Agent 138.130879 Four-Door Car Medsize \n", + "... ... ... ... ... \n", + "9129 Web 198.234764 Four-Door Car Medsize \n", + "9130 Branch 379.200000 Four-Door Car Medsize \n", + "9131 Branch 790.784983 Four-Door Car Medsize \n", + "9132 Branch 691.200000 Four-Door Car Large \n", + "9133 Call Center 369.600000 Two-Door Car Medsize \n", + "\n", + "[9134 rows x 24 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Standardizing columns names\n", + "\n", + "cols = []\n", + "for i in range(len(df.columns)):\n", + " cols.append(df.columns[i].lower().replace(\" \",\"_\"))\n", + "df.columns = cols\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "12c46e60", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "customer object\n", + "state object\n", + "customer_lifetime_value float64\n", + "response object\n", + "coverage object\n", + "education object\n", + "effective_to_date object\n", + "employmentstatus object\n", + "gender object\n", + "income int64\n", + "location_code object\n", + "marital_status object\n", + "monthly_premium_auto int64\n", + "months_since_last_claim int64\n", + "months_since_policy_inception int64\n", + "number_of_open_complaints int64\n", + "number_of_policies int64\n", + "policy_type object\n", + "policy object\n", + "renew_offer_type object\n", + "sales_channel object\n", + "total_claim_amount float64\n", + 
"vehicle_class object\n", + "vehicle_size object\n", + "dtype: object" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Looking for data types\n", + "\n", + "df.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "86dd7344", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total NaN values: 0\n" + ] + }, + { + "data": { + "text/plain": [ + "customer 0\n", + "state 0\n", + "customer_lifetime_value 0\n", + "response 0\n", + "coverage 0\n", + "education 0\n", + "effective_to_date 0\n", + "employmentstatus 0\n", + "gender 0\n", + "income 0\n", + "location_code 0\n", + "marital_status 0\n", + "monthly_premium_auto 0\n", + "months_since_last_claim 0\n", + "months_since_policy_inception 0\n", + "number_of_open_complaints 0\n", + "number_of_policies 0\n", + "policy_type 0\n", + "policy 0\n", + "renew_offer_type 0\n", + "sales_channel 0\n", + "total_claim_amount 0\n", + "vehicle_class 0\n", + "vehicle_size 0\n", + "dtype: int64" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Checking null values\n", + "\n", + "print(\"Total NaN values:\", df.isna().sum().sum())\n", + "df.isna().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "0f18bf3c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstatecustomer_lifetime_valueresponsecoverageeducationeffective_to_dateemploymentstatusgenderincome...months_since_policy_inceptionnumber_of_open_complaintsnumber_of_policiespolicy_typepolicyrenew_offer_typesales_channeltotal_claim_amountvehicle_classvehicle_size
0BU79786Washington2763.519279NoBasicBachelor2/24/11EmployedF56274...501Corporate AutoCorporate L3Offer1Agent384.811147Two-Door CarMedsize
1QZ44356Arizona6979.535903NoExtendedBachelor1/31/11UnemployedF0...4208Personal AutoPersonal L3Offer3Agent1131.464935Four-Door CarMedsize
2AI49188Nevada12887.431650NoPremiumBachelor2/19/11EmployedF48767...3802Personal AutoPersonal L3Offer1Agent566.472247Two-Door CarMedsize
3WW63253California7645.861827NoBasicBachelor1/20/11UnemployedM0...6507Corporate AutoCorporate L2Offer1Call Center529.881344SUVMedsize
4HB64268Washington2813.692575NoBasicBachelor2/3/11EmployedM43836...4401Personal AutoPersonal L1Offer1Agent138.130879Four-Door CarMedsize
..................................................................
9129LA72316California23405.987980NoBasicBachelor2/10/11EmployedM71941...8902Personal AutoPersonal L1Offer2Web198.234764Four-Door CarMedsize
9130PK87824California3096.511217YesExtendedCollege2/12/11EmployedF21604...2801Corporate AutoCorporate L3Offer1Branch379.200000Four-Door CarMedsize
9131TD14365California8163.890428NoExtendedBachelor2/6/11UnemployedM0...3732Corporate AutoCorporate L2Offer1Branch790.784983Four-Door CarMedsize
9132UP19263California7524.442436NoExtendedCollege2/3/11EmployedM21941...303Personal AutoPersonal L2Offer3Branch691.200000Four-Door CarLarge
9133Y167826California2611.836866NoExtendedCollege2/14/11UnemployedM0...9001Corporate AutoCorporate L3Offer4Call Center369.600000Two-Door CarMedsize
\n", + "

9134 rows × 24 columns

\n", + "
" + ], + "text/plain": [ + " customer state customer_lifetime_value response coverage \\\n", + "0 BU79786 Washington 2763.519279 No Basic \n", + "1 QZ44356 Arizona 6979.535903 No Extended \n", + "2 AI49188 Nevada 12887.431650 No Premium \n", + "3 WW63253 California 7645.861827 No Basic \n", + "4 HB64268 Washington 2813.692575 No Basic \n", + "... ... ... ... ... ... \n", + "9129 LA72316 California 23405.987980 No Basic \n", + "9130 PK87824 California 3096.511217 Yes Extended \n", + "9131 TD14365 California 8163.890428 No Extended \n", + "9132 UP19263 California 7524.442436 No Extended \n", + "9133 Y167826 California 2611.836866 No Extended \n", + "\n", + " education effective_to_date employmentstatus gender income ... \\\n", + "0 Bachelor 2/24/11 Employed F 56274 ... \n", + "1 Bachelor 1/31/11 Unemployed F 0 ... \n", + "2 Bachelor 2/19/11 Employed F 48767 ... \n", + "3 Bachelor 1/20/11 Unemployed M 0 ... \n", + "4 Bachelor 2/3/11 Employed M 43836 ... \n", + "... ... ... ... ... ... ... \n", + "9129 Bachelor 2/10/11 Employed M 71941 ... \n", + "9130 College 2/12/11 Employed F 21604 ... \n", + "9131 Bachelor 2/6/11 Unemployed M 0 ... \n", + "9132 College 2/3/11 Employed M 21941 ... \n", + "9133 College 2/14/11 Unemployed M 0 ... \n", + "\n", + " months_since_policy_inception number_of_open_complaints \\\n", + "0 5 0 \n", + "1 42 0 \n", + "2 38 0 \n", + "3 65 0 \n", + "4 44 0 \n", + "... ... ... \n", + "9129 89 0 \n", + "9130 28 0 \n", + "9131 37 3 \n", + "9132 3 0 \n", + "9133 90 0 \n", + "\n", + " number_of_policies policy_type policy renew_offer_type \\\n", + "0 1 Corporate Auto Corporate L3 Offer1 \n", + "1 8 Personal Auto Personal L3 Offer3 \n", + "2 2 Personal Auto Personal L3 Offer1 \n", + "3 7 Corporate Auto Corporate L2 Offer1 \n", + "4 1 Personal Auto Personal L1 Offer1 \n", + "... ... ... ... ... 
\n", + "9129 2 Personal Auto Personal L1 Offer2 \n", + "9130 1 Corporate Auto Corporate L3 Offer1 \n", + "9131 2 Corporate Auto Corporate L2 Offer1 \n", + "9132 3 Personal Auto Personal L2 Offer3 \n", + "9133 1 Corporate Auto Corporate L3 Offer4 \n", + "\n", + " sales_channel total_claim_amount vehicle_class vehicle_size \n", + "0 Agent 384.811147 Two-Door Car Medsize \n", + "1 Agent 1131.464935 Four-Door Car Medsize \n", + "2 Agent 566.472247 Two-Door Car Medsize \n", + "3 Call Center 529.881344 SUV Medsize \n", + "4 Agent 138.130879 Four-Door Car Medsize \n", + "... ... ... ... ... \n", + "9129 Web 198.234764 Four-Door Car Medsize \n", + "9130 Branch 379.200000 Four-Door Car Medsize \n", + "9131 Branch 790.784983 Four-Door Car Medsize \n", + "9132 Branch 691.200000 Four-Door Car Large \n", + "9133 Call Center 369.600000 Two-Door Car Medsize \n", + "\n", + "[9134 rows x 24 columns]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Dropping duplicates if there are\n", + "\n", + "df = df.drop_duplicates()\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "b3949153", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "state unique values: ['Washington' 'Arizona' 'Nevada' 'California' 'Oregon']\n", + "response unique values: ['No' 'Yes']\n", + "coverage unique values: ['Basic' 'Extended' 'Premium']\n", + "education unique values: ['Bachelor' 'College' 'Master' 'High School or Below' 'Doctor']\n", + "employmentstatus unique values: ['Employed' 'Unemployed' 'Medical Leave' 'Disabled' 'Retired']\n", + "gender unique values: ['F' 'M']\n", + "number_of_open_complaints unique values: [0 2 1 3 5 4]\n", + "number_of_policies unique values: [1 8 2 7 9 4 3 6 5]\n", + "location_code unique values: ['Suburban' 'Rural' 'Urban']\n", + "marital_status unique values: ['Married' 'Single' 'Divorced']\n", + "policy_type unique values: ['Corporate 
Auto' 'Personal Auto' 'Special Auto']\n", + "policy unique values: ['Corporate L3' 'Personal L3' 'Corporate L2' 'Personal L1' 'Special L2'\n", + " 'Corporate L1' 'Personal L2' 'Special L1' 'Special L3']\n", + "renew_offer_type unique values: ['Offer1' 'Offer3' 'Offer2' 'Offer4']\n", + "sales_channel unique values: ['Agent' 'Call Center' 'Web' 'Branch']\n", + "vehicle_class unique values: ['Two-Door Car' 'Four-Door Car' 'SUV' 'Luxury SUV' 'Sports Car'\n", + " 'Luxury Car']\n", + "vehicle_size unique values: ['Medsize' 'Small' 'Large']\n" + ] + } + ], + "source": [ + "# Checking unique values\n", + "\n", + "print(\"state unique values:\", df[\"state\"].unique())\n", + "print(\"response unique values:\", df[\"response\"].unique())\n", + "print(\"coverage unique values:\", df[\"coverage\"].unique())\n", + "print(\"education unique values:\", df[\"education\"].unique())\n", + "print(\"employmentstatus unique values:\", df[\"employmentstatus\"].unique())\n", + "print(\"gender unique values:\", df[\"gender\"].unique())\n", + "print(\"number_of_open_complaints unique values:\", df[\"number_of_open_complaints\"].unique())\n", + "print(\"number_of_policies unique values:\", df[\"number_of_policies\"].unique())\n", + "print(\"location_code unique values:\", df[\"location_code\"].unique())\n", + "print(\"marital_status unique values:\", df[\"marital_status\"].unique())\n", + "print(\"policy_type unique values:\", df[\"policy_type\"].unique())\n", + "print(\"policy unique values:\", df[\"policy\"].unique())\n", + "print(\"renew_offer_type unique values:\", df[\"renew_offer_type\"].unique())\n", + "print(\"sales_channel unique values:\", df[\"sales_channel\"].unique())\n", + "print(\"vehicle_class unique values:\", df[\"vehicle_class\"].unique())\n", + "print(\"vehicle_size unique values:\", df[\"vehicle_size\"].unique())" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "56c7a443", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": 
"stream", + "text": [ + "Min of customer_lifetime_value: 1898.007675\n", + "Max of customer_lifetime_value: 83325.38119\n", + "Min of income: 0\n", + "Max of income: 99981\n", + "Min of monthly_premium_auto: 61\n", + "Max of monthly_premium_auto: 298\n", + "Min of months_since_last_claim: 0\n", + "Max of months_since_last_claim: 35\n", + "Min of months_since_policy_inception: 0\n", + "Max of months_since_policy_inception: 99\n", + "Min of total_claim_amount: 0.099007\n", + "Max of total_claim_amount: 2893.239678\n" + ] + } + ], + "source": [ + "# Checking min values and max values\n", + "\n", + "print(\"Min of customer_lifetime_value:\", min(df[\"customer_lifetime_value\"]))\n", + "print(\"Max of customer_lifetime_value:\", max(df[\"customer_lifetime_value\"]))\n", + "print(\"Min of income:\", min(df[\"income\"]))\n", + "print(\"Max of income:\", max(df[\"income\"]))\n", + "print(\"Min of monthly_premium_auto:\", min(df[\"monthly_premium_auto\"]))\n", + "print(\"Max of monthly_premium_auto:\", max(df[\"monthly_premium_auto\"]))\n", + "print(\"Min of months_since_last_claim:\", min(df[\"months_since_last_claim\"]))\n", + "print(\"Max of months_since_last_claim:\", max(df[\"months_since_last_claim\"]))\n", + "print(\"Min of months_since_policy_inception:\", min(df[\"months_since_policy_inception\"]))\n", + "print(\"Max of months_since_policy_inception:\", max(df[\"months_since_policy_inception\"]))\n", + "print(\"Min of total_claim_amount:\", min(df[\"total_claim_amount\"]))\n", + "print(\"Max of total_claim_amount:\", max(df[\"total_claim_amount\"]))" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "584b19c7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstatecustomer_lifetime_valueresponsecoverageeducationeffective_to_dateemploymentstatusgenderincome...months_since_last_claimmonths_since_policy_inceptionnumber_of_open_complaintsnumber_of_policiespolicy_typepolicyrenew_offer_typesales_channelvehicle_classvehicle_size
0BU79786Washington2763.519279NoBasicBachelor2/24/11EmployedF56274...32501Corporate AutoCorporate L3Offer1AgentTwo-Door CarMedsize
1QZ44356Arizona6979.535903NoExtendedBachelor1/31/11UnemployedF0...134208Personal AutoPersonal L3Offer3AgentFour-Door CarMedsize
2AI49188Nevada12887.431650NoPremiumBachelor2/19/11EmployedF48767...183802Personal AutoPersonal L3Offer1AgentTwo-Door CarMedsize
3WW63253California7645.861827NoBasicBachelor1/20/11UnemployedM0...186507Corporate AutoCorporate L2Offer1Call CenterSUVMedsize
4HB64268Washington2813.692575NoBasicBachelor2/3/11EmployedM43836...124401Personal AutoPersonal L1Offer1AgentFour-Door CarMedsize
..................................................................
9129LA72316California23405.987980NoBasicBachelor2/10/11EmployedM71941...188902Personal AutoPersonal L1Offer2WebFour-Door CarMedsize
9130PK87824California3096.511217YesExtendedCollege2/12/11EmployedF21604...142801Corporate AutoCorporate L3Offer1BranchFour-Door CarMedsize
9131TD14365California8163.890428NoExtendedBachelor2/6/11UnemployedM0...93732Corporate AutoCorporate L2Offer1BranchFour-Door CarMedsize
9132UP19263California7524.442436NoExtendedCollege2/3/11EmployedM21941...34303Personal AutoPersonal L2Offer3BranchFour-Door CarLarge
9133Y167826California2611.836866NoExtendedCollege2/14/11UnemployedM0...39001Corporate AutoCorporate L3Offer4Call CenterTwo-Door CarMedsize
\n", + "

9134 rows × 23 columns

\n", + "
" + ], + "text/plain": [ + " customer state customer_lifetime_value response coverage \\\n", + "0 BU79786 Washington 2763.519279 No Basic \n", + "1 QZ44356 Arizona 6979.535903 No Extended \n", + "2 AI49188 Nevada 12887.431650 No Premium \n", + "3 WW63253 California 7645.861827 No Basic \n", + "4 HB64268 Washington 2813.692575 No Basic \n", + "... ... ... ... ... ... \n", + "9129 LA72316 California 23405.987980 No Basic \n", + "9130 PK87824 California 3096.511217 Yes Extended \n", + "9131 TD14365 California 8163.890428 No Extended \n", + "9132 UP19263 California 7524.442436 No Extended \n", + "9133 Y167826 California 2611.836866 No Extended \n", + "\n", + " education effective_to_date employmentstatus gender income ... \\\n", + "0 Bachelor 2/24/11 Employed F 56274 ... \n", + "1 Bachelor 1/31/11 Unemployed F 0 ... \n", + "2 Bachelor 2/19/11 Employed F 48767 ... \n", + "3 Bachelor 1/20/11 Unemployed M 0 ... \n", + "4 Bachelor 2/3/11 Employed M 43836 ... \n", + "... ... ... ... ... ... ... \n", + "9129 Bachelor 2/10/11 Employed M 71941 ... \n", + "9130 College 2/12/11 Employed F 21604 ... \n", + "9131 Bachelor 2/6/11 Unemployed M 0 ... \n", + "9132 College 2/3/11 Employed M 21941 ... \n", + "9133 College 2/14/11 Unemployed M 0 ... \n", + "\n", + " months_since_last_claim months_since_policy_inception \\\n", + "0 32 5 \n", + "1 13 42 \n", + "2 18 38 \n", + "3 18 65 \n", + "4 12 44 \n", + "... ... ... \n", + "9129 18 89 \n", + "9130 14 28 \n", + "9131 9 37 \n", + "9132 34 3 \n", + "9133 3 90 \n", + "\n", + " number_of_open_complaints number_of_policies policy_type \\\n", + "0 0 1 Corporate Auto \n", + "1 0 8 Personal Auto \n", + "2 0 2 Personal Auto \n", + "3 0 7 Corporate Auto \n", + "4 0 1 Personal Auto \n", + "... ... ... ... 
# Splitting X-Y
#
# Y is the target column (total_claim_amount); X keeps every other column of
# df. Neither statement modifies df itself.
X = df.drop(columns=["total_claim_amount"])
Y = df["total_claim_amount"]
X
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstatecustomer_lifetime_valueresponsecoverageeducationeffective_to_dateemploymentstatusgenderincome...months_since_last_claimmonths_since_policy_inceptionnumber_of_open_complaintsnumber_of_policiespolicy_typepolicyrenew_offer_typesales_channelvehicle_classvehicle_size
0BU79786Washington0.010629NoBasicBachelor2/24/11EmployedF0.562847...0.9142860.0505050.00.000Corporate AutoCorporate L3Offer1AgentTwo-Door CarMedsize
1QZ44356Arizona0.062406NoExtendedBachelor1/31/11UnemployedF0.000000...0.3714290.4242420.00.875Personal AutoPersonal L3Offer3AgentFour-Door CarMedsize
2AI49188Nevada0.134960NoPremiumBachelor2/19/11EmployedF0.487763...0.5142860.3838380.00.125Personal AutoPersonal L3Offer1AgentTwo-Door CarMedsize
3WW63253California0.070589NoBasicBachelor1/20/11UnemployedM0.000000...0.5142860.6565660.00.750Corporate AutoCorporate L2Offer1Call CenterSUVMedsize
4HB64268Washington0.011245NoBasicBachelor2/3/11EmployedM0.438443...0.3428570.4444440.00.000Personal AutoPersonal L1Offer1AgentFour-Door CarMedsize
..................................................................
9129LA72316California0.264137NoBasicBachelor2/10/11EmployedM0.719547...0.5142860.8989900.00.125Personal AutoPersonal L1Offer2WebFour-Door CarMedsize
9130PK87824California0.014719YesExtendedCollege2/12/11EmployedF0.216081...0.4000000.2828280.00.000Corporate AutoCorporate L3Offer1BranchFour-Door CarMedsize
9131TD14365California0.076951NoExtendedBachelor2/6/11UnemployedM0.000000...0.2571430.3737370.60.125Corporate AutoCorporate L2Offer1BranchFour-Door CarMedsize
9132UP19263California0.069098NoExtendedCollege2/3/11EmployedM0.219452...0.9714290.0303030.00.250Personal AutoPersonal L2Offer3BranchFour-Door CarLarge
9133Y167826California0.008766NoExtendedCollege2/14/11UnemployedM0.000000...0.0857140.9090910.00.000Corporate AutoCorporate L3Offer4Call CenterTwo-Door CarMedsize
\n", + "

9134 rows × 23 columns

\n", + "
" + ], + "text/plain": [ + " customer state customer_lifetime_value response coverage \\\n", + "0 BU79786 Washington 0.010629 No Basic \n", + "1 QZ44356 Arizona 0.062406 No Extended \n", + "2 AI49188 Nevada 0.134960 No Premium \n", + "3 WW63253 California 0.070589 No Basic \n", + "4 HB64268 Washington 0.011245 No Basic \n", + "... ... ... ... ... ... \n", + "9129 LA72316 California 0.264137 No Basic \n", + "9130 PK87824 California 0.014719 Yes Extended \n", + "9131 TD14365 California 0.076951 No Extended \n", + "9132 UP19263 California 0.069098 No Extended \n", + "9133 Y167826 California 0.008766 No Extended \n", + "\n", + " education effective_to_date employmentstatus gender income ... \\\n", + "0 Bachelor 2/24/11 Employed F 0.562847 ... \n", + "1 Bachelor 1/31/11 Unemployed F 0.000000 ... \n", + "2 Bachelor 2/19/11 Employed F 0.487763 ... \n", + "3 Bachelor 1/20/11 Unemployed M 0.000000 ... \n", + "4 Bachelor 2/3/11 Employed M 0.438443 ... \n", + "... ... ... ... ... ... ... \n", + "9129 Bachelor 2/10/11 Employed M 0.719547 ... \n", + "9130 College 2/12/11 Employed F 0.216081 ... \n", + "9131 Bachelor 2/6/11 Unemployed M 0.000000 ... \n", + "9132 College 2/3/11 Employed M 0.219452 ... \n", + "9133 College 2/14/11 Unemployed M 0.000000 ... \n", + "\n", + " months_since_last_claim months_since_policy_inception \\\n", + "0 0.914286 0.050505 \n", + "1 0.371429 0.424242 \n", + "2 0.514286 0.383838 \n", + "3 0.514286 0.656566 \n", + "4 0.342857 0.444444 \n", + "... ... ... \n", + "9129 0.514286 0.898990 \n", + "9130 0.400000 0.282828 \n", + "9131 0.257143 0.373737 \n", + "9132 0.971429 0.030303 \n", + "9133 0.085714 0.909091 \n", + "\n", + " number_of_open_complaints number_of_policies policy_type \\\n", + "0 0.0 0.000 Corporate Auto \n", + "1 0.0 0.875 Personal Auto \n", + "2 0.0 0.125 Personal Auto \n", + "3 0.0 0.750 Corporate Auto \n", + "4 0.0 0.000 Personal Auto \n", + "... ... ... ... 
# Normalizing X numerical values
#
# Rescale every numeric column of X with MinMaxScaler (default range [0, 1])
# and write the scaled values back into X; non-numeric columns are untouched.
# NOTE(review): the scaler is fitted on all rows. If a train/test split is
# added later (train_test_split is imported), fit on the training rows only
# to avoid leaking test-set ranges into the features.
num_X = X.select_dtypes(include=np.number)
scaler = MinMaxScaler()

# fit_transform returns a bare ndarray. Rebuild the frame with num_X's own
# index as well as its column names: the assignment below aligns on index
# labels, so a fresh RangeIndex would write NaN / misplaced rows whenever X
# is not indexed 0..n-1 (for example after rows have been filtered out).
normalized_test = pd.DataFrame(
    scaler.fit_transform(num_X),
    columns=num_X.columns,
    index=num_X.index,
)
X[normalized_test.columns] = normalized_test
X
b/Lab_Customer_Analysis_Round_5_Joao.ipynb @@ -0,0 +1,2250 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "f428a16d", + "metadata": {}, + "outputs": [], + "source": [ + "# Importing libraries\n", + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "from sklearn.preprocessing import MinMaxScaler\n", + "from sklearn.model_selection import train_test_split" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "190485ab", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CustomerStateCustomer Lifetime ValueResponseCoverageEducationEffective To DateEmploymentStatusGenderIncome...Months Since Policy InceptionNumber of Open ComplaintsNumber of PoliciesPolicy TypePolicyRenew Offer TypeSales ChannelTotal Claim AmountVehicle ClassVehicle Size
0BU79786Washington2763.519279NoBasicBachelor2/24/11EmployedF56274...501Corporate AutoCorporate L3Offer1Agent384.811147Two-Door CarMedsize
1QZ44356Arizona6979.535903NoExtendedBachelor1/31/11UnemployedF0...4208Personal AutoPersonal L3Offer3Agent1131.464935Four-Door CarMedsize
2AI49188Nevada12887.431650NoPremiumBachelor2/19/11EmployedF48767...3802Personal AutoPersonal L3Offer1Agent566.472247Two-Door CarMedsize
3WW63253California7645.861827NoBasicBachelor1/20/11UnemployedM0...6507Corporate AutoCorporate L2Offer1Call Center529.881344SUVMedsize
4HB64268Washington2813.692575NoBasicBachelor2/3/11EmployedM43836...4401Personal AutoPersonal L1Offer1Agent138.130879Four-Door CarMedsize
..................................................................
9129LA72316California23405.987980NoBasicBachelor2/10/11EmployedM71941...8902Personal AutoPersonal L1Offer2Web198.234764Four-Door CarMedsize
9130PK87824California3096.511217YesExtendedCollege2/12/11EmployedF21604...2801Corporate AutoCorporate L3Offer1Branch379.200000Four-Door CarMedsize
9131TD14365California8163.890428NoExtendedBachelor2/6/11UnemployedM0...3732Corporate AutoCorporate L2Offer1Branch790.784983Four-Door CarMedsize
9132UP19263California7524.442436NoExtendedCollege2/3/11EmployedM21941...303Personal AutoPersonal L2Offer3Branch691.200000Four-Door CarLarge
9133Y167826California2611.836866NoExtendedCollege2/14/11UnemployedM0...9001Corporate AutoCorporate L3Offer4Call Center369.600000Two-Door CarMedsize
\n", + "

9134 rows × 24 columns

\n", + "
" + ], + "text/plain": [ + " Customer State Customer Lifetime Value Response Coverage \\\n", + "0 BU79786 Washington 2763.519279 No Basic \n", + "1 QZ44356 Arizona 6979.535903 No Extended \n", + "2 AI49188 Nevada 12887.431650 No Premium \n", + "3 WW63253 California 7645.861827 No Basic \n", + "4 HB64268 Washington 2813.692575 No Basic \n", + "... ... ... ... ... ... \n", + "9129 LA72316 California 23405.987980 No Basic \n", + "9130 PK87824 California 3096.511217 Yes Extended \n", + "9131 TD14365 California 8163.890428 No Extended \n", + "9132 UP19263 California 7524.442436 No Extended \n", + "9133 Y167826 California 2611.836866 No Extended \n", + "\n", + " Education Effective To Date EmploymentStatus Gender Income ... \\\n", + "0 Bachelor 2/24/11 Employed F 56274 ... \n", + "1 Bachelor 1/31/11 Unemployed F 0 ... \n", + "2 Bachelor 2/19/11 Employed F 48767 ... \n", + "3 Bachelor 1/20/11 Unemployed M 0 ... \n", + "4 Bachelor 2/3/11 Employed M 43836 ... \n", + "... ... ... ... ... ... ... \n", + "9129 Bachelor 2/10/11 Employed M 71941 ... \n", + "9130 College 2/12/11 Employed F 21604 ... \n", + "9131 Bachelor 2/6/11 Unemployed M 0 ... \n", + "9132 College 2/3/11 Employed M 21941 ... \n", + "9133 College 2/14/11 Unemployed M 0 ... \n", + "\n", + " Months Since Policy Inception Number of Open Complaints \\\n", + "0 5 0 \n", + "1 42 0 \n", + "2 38 0 \n", + "3 65 0 \n", + "4 44 0 \n", + "... ... ... \n", + "9129 89 0 \n", + "9130 28 0 \n", + "9131 37 3 \n", + "9132 3 0 \n", + "9133 90 0 \n", + "\n", + " Number of Policies Policy Type Policy Renew Offer Type \\\n", + "0 1 Corporate Auto Corporate L3 Offer1 \n", + "1 8 Personal Auto Personal L3 Offer3 \n", + "2 2 Personal Auto Personal L3 Offer1 \n", + "3 7 Corporate Auto Corporate L2 Offer1 \n", + "4 1 Personal Auto Personal L1 Offer1 \n", + "... ... ... ... ... 
# Importing file
#
# Read the marketing customer analysis CSV into df.
# The path is relative to the notebook's folder rather than an absolute
# location under one user's Desktop, so the notebook runs on any machine and
# no local user name is stored in it. Forward slashes work on Windows too.
# NOTE(review): assumes the notebook sits in the lab repository root, next to
# files_for_lab/ - confirm.
DATA_PATH = "files_for_lab/csv_files/marketing_customer_analysis.csv"

df = pd.read_csv(DATA_PATH)
df
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstatecustomer_lifetime_valueresponsecoverageeducationeffective_to_dateemploymentstatusgenderincome...months_since_policy_inceptionnumber_of_open_complaintsnumber_of_policiespolicy_typepolicyrenew_offer_typesales_channeltotal_claim_amountvehicle_classvehicle_size
0BU79786Washington2763.519279NoBasicBachelor2/24/11EmployedF56274...501Corporate AutoCorporate L3Offer1Agent384.811147Two-Door CarMedsize
1QZ44356Arizona6979.535903NoExtendedBachelor1/31/11UnemployedF0...4208Personal AutoPersonal L3Offer3Agent1131.464935Four-Door CarMedsize
2AI49188Nevada12887.431650NoPremiumBachelor2/19/11EmployedF48767...3802Personal AutoPersonal L3Offer1Agent566.472247Two-Door CarMedsize
3WW63253California7645.861827NoBasicBachelor1/20/11UnemployedM0...6507Corporate AutoCorporate L2Offer1Call Center529.881344SUVMedsize
4HB64268Washington2813.692575NoBasicBachelor2/3/11EmployedM43836...4401Personal AutoPersonal L1Offer1Agent138.130879Four-Door CarMedsize
..................................................................
9129LA72316California23405.987980NoBasicBachelor2/10/11EmployedM71941...8902Personal AutoPersonal L1Offer2Web198.234764Four-Door CarMedsize
9130PK87824California3096.511217YesExtendedCollege2/12/11EmployedF21604...2801Corporate AutoCorporate L3Offer1Branch379.200000Four-Door CarMedsize
9131TD14365California8163.890428NoExtendedBachelor2/6/11UnemployedM0...3732Corporate AutoCorporate L2Offer1Branch790.784983Four-Door CarMedsize
9132UP19263California7524.442436NoExtendedCollege2/3/11EmployedM21941...303Personal AutoPersonal L2Offer3Branch691.200000Four-Door CarLarge
9133Y167826California2611.836866NoExtendedCollege2/14/11UnemployedM0...9001Corporate AutoCorporate L3Offer4Call Center369.600000Two-Door CarMedsize
\n", + "

9134 rows × 24 columns

\n", + "
" + ], + "text/plain": [ + " customer state customer_lifetime_value response coverage \\\n", + "0 BU79786 Washington 2763.519279 No Basic \n", + "1 QZ44356 Arizona 6979.535903 No Extended \n", + "2 AI49188 Nevada 12887.431650 No Premium \n", + "3 WW63253 California 7645.861827 No Basic \n", + "4 HB64268 Washington 2813.692575 No Basic \n", + "... ... ... ... ... ... \n", + "9129 LA72316 California 23405.987980 No Basic \n", + "9130 PK87824 California 3096.511217 Yes Extended \n", + "9131 TD14365 California 8163.890428 No Extended \n", + "9132 UP19263 California 7524.442436 No Extended \n", + "9133 Y167826 California 2611.836866 No Extended \n", + "\n", + " education effective_to_date employmentstatus gender income ... \\\n", + "0 Bachelor 2/24/11 Employed F 56274 ... \n", + "1 Bachelor 1/31/11 Unemployed F 0 ... \n", + "2 Bachelor 2/19/11 Employed F 48767 ... \n", + "3 Bachelor 1/20/11 Unemployed M 0 ... \n", + "4 Bachelor 2/3/11 Employed M 43836 ... \n", + "... ... ... ... ... ... ... \n", + "9129 Bachelor 2/10/11 Employed M 71941 ... \n", + "9130 College 2/12/11 Employed F 21604 ... \n", + "9131 Bachelor 2/6/11 Unemployed M 0 ... \n", + "9132 College 2/3/11 Employed M 21941 ... \n", + "9133 College 2/14/11 Unemployed M 0 ... \n", + "\n", + " months_since_policy_inception number_of_open_complaints \\\n", + "0 5 0 \n", + "1 42 0 \n", + "2 38 0 \n", + "3 65 0 \n", + "4 44 0 \n", + "... ... ... \n", + "9129 89 0 \n", + "9130 28 0 \n", + "9131 37 3 \n", + "9132 3 0 \n", + "9133 90 0 \n", + "\n", + " number_of_policies policy_type policy renew_offer_type \\\n", + "0 1 Corporate Auto Corporate L3 Offer1 \n", + "1 8 Personal Auto Personal L3 Offer3 \n", + "2 2 Personal Auto Personal L3 Offer1 \n", + "3 7 Corporate Auto Corporate L2 Offer1 \n", + "4 1 Personal Auto Personal L1 Offer1 \n", + "... ... ... ... ... 
# Standardizing columns names
#
# Lower-case every column label and replace spaces with underscores,
# e.g. "Customer Lifetime Value" -> "customer_lifetime_value".
cols = [label.lower().replace(" ", "_") for label in df.columns]
df.columns = cols
df
# Looking for data types

df.dtypes

# --- next cell ---
# Checking null values
#
# Count missing values per column once; the grand total is printed and the
# per-column counts are the cell's displayed result.
nan_counts = df.isna().sum()
print("Total NaN values:", nan_counts.sum())
nan_counts
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstatecustomer_lifetime_valueresponsecoverageeducationeffective_to_dateemploymentstatusgenderincome...months_since_policy_inceptionnumber_of_open_complaintsnumber_of_policiespolicy_typepolicyrenew_offer_typesales_channeltotal_claim_amountvehicle_classvehicle_size
0BU79786Washington2763.519279NoBasicBachelor2/24/11EmployedF56274...501Corporate AutoCorporate L3Offer1Agent384.811147Two-Door CarMedsize
1QZ44356Arizona6979.535903NoExtendedBachelor1/31/11UnemployedF0...4208Personal AutoPersonal L3Offer3Agent1131.464935Four-Door CarMedsize
2AI49188Nevada12887.431650NoPremiumBachelor2/19/11EmployedF48767...3802Personal AutoPersonal L3Offer1Agent566.472247Two-Door CarMedsize
3WW63253California7645.861827NoBasicBachelor1/20/11UnemployedM0...6507Corporate AutoCorporate L2Offer1Call Center529.881344SUVMedsize
4HB64268Washington2813.692575NoBasicBachelor2/3/11EmployedM43836...4401Personal AutoPersonal L1Offer1Agent138.130879Four-Door CarMedsize
..................................................................
9129LA72316California23405.987980NoBasicBachelor2/10/11EmployedM71941...8902Personal AutoPersonal L1Offer2Web198.234764Four-Door CarMedsize
9130PK87824California3096.511217YesExtendedCollege2/12/11EmployedF21604...2801Corporate AutoCorporate L3Offer1Branch379.200000Four-Door CarMedsize
9131TD14365California8163.890428NoExtendedBachelor2/6/11UnemployedM0...3732Corporate AutoCorporate L2Offer1Branch790.784983Four-Door CarMedsize
9132UP19263California7524.442436NoExtendedCollege2/3/11EmployedM21941...303Personal AutoPersonal L2Offer3Branch691.200000Four-Door CarLarge
9133Y167826California2611.836866NoExtendedCollege2/14/11UnemployedM0...9001Corporate AutoCorporate L3Offer4Call Center369.600000Two-Door CarMedsize
\n", + "

9134 rows × 24 columns

\n", + "
" + ], + "text/plain": [ + " customer state customer_lifetime_value response coverage \\\n", + "0 BU79786 Washington 2763.519279 No Basic \n", + "1 QZ44356 Arizona 6979.535903 No Extended \n", + "2 AI49188 Nevada 12887.431650 No Premium \n", + "3 WW63253 California 7645.861827 No Basic \n", + "4 HB64268 Washington 2813.692575 No Basic \n", + "... ... ... ... ... ... \n", + "9129 LA72316 California 23405.987980 No Basic \n", + "9130 PK87824 California 3096.511217 Yes Extended \n", + "9131 TD14365 California 8163.890428 No Extended \n", + "9132 UP19263 California 7524.442436 No Extended \n", + "9133 Y167826 California 2611.836866 No Extended \n", + "\n", + " education effective_to_date employmentstatus gender income ... \\\n", + "0 Bachelor 2/24/11 Employed F 56274 ... \n", + "1 Bachelor 1/31/11 Unemployed F 0 ... \n", + "2 Bachelor 2/19/11 Employed F 48767 ... \n", + "3 Bachelor 1/20/11 Unemployed M 0 ... \n", + "4 Bachelor 2/3/11 Employed M 43836 ... \n", + "... ... ... ... ... ... ... \n", + "9129 Bachelor 2/10/11 Employed M 71941 ... \n", + "9130 College 2/12/11 Employed F 21604 ... \n", + "9131 Bachelor 2/6/11 Unemployed M 0 ... \n", + "9132 College 2/3/11 Employed M 21941 ... \n", + "9133 College 2/14/11 Unemployed M 0 ... \n", + "\n", + " months_since_policy_inception number_of_open_complaints \\\n", + "0 5 0 \n", + "1 42 0 \n", + "2 38 0 \n", + "3 65 0 \n", + "4 44 0 \n", + "... ... ... \n", + "9129 89 0 \n", + "9130 28 0 \n", + "9131 37 3 \n", + "9132 3 0 \n", + "9133 90 0 \n", + "\n", + " number_of_policies policy_type policy renew_offer_type \\\n", + "0 1 Corporate Auto Corporate L3 Offer1 \n", + "1 8 Personal Auto Personal L3 Offer3 \n", + "2 2 Personal Auto Personal L3 Offer1 \n", + "3 7 Corporate Auto Corporate L2 Offer1 \n", + "4 1 Personal Auto Personal L1 Offer1 \n", + "... ... ... ... ... 
\n", + "9129 2 Personal Auto Personal L1 Offer2 \n", + "9130 1 Corporate Auto Corporate L3 Offer1 \n", + "9131 2 Corporate Auto Corporate L2 Offer1 \n", + "9132 3 Personal Auto Personal L2 Offer3 \n", + "9133 1 Corporate Auto Corporate L3 Offer4 \n", + "\n", + " sales_channel total_claim_amount vehicle_class vehicle_size \n", + "0 Agent 384.811147 Two-Door Car Medsize \n", + "1 Agent 1131.464935 Four-Door Car Medsize \n", + "2 Agent 566.472247 Two-Door Car Medsize \n", + "3 Call Center 529.881344 SUV Medsize \n", + "4 Agent 138.130879 Four-Door Car Medsize \n", + "... ... ... ... ... \n", + "9129 Web 198.234764 Four-Door Car Medsize \n", + "9130 Branch 379.200000 Four-Door Car Medsize \n", + "9131 Branch 790.784983 Four-Door Car Medsize \n", + "9132 Branch 691.200000 Four-Door Car Large \n", + "9133 Call Center 369.600000 Two-Door Car Medsize \n", + "\n", + "[9134 rows x 24 columns]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Dropping duplicates if there are\n", + "\n", + "df = df.drop_duplicates()\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "b3949153", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "state unique values: ['Washington' 'Arizona' 'Nevada' 'California' 'Oregon']\n", + "response unique values: ['No' 'Yes']\n", + "coverage unique values: ['Basic' 'Extended' 'Premium']\n", + "education unique values: ['Bachelor' 'College' 'Master' 'High School or Below' 'Doctor']\n", + "employmentstatus unique values: ['Employed' 'Unemployed' 'Medical Leave' 'Disabled' 'Retired']\n", + "gender unique values: ['F' 'M']\n", + "number_of_open_complaints unique values: [0 2 1 3 5 4]\n", + "number_of_policies unique values: [1 8 2 7 9 4 3 6 5]\n", + "location_code unique values: ['Suburban' 'Rural' 'Urban']\n", + "marital_status unique values: ['Married' 'Single' 'Divorced']\n", + "policy_type unique values: ['Corporate 
# %% Checking unique values
# One line is printed per column, in the order listed below.

columns_to_inspect = [
    "state",
    "response",
    "coverage",
    "education",
    "employmentstatus",
    "gender",
    "number_of_open_complaints",
    "number_of_policies",
    "location_code",
    "marital_status",
    "policy_type",
    "policy",
    "renew_offer_type",
    "sales_channel",
    "vehicle_class",
    "vehicle_size",
]

for column_name in columns_to_inspect:
    print(f"{column_name} unique values:", df[column_name].unique())
"stream", + "text": [ + "Min of customer_lifetime_value: 1898.007675\n", + "Max of customer_lifetime_value: 83325.38119\n", + "Min of income: 0\n", + "Max of income: 99981\n", + "Min of monthly_premium_auto: 61\n", + "Max of monthly_premium_auto: 298\n", + "Min of months_since_last_claim: 0\n", + "Max of months_since_last_claim: 35\n", + "Min of months_since_policy_inception: 0\n", + "Max of months_since_policy_inception: 99\n", + "Min of total_claim_amount: 0.099007\n", + "Max of total_claim_amount: 2893.239678\n" + ] + } + ], + "source": [ + "# Checking min values and max values\n", + "\n", + "print(\"Min of customer_lifetime_value:\", min(df[\"customer_lifetime_value\"]))\n", + "print(\"Max of customer_lifetime_value:\", max(df[\"customer_lifetime_value\"]))\n", + "print(\"Min of income:\", min(df[\"income\"]))\n", + "print(\"Max of income:\", max(df[\"income\"]))\n", + "print(\"Min of monthly_premium_auto:\", min(df[\"monthly_premium_auto\"]))\n", + "print(\"Max of monthly_premium_auto:\", max(df[\"monthly_premium_auto\"]))\n", + "print(\"Min of months_since_last_claim:\", min(df[\"months_since_last_claim\"]))\n", + "print(\"Max of months_since_last_claim:\", max(df[\"months_since_last_claim\"]))\n", + "print(\"Min of months_since_policy_inception:\", min(df[\"months_since_policy_inception\"]))\n", + "print(\"Max of months_since_policy_inception:\", max(df[\"months_since_policy_inception\"]))\n", + "print(\"Min of total_claim_amount:\", min(df[\"total_claim_amount\"]))\n", + "print(\"Max of total_claim_amount:\", max(df[\"total_claim_amount\"]))" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "584b19c7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstatecustomer_lifetime_valueresponsecoverageeducationeffective_to_dateemploymentstatusgenderincome...months_since_last_claimmonths_since_policy_inceptionnumber_of_open_complaintsnumber_of_policiespolicy_typepolicyrenew_offer_typesales_channelvehicle_classvehicle_size
0BU79786Washington2763.519279NoBasicBachelor2/24/11EmployedF56274...32501Corporate AutoCorporate L3Offer1AgentTwo-Door CarMedsize
1QZ44356Arizona6979.535903NoExtendedBachelor1/31/11UnemployedF0...134208Personal AutoPersonal L3Offer3AgentFour-Door CarMedsize
2AI49188Nevada12887.431650NoPremiumBachelor2/19/11EmployedF48767...183802Personal AutoPersonal L3Offer1AgentTwo-Door CarMedsize
3WW63253California7645.861827NoBasicBachelor1/20/11UnemployedM0...186507Corporate AutoCorporate L2Offer1Call CenterSUVMedsize
4HB64268Washington2813.692575NoBasicBachelor2/3/11EmployedM43836...124401Personal AutoPersonal L1Offer1AgentFour-Door CarMedsize
..................................................................
9129LA72316California23405.987980NoBasicBachelor2/10/11EmployedM71941...188902Personal AutoPersonal L1Offer2WebFour-Door CarMedsize
9130PK87824California3096.511217YesExtendedCollege2/12/11EmployedF21604...142801Corporate AutoCorporate L3Offer1BranchFour-Door CarMedsize
9131TD14365California8163.890428NoExtendedBachelor2/6/11UnemployedM0...93732Corporate AutoCorporate L2Offer1BranchFour-Door CarMedsize
9132UP19263California7524.442436NoExtendedCollege2/3/11EmployedM21941...34303Personal AutoPersonal L2Offer3BranchFour-Door CarLarge
9133Y167826California2611.836866NoExtendedCollege2/14/11UnemployedM0...39001Corporate AutoCorporate L3Offer4Call CenterTwo-Door CarMedsize
\n", + "

9134 rows × 23 columns

\n", + "
" + ], + "text/plain": [ + " customer state customer_lifetime_value response coverage \\\n", + "0 BU79786 Washington 2763.519279 No Basic \n", + "1 QZ44356 Arizona 6979.535903 No Extended \n", + "2 AI49188 Nevada 12887.431650 No Premium \n", + "3 WW63253 California 7645.861827 No Basic \n", + "4 HB64268 Washington 2813.692575 No Basic \n", + "... ... ... ... ... ... \n", + "9129 LA72316 California 23405.987980 No Basic \n", + "9130 PK87824 California 3096.511217 Yes Extended \n", + "9131 TD14365 California 8163.890428 No Extended \n", + "9132 UP19263 California 7524.442436 No Extended \n", + "9133 Y167826 California 2611.836866 No Extended \n", + "\n", + " education effective_to_date employmentstatus gender income ... \\\n", + "0 Bachelor 2/24/11 Employed F 56274 ... \n", + "1 Bachelor 1/31/11 Unemployed F 0 ... \n", + "2 Bachelor 2/19/11 Employed F 48767 ... \n", + "3 Bachelor 1/20/11 Unemployed M 0 ... \n", + "4 Bachelor 2/3/11 Employed M 43836 ... \n", + "... ... ... ... ... ... ... \n", + "9129 Bachelor 2/10/11 Employed M 71941 ... \n", + "9130 College 2/12/11 Employed F 21604 ... \n", + "9131 Bachelor 2/6/11 Unemployed M 0 ... \n", + "9132 College 2/3/11 Employed M 21941 ... \n", + "9133 College 2/14/11 Unemployed M 0 ... \n", + "\n", + " months_since_last_claim months_since_policy_inception \\\n", + "0 32 5 \n", + "1 13 42 \n", + "2 18 38 \n", + "3 18 65 \n", + "4 12 44 \n", + "... ... ... \n", + "9129 18 89 \n", + "9130 14 28 \n", + "9131 9 37 \n", + "9132 34 3 \n", + "9133 3 90 \n", + "\n", + " number_of_open_complaints number_of_policies policy_type \\\n", + "0 0 1 Corporate Auto \n", + "1 0 8 Personal Auto \n", + "2 0 2 Personal Auto \n", + "3 0 7 Corporate Auto \n", + "4 0 1 Personal Auto \n", + "... ... ... ... 
\n", + "9129 0 2 Personal Auto \n", + "9130 0 1 Corporate Auto \n", + "9131 3 2 Corporate Auto \n", + "9132 0 3 Personal Auto \n", + "9133 0 1 Corporate Auto \n", + "\n", + " policy renew_offer_type sales_channel vehicle_class vehicle_size \n", + "0 Corporate L3 Offer1 Agent Two-Door Car Medsize \n", + "1 Personal L3 Offer3 Agent Four-Door Car Medsize \n", + "2 Personal L3 Offer1 Agent Two-Door Car Medsize \n", + "3 Corporate L2 Offer1 Call Center SUV Medsize \n", + "4 Personal L1 Offer1 Agent Four-Door Car Medsize \n", + "... ... ... ... ... ... \n", + "9129 Personal L1 Offer2 Web Four-Door Car Medsize \n", + "9130 Corporate L3 Offer1 Branch Four-Door Car Medsize \n", + "9131 Corporate L2 Offer1 Branch Four-Door Car Medsize \n", + "9132 Personal L2 Offer3 Branch Four-Door Car Large \n", + "9133 Corporate L3 Offer4 Call Center Two-Door Car Medsize \n", + "\n", + "[9134 rows x 23 columns]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Splitting X-Y\n", + "\n", + "Y = df[\"total_claim_amount\"]\n", + "X = df.drop([\"total_claim_amount\"], axis = 1)\n", + "X" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "d8f88108", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstatecustomer_lifetime_valueresponsecoverageeducationeffective_to_dateemploymentstatusgenderincome...months_since_last_claimmonths_since_policy_inceptionnumber_of_open_complaintsnumber_of_policiespolicy_typepolicyrenew_offer_typesales_channelvehicle_classvehicle_size
0BU79786Washington0.010629NoBasicBachelor2/24/11EmployedF0.562847...0.9142860.0505050.00.000Corporate AutoCorporate L3Offer1AgentTwo-Door CarMedsize
1QZ44356Arizona0.062406NoExtendedBachelor1/31/11UnemployedF0.000000...0.3714290.4242420.00.875Personal AutoPersonal L3Offer3AgentFour-Door CarMedsize
2AI49188Nevada0.134960NoPremiumBachelor2/19/11EmployedF0.487763...0.5142860.3838380.00.125Personal AutoPersonal L3Offer1AgentTwo-Door CarMedsize
3WW63253California0.070589NoBasicBachelor1/20/11UnemployedM0.000000...0.5142860.6565660.00.750Corporate AutoCorporate L2Offer1Call CenterSUVMedsize
4HB64268Washington0.011245NoBasicBachelor2/3/11EmployedM0.438443...0.3428570.4444440.00.000Personal AutoPersonal L1Offer1AgentFour-Door CarMedsize
..................................................................
9129LA72316California0.264137NoBasicBachelor2/10/11EmployedM0.719547...0.5142860.8989900.00.125Personal AutoPersonal L1Offer2WebFour-Door CarMedsize
9130PK87824California0.014719YesExtendedCollege2/12/11EmployedF0.216081...0.4000000.2828280.00.000Corporate AutoCorporate L3Offer1BranchFour-Door CarMedsize
9131TD14365California0.076951NoExtendedBachelor2/6/11UnemployedM0.000000...0.2571430.3737370.60.125Corporate AutoCorporate L2Offer1BranchFour-Door CarMedsize
9132UP19263California0.069098NoExtendedCollege2/3/11EmployedM0.219452...0.9714290.0303030.00.250Personal AutoPersonal L2Offer3BranchFour-Door CarLarge
9133Y167826California0.008766NoExtendedCollege2/14/11UnemployedM0.000000...0.0857140.9090910.00.000Corporate AutoCorporate L3Offer4Call CenterTwo-Door CarMedsize
\n", + "

9134 rows × 23 columns

\n", + "
" + ], + "text/plain": [ + " customer state customer_lifetime_value response coverage \\\n", + "0 BU79786 Washington 0.010629 No Basic \n", + "1 QZ44356 Arizona 0.062406 No Extended \n", + "2 AI49188 Nevada 0.134960 No Premium \n", + "3 WW63253 California 0.070589 No Basic \n", + "4 HB64268 Washington 0.011245 No Basic \n", + "... ... ... ... ... ... \n", + "9129 LA72316 California 0.264137 No Basic \n", + "9130 PK87824 California 0.014719 Yes Extended \n", + "9131 TD14365 California 0.076951 No Extended \n", + "9132 UP19263 California 0.069098 No Extended \n", + "9133 Y167826 California 0.008766 No Extended \n", + "\n", + " education effective_to_date employmentstatus gender income ... \\\n", + "0 Bachelor 2/24/11 Employed F 0.562847 ... \n", + "1 Bachelor 1/31/11 Unemployed F 0.000000 ... \n", + "2 Bachelor 2/19/11 Employed F 0.487763 ... \n", + "3 Bachelor 1/20/11 Unemployed M 0.000000 ... \n", + "4 Bachelor 2/3/11 Employed M 0.438443 ... \n", + "... ... ... ... ... ... ... \n", + "9129 Bachelor 2/10/11 Employed M 0.719547 ... \n", + "9130 College 2/12/11 Employed F 0.216081 ... \n", + "9131 Bachelor 2/6/11 Unemployed M 0.000000 ... \n", + "9132 College 2/3/11 Employed M 0.219452 ... \n", + "9133 College 2/14/11 Unemployed M 0.000000 ... \n", + "\n", + " months_since_last_claim months_since_policy_inception \\\n", + "0 0.914286 0.050505 \n", + "1 0.371429 0.424242 \n", + "2 0.514286 0.383838 \n", + "3 0.514286 0.656566 \n", + "4 0.342857 0.444444 \n", + "... ... ... \n", + "9129 0.514286 0.898990 \n", + "9130 0.400000 0.282828 \n", + "9131 0.257143 0.373737 \n", + "9132 0.971429 0.030303 \n", + "9133 0.085714 0.909091 \n", + "\n", + " number_of_open_complaints number_of_policies policy_type \\\n", + "0 0.0 0.000 Corporate Auto \n", + "1 0.0 0.875 Personal Auto \n", + "2 0.0 0.125 Personal Auto \n", + "3 0.0 0.750 Corporate Auto \n", + "4 0.0 0.000 Personal Auto \n", + "... ... ... ... 
\n", + "9129 0.0 0.125 Personal Auto \n", + "9130 0.0 0.000 Corporate Auto \n", + "9131 0.6 0.125 Corporate Auto \n", + "9132 0.0 0.250 Personal Auto \n", + "9133 0.0 0.000 Corporate Auto \n", + "\n", + " policy renew_offer_type sales_channel vehicle_class vehicle_size \n", + "0 Corporate L3 Offer1 Agent Two-Door Car Medsize \n", + "1 Personal L3 Offer3 Agent Four-Door Car Medsize \n", + "2 Personal L3 Offer1 Agent Two-Door Car Medsize \n", + "3 Corporate L2 Offer1 Call Center SUV Medsize \n", + "4 Personal L1 Offer1 Agent Four-Door Car Medsize \n", + "... ... ... ... ... ... \n", + "9129 Personal L1 Offer2 Web Four-Door Car Medsize \n", + "9130 Corporate L3 Offer1 Branch Four-Door Car Medsize \n", + "9131 Corporate L2 Offer1 Branch Four-Door Car Medsize \n", + "9132 Personal L2 Offer3 Branch Four-Door Car Large \n", + "9133 Corporate L3 Offer4 Call Center Two-Door Car Medsize \n", + "\n", + "[9134 rows x 23 columns]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Normalizing X numerical values\n", + "\n", + "num_X = X.select_dtypes(include=np.number)\n", + "scaler = MinMaxScaler() \n", + "normalized_test = scaler.fit_transform(num_X) \n", + "normalized_test = pd.DataFrame(normalized_test, columns = num_X.columns)\n", + "normalized_test\n", + "X[normalized_test.columns] = normalized_test\n", + "X" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}