From a2c09c4ecea060bc396c69616612bbdf9fc37636 Mon Sep 17 00:00:00 2001 From: Giuliana Miranda Date: Sat, 14 Oct 2023 16:57:57 +0100 Subject: [PATCH] Lab done --- ...b-customer-analysis-round-5]Giuliana.ipynb | 931 ++++++++++++++++++ 1 file changed, 931 insertions(+) create mode 100644 [lab-customer-analysis-round-5]Giuliana.ipynb diff --git a/ [lab-customer-analysis-round-5]Giuliana.ipynb b/ [lab-customer-analysis-round-5]Giuliana.ipynb new file mode 100644 index 0000000..8f5a9bc --- /dev/null +++ b/ [lab-customer-analysis-round-5]Giuliana.ipynb @@ -0,0 +1,931 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "id": "a43b4e39", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "import warnings\n", + "warnings.filterwarnings('ignore')\n", + "\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline\n", + "\n", + "import seaborn as sns\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "e2c0597e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
unnamed:_0customerstatecustomer_lifetime_valueresponsecoverageeducationeffective_to_dateemploymentstatusgender...number_of_open_complaintsnumber_of_policiespolicy_typepolicyrenew_offer_typesales_channeltotal_claim_amountvehicle_classvehicle_sizevehicle_type
00DK49336Arizona4809.216960NoBasicCollege2011-02-18EmployedM...0.09Corporate AutoCorporate L3Offer3Agent292.800000Four-Door CarMedsizeNaN
11KX64629California2228.525238NoBasicCollege2011-01-18UnemployedF...0.01Personal AutoPersonal L3Offer4Call Center744.924331Four-Door CarMedsizeNaN
22LZ68649Washington14947.917300NoBasicBachelor2011-02-10EmployedM...0.02Personal AutoPersonal L3Offer3Call Center480.000000SUVMedsizeA
33XL78013Oregon22332.439460YesExtendedCollege2011-01-11EmployedM...0.02Corporate AutoCorporate L3Offer2Branch484.013411Four-Door CarMedsizeA
44QA50777Oregon9025.067525NoPremiumBachelor2011-01-17Medical LeaveF...NaN7Personal AutoPersonal L2Offer1Branch707.925645Four-Door CarMedsizeNaN
..................................................................
1090510905FE99816Nevada15563.369440NoPremiumBachelor2011-01-19UnemployedF...NaN7Personal AutoPersonal L1Offer3Web1214.400000Luxury CarMedsizeA
1090610906KX53892Oregon5259.444853NoBasicCollege2011-01-06EmployedF...0.06Personal AutoPersonal L3Offer2Branch273.018929Four-Door CarMedsizeA
1090710907TL39050Arizona23893.304100NoExtendedBachelor2011-02-06EmployedF...0.02Corporate AutoCorporate L3Offer1Web381.306996Luxury SUVMedsizeNaN
1090810908WA60547California11971.977650NoPremiumCollege2011-02-13EmployedF...4.06Personal AutoPersonal L1Offer1Branch618.288849SUVMedsizeA
1090910909IV32877NaN6857.519928NaNBasicBachelor2011-01-08UnemployedM...0.03Personal AutoPersonal L1Offer4Web1021.719397SUVMedsizeNaN
\n", + "

10910 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " unnamed:_0 customer state customer_lifetime_value response \\\n", + "0 0 DK49336 Arizona 4809.216960 No \n", + "1 1 KX64629 California 2228.525238 No \n", + "2 2 LZ68649 Washington 14947.917300 No \n", + "3 3 XL78013 Oregon 22332.439460 Yes \n", + "4 4 QA50777 Oregon 9025.067525 No \n", + "... ... ... ... ... ... \n", + "10905 10905 FE99816 Nevada 15563.369440 No \n", + "10906 10906 KX53892 Oregon 5259.444853 No \n", + "10907 10907 TL39050 Arizona 23893.304100 No \n", + "10908 10908 WA60547 California 11971.977650 No \n", + "10909 10909 IV32877 NaN 6857.519928 NaN \n", + "\n", + " coverage education effective_to_date employmentstatus gender ... \\\n", + "0 Basic College 2011-02-18 Employed M ... \n", + "1 Basic College 2011-01-18 Unemployed F ... \n", + "2 Basic Bachelor 2011-02-10 Employed M ... \n", + "3 Extended College 2011-01-11 Employed M ... \n", + "4 Premium Bachelor 2011-01-17 Medical Leave F ... \n", + "... ... ... ... ... ... ... \n", + "10905 Premium Bachelor 2011-01-19 Unemployed F ... \n", + "10906 Basic College 2011-01-06 Employed F ... \n", + "10907 Extended Bachelor 2011-02-06 Employed F ... \n", + "10908 Premium College 2011-02-13 Employed F ... \n", + "10909 Basic Bachelor 2011-01-08 Unemployed M ... \n", + "\n", + " number_of_open_complaints number_of_policies policy_type \\\n", + "0 0.0 9 Corporate Auto \n", + "1 0.0 1 Personal Auto \n", + "2 0.0 2 Personal Auto \n", + "3 0.0 2 Corporate Auto \n", + "4 NaN 7 Personal Auto \n", + "... ... ... ... \n", + "10905 NaN 7 Personal Auto \n", + "10906 0.0 6 Personal Auto \n", + "10907 0.0 2 Corporate Auto \n", + "10908 4.0 6 Personal Auto \n", + "10909 0.0 3 Personal Auto \n", + "\n", + " policy renew_offer_type sales_channel total_claim_amount \\\n", + "0 Corporate L3 Offer3 Agent 292.800000 \n", + "1 Personal L3 Offer4 Call Center 744.924331 \n", + "2 Personal L3 Offer3 Call Center 480.000000 \n", + "3 Corporate L3 Offer2 Branch 484.013411 \n", + "4 Personal L2 Offer1 Branch 707.925645 \n", + "... ... ... ... ... \n", + "10905 Personal L1 Offer3 Web 1214.400000 \n", + "10906 Personal L3 Offer2 Branch 273.018929 \n", + "10907 Corporate L3 Offer1 Web 381.306996 \n", + "10908 Personal L1 Offer1 Branch 618.288849 \n", + "10909 Personal L1 Offer4 Web 1021.719397 \n", + "\n", + " vehicle_class vehicle_size vehicle_type \n", + "0 Four-Door Car Medsize NaN \n", + "1 Four-Door Car Medsize NaN \n", + "2 SUV Medsize A \n", + "3 Four-Door Car Medsize A \n", + "4 Four-Door Car Medsize NaN \n", + "... ... ... ... \n", + "10905 Luxury Car Medsize A \n", + "10906 Four-Door Car Medsize A \n", + "10907 Luxury SUV Medsize NaN \n", + "10908 SUV Medsize A \n", + "10909 SUV Medsize NaN \n", + "\n", + "[10910 rows x 26 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_custumers = pd.read_excel(\"/Users/giulianamiranda/Documents/Labs/lab-customer-analysis-round-2/files_for_lab/excel_files/marketing_customer_analysis.xlsx\")\n", + "\n", + "\n", + "cols = []\n", + "\n", + "for a in range(len(df_custumers.columns)):\n", + " cols.append(df_custumers.columns[a].lower().replace(' ', '_'))\n", + " \n", + "df_custumers.columns = cols\n", + "\n", + "df_custumers" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "f05702aa", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
unnamed:_0customer_lifetime_valueincomemonthly_premium_automonths_since_last_claimmonths_since_policy_inceptionnumber_of_open_complaintsnumber_of_policiestotal_claim_amount
004809.21696048029617.0520.09292.800000
112228.5252380643.0260.01744.924331
2214947.9173002213910034.0310.02480.000000
3322332.439460490789710.030.02484.013411
449025.06752523675117NaN31NaN7707.925645
..............................
109051090515563.3694400253NaN40NaN71214.400000
10906109065259.44485361146657.0680.06273.018929
109071090723893.3041003983720111.0630.02381.306996
109081090811971.977650641951580.0274.06618.288849
10909109096857.519928010131.010.031021.719397
\n", + "

10910 rows × 9 columns

\n", + "
" + ], + "text/plain": [ + " unnamed:_0 customer_lifetime_value income monthly_premium_auto \\\n", + "0 0 4809.216960 48029 61 \n", + "1 1 2228.525238 0 64 \n", + "2 2 14947.917300 22139 100 \n", + "3 3 22332.439460 49078 97 \n", + "4 4 9025.067525 23675 117 \n", + "... ... ... ... ... \n", + "10905 10905 15563.369440 0 253 \n", + "10906 10906 5259.444853 61146 65 \n", + "10907 10907 23893.304100 39837 201 \n", + "10908 10908 11971.977650 64195 158 \n", + "10909 10909 6857.519928 0 101 \n", + "\n", + " months_since_last_claim months_since_policy_inception \\\n", + "0 7.0 52 \n", + "1 3.0 26 \n", + "2 34.0 31 \n", + "3 10.0 3 \n", + "4 NaN 31 \n", + "... ... ... \n", + "10905 NaN 40 \n", + "10906 7.0 68 \n", + "10907 11.0 63 \n", + "10908 0.0 27 \n", + "10909 31.0 1 \n", + "\n", + " number_of_open_complaints number_of_policies total_claim_amount \n", + "0 0.0 9 292.800000 \n", + "1 0.0 1 744.924331 \n", + "2 0.0 2 480.000000 \n", + "3 0.0 2 484.013411 \n", + "4 NaN 7 707.925645 \n", + "... ... ... ... \n", + "10905 NaN 7 1214.400000 \n", + "10906 0.0 6 273.018929 \n", + "10907 0.0 2 381.306996 \n", + "10908 4.0 6 618.288849 \n", + "10909 0.0 3 1021.719397 \n", + "\n", + "[10910 rows x 9 columns]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "numerical = df_custumers.select_dtypes(include=[np.number])\n", + "numerical\n", + "\n", + "numerical" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "77737dd5", + "metadata": {}, + "outputs": [], + "source": [ + "# X-y split.\n", + "\n", + "Y = df_custumers['total_claim_amount']\n", + "X = df_custumers.drop(['total_claim_amount'], axis=1) " + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "2e126245", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
unnamed:_0customer_lifetime_valueincomemonthly_premium_automonths_since_last_claimmonths_since_policy_inceptionnumber_of_open_complaintsnumber_of_policies
00.0000000.0357520.4803810.0000000.2000000.5252530.01.000
10.0000920.0040590.0000000.0126580.0857140.2626260.00.000
20.0001830.1602640.2214320.1645570.9714290.3131310.00.125
30.0002750.2509530.4908730.1518990.2857140.0303030.00.125
40.0004580.0349660.5055860.0000000.0571430.7373740.00.750
...........................
102720.9995420.2128330.0000000.5316460.4000000.3232320.00.125
102730.9997250.0412810.6115760.0168780.2000000.6868690.00.625
102740.9998170.2701220.3984460.5907170.3142860.6363640.00.125
102750.9999080.1237170.6420720.4092830.0000000.2727270.80.625
102761.0000000.0609070.0000000.1687760.8857140.0101010.00.250
\n", + "

10277 rows × 8 columns

\n", + "
" + ], + "text/plain": [ + " unnamed:_0 customer_lifetime_value income monthly_premium_auto \\\n", + "0 0.000000 0.035752 0.480381 0.000000 \n", + "1 0.000092 0.004059 0.000000 0.012658 \n", + "2 0.000183 0.160264 0.221432 0.164557 \n", + "3 0.000275 0.250953 0.490873 0.151899 \n", + "4 0.000458 0.034966 0.505586 0.000000 \n", + "... ... ... ... ... \n", + "10272 0.999542 0.212833 0.000000 0.531646 \n", + "10273 0.999725 0.041281 0.611576 0.016878 \n", + "10274 0.999817 0.270122 0.398446 0.590717 \n", + "10275 0.999908 0.123717 0.642072 0.409283 \n", + "10276 1.000000 0.060907 0.000000 0.168776 \n", + "\n", + " months_since_last_claim months_since_policy_inception \\\n", + "0 0.200000 0.525253 \n", + "1 0.085714 0.262626 \n", + "2 0.971429 0.313131 \n", + "3 0.285714 0.030303 \n", + "4 0.057143 0.737374 \n", + "... ... ... \n", + "10272 0.400000 0.323232 \n", + "10273 0.200000 0.686869 \n", + "10274 0.314286 0.636364 \n", + "10275 0.000000 0.272727 \n", + "10276 0.885714 0.010101 \n", + "\n", + " number_of_open_complaints number_of_policies \n", + "0 0.0 1.000 \n", + "1 0.0 0.000 \n", + "2 0.0 0.125 \n", + "3 0.0 0.125 \n", + "4 0.0 0.750 \n", + "... ... ... \n", + "10272 0.0 0.125 \n", + "10273 0.0 0.625 \n", + "10274 0.0 0.125 \n", + "10275 0.8 0.625 \n", + "10276 0.0 0.250 \n", + "\n", + "[10277 rows x 8 columns]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Normalize (numerical)\n", + "\n", + "numerical.isnull().sum()\n", + "numerical = numerical.dropna()\n", + "\n", + "from sklearn.preprocessing import MinMaxScaler\n", + "\n", + "num_data = numerical.drop(['total_claim_amount'], axis = 1)\n", + "scaler = MinMaxScaler() \n", + "normalized_df = scaler.fit_transform(num_data) \n", + "normalized_df = pd.DataFrame(normalized_df, columns = num_data.columns)\n", + "normalized_df\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51e4161b", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}