diff --git a/[lab-customer-analysis-round-5] Mickael Jossier.ipynb b/[lab-customer-analysis-round-5] Mickael Jossier.ipynb
new file mode 100644
index 0000000..43909f8
--- /dev/null
+++ b/[lab-customer-analysis-round-5] Mickael Jossier.ipynb
@@ -0,0 +1,1281 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " customer | \n",
+ " state | \n",
+ " customer_lifetime_value | \n",
+ " response | \n",
+ " coverage | \n",
+ " education | \n",
+ " effective_to_date | \n",
+ " employmentstatus | \n",
+ " gender | \n",
+ " income | \n",
+ " ... | \n",
+ " number_of_policies | \n",
+ " policy_type | \n",
+ " policy | \n",
+ " renew_offer_type | \n",
+ " sales_channel | \n",
+ " total_claim_amount | \n",
+ " vehicle_class | \n",
+ " vehicle_size | \n",
+ " date | \n",
+ " month | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 87 | \n",
+ " QC35222 | \n",
+ " California | \n",
+ " 2683.470677 | \n",
+ " No | \n",
+ " Basic | \n",
+ " Bachelor | \n",
+ " 1/1/11 | \n",
+ " Employed | \n",
+ " F | \n",
+ " 48269 | \n",
+ " ... | \n",
+ " 1 | \n",
+ " Corporate Auto | \n",
+ " Corporate L2 | \n",
+ " Offer3 | \n",
+ " Web | \n",
+ " 282.151207 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " 2011-01-01 | \n",
+ " Jan | \n",
+ "
\n",
+ " \n",
+ " | 271 | \n",
+ " AE98193 | \n",
+ " Washington | \n",
+ " 7859.414569 | \n",
+ " No | \n",
+ " Basic | \n",
+ " High School or Below | \n",
+ " 1/1/11 | \n",
+ " Unemployed | \n",
+ " M | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 7 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer1 | \n",
+ " Branch | \n",
+ " 813.600000 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " 2011-01-01 | \n",
+ " Jan | \n",
+ "
\n",
+ " \n",
+ " | 307 | \n",
+ " TM23514 | \n",
+ " Oregon | \n",
+ " 10272.608200 | \n",
+ " No | \n",
+ " Extended | \n",
+ " College | \n",
+ " 1/1/11 | \n",
+ " Employed | \n",
+ " M | \n",
+ " 60145 | \n",
+ " ... | \n",
+ " 3 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer3 | \n",
+ " Web | \n",
+ " 580.473259 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " 2011-01-01 | \n",
+ " Jan | \n",
+ "
\n",
+ " \n",
+ " | 355 | \n",
+ " WB38524 | \n",
+ " California | \n",
+ " 2969.593296 | \n",
+ " No | \n",
+ " Basic | \n",
+ " High School or Below | \n",
+ " 1/1/11 | \n",
+ " Employed | \n",
+ " M | \n",
+ " 46131 | \n",
+ " ... | \n",
+ " 1 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer2 | \n",
+ " Branch | \n",
+ " 355.200000 | \n",
+ " Two-Door Car | \n",
+ " Small | \n",
+ " 2011-01-01 | \n",
+ " Jan | \n",
+ "
\n",
+ " \n",
+ " | 479 | \n",
+ " QZ42725 | \n",
+ " Washington | \n",
+ " 2310.882998 | \n",
+ " No | \n",
+ " Basic | \n",
+ " Bachelor | \n",
+ " 1/1/11 | \n",
+ " Unemployed | \n",
+ " F | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 1 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer2 | \n",
+ " Agent | \n",
+ " 460.800000 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " 2011-01-01 | \n",
+ " Jan | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 8914 | \n",
+ " OH83983 | \n",
+ " California | \n",
+ " 3528.044252 | \n",
+ " Yes | \n",
+ " Extended | \n",
+ " Doctor | \n",
+ " 2/1/11 | \n",
+ " Employed | \n",
+ " M | \n",
+ " 31278 | \n",
+ " ... | \n",
+ " 1 | \n",
+ " Personal Auto | \n",
+ " Personal L2 | \n",
+ " Offer1 | \n",
+ " Branch | \n",
+ " 441.600000 | \n",
+ " Two-Door Car | \n",
+ " Medsize | \n",
+ " 2011-02-01 | \n",
+ " Feb | \n",
+ "
\n",
+ " \n",
+ " | 8938 | \n",
+ " CV70978 | \n",
+ " California | \n",
+ " 7756.437921 | \n",
+ " No | \n",
+ " Basic | \n",
+ " Bachelor | \n",
+ " 2/1/11 | \n",
+ " Employed | \n",
+ " M | \n",
+ " 97592 | \n",
+ " ... | \n",
+ " 2 | \n",
+ " Personal Auto | \n",
+ " Personal L2 | \n",
+ " Offer4 | \n",
+ " Agent | \n",
+ " 312.000000 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " 2011-02-01 | \n",
+ " Feb | \n",
+ "
\n",
+ " \n",
+ " | 8995 | \n",
+ " LX30483 | \n",
+ " California | \n",
+ " 3371.528475 | \n",
+ " No | \n",
+ " Basic | \n",
+ " College | \n",
+ " 2/1/11 | \n",
+ " Unemployed | \n",
+ " F | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 1 | \n",
+ " Personal Auto | \n",
+ " Personal L2 | \n",
+ " Offer1 | \n",
+ " Agent | \n",
+ " 489.600000 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ " 2011-02-01 | \n",
+ " Feb | \n",
+ "
\n",
+ " \n",
+ " | 9075 | \n",
+ " IX18485 | \n",
+ " California | \n",
+ " 9594.248898 | \n",
+ " No | \n",
+ " Extended | \n",
+ " High School or Below | \n",
+ " 2/1/11 | \n",
+ " Retired | \n",
+ " F | \n",
+ " 27443 | \n",
+ " ... | \n",
+ " 2 | \n",
+ " Personal Auto | \n",
+ " Personal L2 | \n",
+ " Offer1 | \n",
+ " Agent | \n",
+ " 685.048914 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ " 2011-02-01 | \n",
+ " Feb | \n",
+ "
\n",
+ " \n",
+ " | 9122 | \n",
+ " FH43628 | \n",
+ " California | \n",
+ " 25464.820590 | \n",
+ " Yes | \n",
+ " Extended | \n",
+ " College | \n",
+ " 2/1/11 | \n",
+ " Retired | \n",
+ " F | \n",
+ " 13663 | \n",
+ " ... | \n",
+ " 2 | \n",
+ " Personal Auto | \n",
+ " Personal L2 | \n",
+ " Offer1 | \n",
+ " Branch | \n",
+ " 465.600000 | \n",
+ " Four-Door Car | \n",
+ " Small | \n",
+ " 2011-02-01 | \n",
+ " Feb | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
302 rows × 26 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " customer state customer_lifetime_value response coverage \\\n",
+ "87 QC35222 California 2683.470677 No Basic \n",
+ "271 AE98193 Washington 7859.414569 No Basic \n",
+ "307 TM23514 Oregon 10272.608200 No Extended \n",
+ "355 WB38524 California 2969.593296 No Basic \n",
+ "479 QZ42725 Washington 2310.882998 No Basic \n",
+ "... ... ... ... ... ... \n",
+ "8914 OH83983 California 3528.044252 Yes Extended \n",
+ "8938 CV70978 California 7756.437921 No Basic \n",
+ "8995 LX30483 California 3371.528475 No Basic \n",
+ "9075 IX18485 California 9594.248898 No Extended \n",
+ "9122 FH43628 California 25464.820590 Yes Extended \n",
+ "\n",
+ " education effective_to_date employmentstatus gender income \\\n",
+ "87 Bachelor 1/1/11 Employed F 48269 \n",
+ "271 High School or Below 1/1/11 Unemployed M 0 \n",
+ "307 College 1/1/11 Employed M 60145 \n",
+ "355 High School or Below 1/1/11 Employed M 46131 \n",
+ "479 Bachelor 1/1/11 Unemployed F 0 \n",
+ "... ... ... ... ... ... \n",
+ "8914 Doctor 2/1/11 Employed M 31278 \n",
+ "8938 Bachelor 2/1/11 Employed M 97592 \n",
+ "8995 College 2/1/11 Unemployed F 0 \n",
+ "9075 High School or Below 2/1/11 Retired F 27443 \n",
+ "9122 College 2/1/11 Retired F 13663 \n",
+ "\n",
+ " ... number_of_policies policy_type policy renew_offer_type \\\n",
+ "87 ... 1 Corporate Auto Corporate L2 Offer3 \n",
+ "271 ... 7 Personal Auto Personal L1 Offer1 \n",
+ "307 ... 3 Personal Auto Personal L3 Offer3 \n",
+ "355 ... 1 Personal Auto Personal L3 Offer2 \n",
+ "479 ... 1 Personal Auto Personal L3 Offer2 \n",
+ "... ... ... ... ... ... \n",
+ "8914 ... 1 Personal Auto Personal L2 Offer1 \n",
+ "8938 ... 2 Personal Auto Personal L2 Offer4 \n",
+ "8995 ... 1 Personal Auto Personal L2 Offer1 \n",
+ "9075 ... 2 Personal Auto Personal L2 Offer1 \n",
+ "9122 ... 2 Personal Auto Personal L2 Offer1 \n",
+ "\n",
+ " sales_channel total_claim_amount vehicle_class vehicle_size \\\n",
+ "87 Web 282.151207 Four-Door Car Medsize \n",
+ "271 Branch 813.600000 SUV Medsize \n",
+ "307 Web 580.473259 SUV Medsize \n",
+ "355 Branch 355.200000 Two-Door Car Small \n",
+ "479 Agent 460.800000 Four-Door Car Medsize \n",
+ "... ... ... ... ... \n",
+ "8914 Branch 441.600000 Two-Door Car Medsize \n",
+ "8938 Agent 312.000000 Four-Door Car Medsize \n",
+ "8995 Agent 489.600000 SUV Medsize \n",
+ "9075 Agent 685.048914 Four-Door Car Medsize \n",
+ "9122 Branch 465.600000 Four-Door Car Small \n",
+ "\n",
+ " date month \n",
+ "87 2011-01-01 Jan \n",
+ "271 2011-01-01 Jan \n",
+ "307 2011-01-01 Jan \n",
+ "355 2011-01-01 Jan \n",
+ "479 2011-01-01 Jan \n",
+ "... ... ... \n",
+ "8914 2011-02-01 Feb \n",
+ "8938 2011-02-01 Feb \n",
+ "8995 2011-02-01 Feb \n",
+ "9075 2011-02-01 Feb \n",
+ "9122 2011-02-01 Feb \n",
+ "\n",
+ "[302 rows x 26 columns]"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "\n",
+ "import pandas as pd\n",
+ "\n",
+ "\n",
+ "mark_cust_A = pd.read_excel(\"marketing_customer_analysis.xlsx\")\n",
+ "# fonction processing all the steps to make the data clean for our file\n",
+ "def Data_quarter_Y(mark_cust_A):\n",
+ " # creation of a list to store columns name after lower and removing spaces\n",
+ " cols_st = []\n",
+ " # \n",
+ " for i in range(len(mark_cust_A.columns)):\n",
+ " cols_st.append(mark_cust_A.columns[i].lower().replace(\" \",\"_\"))\n",
+ " mark_cust_A.columns = cols_st\n",
+ " \n",
+ " mark_cust_A = mark_cust_A.fillna({'response':'No','number_of_open_complaints': 0,'vehicle_type':'Other vehicule'})\n",
+ " mark_cust_A['date'] = pd.to_datetime(mark_cust_A['effective_to_date'])\n",
+ " mark_cust_A['month'] = mark_cust_A['date'].dt.to_period('M')\n",
+ " \n",
+ " condition1 = (mark_cust_A['date'] == '2011-01')\n",
+ " condition2 = (mark_cust_A['date'] == '2011-02')\n",
+ " condition3 = (mark_cust_A['date'] == '2011-03')\n",
+ " \n",
+ " data_for_jan = mark_cust_A[condition1].copy()\n",
+ " data_for_jan['month'] = 'Jan'\n",
+ "\n",
+ " data_for_fev = mark_cust_A[condition2].copy()\n",
+ " data_for_fev['month'] = 'Feb'\n",
+ "\n",
+ " data_for_march = mark_cust_A[condition3].copy()\n",
+ " data_for_march['month'] = 'March'\n",
+ " \n",
+ " return pd.concat([data_for_jan, data_for_fev, data_for_march], axis=0) \n",
+ "\n",
+ "mark_cust_A = Data_quarter_Y(mark_cust_A)\n",
+ "\n",
+ "mark_cust_A"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " customer_lifetime_value | \n",
+ " income | \n",
+ " monthly_premium_auto | \n",
+ " months_since_last_claim | \n",
+ " months_since_policy_inception | \n",
+ " number_of_open_complaints | \n",
+ " number_of_policies | \n",
+ " total_claim_amount | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 87 | \n",
+ " 2683.470677 | \n",
+ " 48269 | \n",
+ " 69 | \n",
+ " 3 | \n",
+ " 79 | \n",
+ " 3 | \n",
+ " 1 | \n",
+ " 282.151207 | \n",
+ "
\n",
+ " \n",
+ " | 271 | \n",
+ " 7859.414569 | \n",
+ " 0 | \n",
+ " 113 | \n",
+ " 19 | \n",
+ " 10 | \n",
+ " 0 | \n",
+ " 7 | \n",
+ " 813.600000 | \n",
+ "
\n",
+ " \n",
+ " | 307 | \n",
+ " 10272.608200 | \n",
+ " 60145 | \n",
+ " 132 | \n",
+ " 8 | \n",
+ " 28 | \n",
+ " 0 | \n",
+ " 3 | \n",
+ " 580.473259 | \n",
+ "
\n",
+ " \n",
+ " | 355 | \n",
+ " 2969.593296 | \n",
+ " 46131 | \n",
+ " 74 | \n",
+ " 27 | \n",
+ " 28 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 355.200000 | \n",
+ "
\n",
+ " \n",
+ " | 479 | \n",
+ " 2310.882998 | \n",
+ " 0 | \n",
+ " 64 | \n",
+ " 12 | \n",
+ " 24 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 460.800000 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 8914 | \n",
+ " 3528.044252 | \n",
+ " 31278 | \n",
+ " 92 | \n",
+ " 24 | \n",
+ " 95 | \n",
+ " 4 | \n",
+ " 1 | \n",
+ " 441.600000 | \n",
+ "
\n",
+ " \n",
+ " | 8938 | \n",
+ " 7756.437921 | \n",
+ " 97592 | \n",
+ " 65 | \n",
+ " 10 | \n",
+ " 7 | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 312.000000 | \n",
+ "
\n",
+ " \n",
+ " | 8995 | \n",
+ " 3371.528475 | \n",
+ " 0 | \n",
+ " 102 | \n",
+ " 2 | \n",
+ " 36 | \n",
+ " 3 | \n",
+ " 1 | \n",
+ " 489.600000 | \n",
+ "
\n",
+ " \n",
+ " | 9075 | \n",
+ " 9594.248898 | \n",
+ " 27443 | \n",
+ " 86 | \n",
+ " 3 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " 685.048914 | \n",
+ "
\n",
+ " \n",
+ " | 9122 | \n",
+ " 25464.820590 | \n",
+ " 13663 | \n",
+ " 97 | \n",
+ " 1 | \n",
+ " 66 | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 465.600000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
302 rows × 8 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " customer_lifetime_value income monthly_premium_auto \\\n",
+ "87 2683.470677 48269 69 \n",
+ "271 7859.414569 0 113 \n",
+ "307 10272.608200 60145 132 \n",
+ "355 2969.593296 46131 74 \n",
+ "479 2310.882998 0 64 \n",
+ "... ... ... ... \n",
+ "8914 3528.044252 31278 92 \n",
+ "8938 7756.437921 97592 65 \n",
+ "8995 3371.528475 0 102 \n",
+ "9075 9594.248898 27443 86 \n",
+ "9122 25464.820590 13663 97 \n",
+ "\n",
+ " months_since_last_claim months_since_policy_inception \\\n",
+ "87 3 79 \n",
+ "271 19 10 \n",
+ "307 8 28 \n",
+ "355 27 28 \n",
+ "479 12 24 \n",
+ "... ... ... \n",
+ "8914 24 95 \n",
+ "8938 10 7 \n",
+ "8995 2 36 \n",
+ "9075 3 1 \n",
+ "9122 1 66 \n",
+ "\n",
+ " number_of_open_complaints number_of_policies total_claim_amount \n",
+ "87 3 1 282.151207 \n",
+ "271 0 7 813.600000 \n",
+ "307 0 3 580.473259 \n",
+ "355 0 1 355.200000 \n",
+ "479 0 1 460.800000 \n",
+ "... ... ... ... \n",
+ "8914 4 1 441.600000 \n",
+ "8938 0 2 312.000000 \n",
+ "8995 3 1 489.600000 \n",
+ "9075 2 2 685.048914 \n",
+ "9122 0 2 465.600000 \n",
+ "\n",
+ "[302 rows x 8 columns]"
+ ]
+ },
+ "execution_count": 25,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "Numerical = mark_cust_A.select_dtypes(include='number')\n",
+ "Numerical"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "OLS Regression Results\n",
+ "\n",
+ " | Dep. Variable: | total_claim_amount | R-squared: | 0.543 | \n",
+ "
\n",
+ "\n",
+ " | Model: | OLS | Adj. R-squared: | 0.532 | \n",
+ "
\n",
+ "\n",
+ " | Method: | Least Squares | F-statistic: | 49.91 | \n",
+ "
\n",
+ "\n",
+ " | Date: | Mon, 16 Oct 2023 | Prob (F-statistic): | 1.79e-46 | \n",
+ "
\n",
+ "\n",
+ " | Time: | 21:07:21 | Log-Likelihood: | -2052.0 | \n",
+ "
\n",
+ "\n",
+ " | No. Observations: | 302 | AIC: | 4120. | \n",
+ "
\n",
+ "\n",
+ " | Df Residuals: | 294 | BIC: | 4150. | \n",
+ "
\n",
+ "\n",
+ " | Df Model: | 7 | | | \n",
+ "
\n",
+ "\n",
+ " | Covariance Type: | nonrobust | | | \n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ " | coef | std err | t | P>|t| | [0.025 | 0.975] | \n",
+ "
\n",
+ "\n",
+ " | const | 101.6399 | 51.578 | 1.971 | 0.050 | 0.130 | 203.150 | \n",
+ "
\n",
+ "\n",
+ " | customer_lifetime_value | -0.0003 | 0.002 | -0.163 | 0.870 | -0.004 | 0.003 | \n",
+ "
\n",
+ "\n",
+ " | income | -0.0030 | 0.000 | -7.032 | 0.000 | -0.004 | -0.002 | \n",
+ "
\n",
+ "\n",
+ " | monthly_premium_auto | 5.5780 | 0.377 | 14.777 | 0.000 | 4.835 | 6.321 | \n",
+ "
\n",
+ "\n",
+ " | months_since_last_claim | -0.3473 | 1.309 | -0.265 | 0.791 | -2.923 | 2.228 | \n",
+ "
\n",
+ "\n",
+ " | months_since_policy_inception | -0.7589 | 0.444 | -1.709 | 0.088 | -1.633 | 0.115 | \n",
+ "
\n",
+ "\n",
+ " | number_of_open_complaints | -11.8580 | 13.786 | -0.860 | 0.390 | -38.989 | 15.273 | \n",
+ "
\n",
+ "\n",
+ " | number_of_policies | -4.9817 | 5.348 | -0.931 | 0.352 | -15.508 | 5.544 | \n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ " | Omnibus: | 48.909 | Durbin-Watson: | 2.174 | \n",
+ "
\n",
+ "\n",
+ " | Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 339.334 | \n",
+ "
\n",
+ "\n",
+ " | Skew: | 0.377 | Prob(JB): | 2.06e-74 | \n",
+ "
\n",
+ "\n",
+ " | Kurtosis: | 8.138 | Cond. No. | 2.06e+05 | \n",
+ "
\n",
+ "
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 2.06e+05. This might indicate that there are
strong multicollinearity or other numerical problems."
+ ],
+ "text/latex": [
+ "\\begin{center}\n",
+ "\\begin{tabular}{lclc}\n",
+ "\\toprule\n",
+ "\\textbf{Dep. Variable:} & total\\_claim\\_amount & \\textbf{ R-squared: } & 0.543 \\\\\n",
+ "\\textbf{Model:} & OLS & \\textbf{ Adj. R-squared: } & 0.532 \\\\\n",
+ "\\textbf{Method:} & Least Squares & \\textbf{ F-statistic: } & 49.91 \\\\\n",
+ "\\textbf{Date:} & Mon, 16 Oct 2023 & \\textbf{ Prob (F-statistic):} & 1.79e-46 \\\\\n",
+ "\\textbf{Time:} & 21:07:21 & \\textbf{ Log-Likelihood: } & -2052.0 \\\\\n",
+ "\\textbf{No. Observations:} & 302 & \\textbf{ AIC: } & 4120. \\\\\n",
+ "\\textbf{Df Residuals:} & 294 & \\textbf{ BIC: } & 4150. \\\\\n",
+ "\\textbf{Df Model:} & 7 & \\textbf{ } & \\\\\n",
+ "\\textbf{Covariance Type:} & nonrobust & \\textbf{ } & \\\\\n",
+ "\\bottomrule\n",
+ "\\end{tabular}\n",
+ "\\begin{tabular}{lcccccc}\n",
+ " & \\textbf{coef} & \\textbf{std err} & \\textbf{t} & \\textbf{P$> |$t$|$} & \\textbf{[0.025} & \\textbf{0.975]} \\\\\n",
+ "\\midrule\n",
+ "\\textbf{const} & 101.6399 & 51.578 & 1.971 & 0.050 & 0.130 & 203.150 \\\\\n",
+ "\\textbf{customer\\_lifetime\\_value} & -0.0003 & 0.002 & -0.163 & 0.870 & -0.004 & 0.003 \\\\\n",
+ "\\textbf{income} & -0.0030 & 0.000 & -7.032 & 0.000 & -0.004 & -0.002 \\\\\n",
+ "\\textbf{monthly\\_premium\\_auto} & 5.5780 & 0.377 & 14.777 & 0.000 & 4.835 & 6.321 \\\\\n",
+ "\\textbf{months\\_since\\_last\\_claim} & -0.3473 & 1.309 & -0.265 & 0.791 & -2.923 & 2.228 \\\\\n",
+ "\\textbf{months\\_since\\_policy\\_inception} & -0.7589 & 0.444 & -1.709 & 0.088 & -1.633 & 0.115 \\\\\n",
+ "\\textbf{number\\_of\\_open\\_complaints} & -11.8580 & 13.786 & -0.860 & 0.390 & -38.989 & 15.273 \\\\\n",
+ "\\textbf{number\\_of\\_policies} & -4.9817 & 5.348 & -0.931 & 0.352 & -15.508 & 5.544 \\\\\n",
+ "\\bottomrule\n",
+ "\\end{tabular}\n",
+ "\\begin{tabular}{lclc}\n",
+ "\\textbf{Omnibus:} & 48.909 & \\textbf{ Durbin-Watson: } & 2.174 \\\\\n",
+ "\\textbf{Prob(Omnibus):} & 0.000 & \\textbf{ Jarque-Bera (JB): } & 339.334 \\\\\n",
+ "\\textbf{Skew:} & 0.377 & \\textbf{ Prob(JB): } & 2.06e-74 \\\\\n",
+ "\\textbf{Kurtosis:} & 8.138 & \\textbf{ Cond. No. } & 2.06e+05 \\\\\n",
+ "\\bottomrule\n",
+ "\\end{tabular}\n",
+ "%\\caption{OLS Regression Results}\n",
+ "\\end{center}\n",
+ "\n",
+ "Notes: \\newline\n",
+ " [1] Standard Errors assume that the covariance matrix of the errors is correctly specified. \\newline\n",
+ " [2] The condition number is large, 2.06e+05. This might indicate that there are \\newline\n",
+ " strong multicollinearity or other numerical problems."
+ ],
+ "text/plain": [
+ "\n",
+ "\"\"\"\n",
+ " OLS Regression Results \n",
+ "==============================================================================\n",
+ "Dep. Variable: total_claim_amount R-squared: 0.543\n",
+ "Model: OLS Adj. R-squared: 0.532\n",
+ "Method: Least Squares F-statistic: 49.91\n",
+ "Date: Mon, 16 Oct 2023 Prob (F-statistic): 1.79e-46\n",
+ "Time: 21:07:21 Log-Likelihood: -2052.0\n",
+ "No. Observations: 302 AIC: 4120.\n",
+ "Df Residuals: 294 BIC: 4150.\n",
+ "Df Model: 7 \n",
+ "Covariance Type: nonrobust \n",
+ "=================================================================================================\n",
+ " coef std err t P>|t| [0.025 0.975]\n",
+ "-------------------------------------------------------------------------------------------------\n",
+ "const 101.6399 51.578 1.971 0.050 0.130 203.150\n",
+ "customer_lifetime_value -0.0003 0.002 -0.163 0.870 -0.004 0.003\n",
+ "income -0.0030 0.000 -7.032 0.000 -0.004 -0.002\n",
+ "monthly_premium_auto 5.5780 0.377 14.777 0.000 4.835 6.321\n",
+ "months_since_last_claim -0.3473 1.309 -0.265 0.791 -2.923 2.228\n",
+ "months_since_policy_inception -0.7589 0.444 -1.709 0.088 -1.633 0.115\n",
+ "number_of_open_complaints -11.8580 13.786 -0.860 0.390 -38.989 15.273\n",
+ "number_of_policies -4.9817 5.348 -0.931 0.352 -15.508 5.544\n",
+ "==============================================================================\n",
+ "Omnibus: 48.909 Durbin-Watson: 2.174\n",
+ "Prob(Omnibus): 0.000 Jarque-Bera (JB): 339.334\n",
+ "Skew: 0.377 Prob(JB): 2.06e-74\n",
+ "Kurtosis: 8.138 Cond. No. 2.06e+05\n",
+ "==============================================================================\n",
+ "\n",
+ "Notes:\n",
+ "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
+ "[2] The condition number is large, 2.06e+05. This might indicate that there are\n",
+ "strong multicollinearity or other numerical problems.\n",
+ "\"\"\""
+ ]
+ },
+ "execution_count": 27,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from sklearn import linear_model\n",
+ "from sklearn.metrics import mean_squared_error,r2_score\n",
+ "\n",
+ "import statsmodels.api as sm\n",
+ "from statsmodels.formula.api import ols\n",
+ "\n",
+ "Y = Numerical['total_claim_amount']\n",
+ "X = Numerical.drop(['total_claim_amount'], axis = 1)\n",
+ "# the mod el = Y = inter + coef. * X\n",
+ "\n",
+ "X = sm.add_constant(X)\n",
+ "\n",
+ "# ordinary least square\n",
+ "\n",
+ "model = sm.OLS(Y,X).fit()\n",
+ "model.summary()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "OLS Regression Results\n",
+ "\n",
+ " | Dep. Variable: | total_claim_amount | R-squared: | 0.543 | \n",
+ "
\n",
+ "\n",
+ " | Model: | OLS | Adj. R-squared: | 0.532 | \n",
+ "
\n",
+ "\n",
+ " | Method: | Least Squares | F-statistic: | 49.91 | \n",
+ "
\n",
+ "\n",
+ " | Date: | Mon, 16 Oct 2023 | Prob (F-statistic): | 1.79e-46 | \n",
+ "
\n",
+ "\n",
+ " | Time: | 21:16:04 | Log-Likelihood: | -2052.0 | \n",
+ "
\n",
+ "\n",
+ " | No. Observations: | 302 | AIC: | 4120. | \n",
+ "
\n",
+ "\n",
+ " | Df Residuals: | 294 | BIC: | 4150. | \n",
+ "
\n",
+ "\n",
+ " | Df Model: | 7 | | | \n",
+ "
\n",
+ "\n",
+ " | Covariance Type: | nonrobust | | | \n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ " | coef | std err | t | P>|t| | [0.025 | 0.975] | \n",
+ "
\n",
+ "\n",
+ " | const | 440.7154 | 12.605 | 34.963 | 0.000 | 415.907 | 465.523 | \n",
+ "
\n",
+ "\n",
+ " | x1 | -2.3298 | 14.262 | -0.163 | 0.870 | -30.399 | 25.740 | \n",
+ "
\n",
+ "\n",
+ " | x2 | -90.4006 | 12.856 | -7.032 | 0.000 | -115.701 | -65.100 | \n",
+ "
\n",
+ "\n",
+ " | x3 | 210.0406 | 14.214 | 14.777 | 0.000 | 182.067 | 238.014 | \n",
+ "
\n",
+ "\n",
+ " | x4 | -3.3836 | 12.752 | -0.265 | 0.791 | -28.480 | 21.713 | \n",
+ "
\n",
+ "\n",
+ " | x5 | -22.0200 | 12.883 | -1.709 | 0.088 | -47.374 | 3.334 | \n",
+ "
\n",
+ "\n",
+ " | x6 | -11.2644 | 13.096 | -0.860 | 0.390 | -37.038 | 14.509 | \n",
+ "
\n",
+ "\n",
+ " | x7 | -11.8760 | 12.750 | -0.931 | 0.352 | -36.969 | 13.217 | \n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ " | Omnibus: | 48.909 | Durbin-Watson: | 2.174 | \n",
+ "
\n",
+ "\n",
+ " | Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 339.334 | \n",
+ "
\n",
+ "\n",
+ " | Skew: | 0.377 | Prob(JB): | 2.06e-74 | \n",
+ "
\n",
+ "\n",
+ " | Kurtosis: | 8.138 | Cond. No. | 1.70 | \n",
+ "
\n",
+ "
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified."
+ ],
+ "text/latex": [
+ "\\begin{center}\n",
+ "\\begin{tabular}{lclc}\n",
+ "\\toprule\n",
+ "\\textbf{Dep. Variable:} & total\\_claim\\_amount & \\textbf{ R-squared: } & 0.543 \\\\\n",
+ "\\textbf{Model:} & OLS & \\textbf{ Adj. R-squared: } & 0.532 \\\\\n",
+ "\\textbf{Method:} & Least Squares & \\textbf{ F-statistic: } & 49.91 \\\\\n",
+ "\\textbf{Date:} & Mon, 16 Oct 2023 & \\textbf{ Prob (F-statistic):} & 1.79e-46 \\\\\n",
+ "\\textbf{Time:} & 21:16:04 & \\textbf{ Log-Likelihood: } & -2052.0 \\\\\n",
+ "\\textbf{No. Observations:} & 302 & \\textbf{ AIC: } & 4120. \\\\\n",
+ "\\textbf{Df Residuals:} & 294 & \\textbf{ BIC: } & 4150. \\\\\n",
+ "\\textbf{Df Model:} & 7 & \\textbf{ } & \\\\\n",
+ "\\textbf{Covariance Type:} & nonrobust & \\textbf{ } & \\\\\n",
+ "\\bottomrule\n",
+ "\\end{tabular}\n",
+ "\\begin{tabular}{lcccccc}\n",
+ " & \\textbf{coef} & \\textbf{std err} & \\textbf{t} & \\textbf{P$> |$t$|$} & \\textbf{[0.025} & \\textbf{0.975]} \\\\\n",
+ "\\midrule\n",
+ "\\textbf{const} & 440.7154 & 12.605 & 34.963 & 0.000 & 415.907 & 465.523 \\\\\n",
+ "\\textbf{x1} & -2.3298 & 14.262 & -0.163 & 0.870 & -30.399 & 25.740 \\\\\n",
+ "\\textbf{x2} & -90.4006 & 12.856 & -7.032 & 0.000 & -115.701 & -65.100 \\\\\n",
+ "\\textbf{x3} & 210.0406 & 14.214 & 14.777 & 0.000 & 182.067 & 238.014 \\\\\n",
+ "\\textbf{x4} & -3.3836 & 12.752 & -0.265 & 0.791 & -28.480 & 21.713 \\\\\n",
+ "\\textbf{x5} & -22.0200 & 12.883 & -1.709 & 0.088 & -47.374 & 3.334 \\\\\n",
+ "\\textbf{x6} & -11.2644 & 13.096 & -0.860 & 0.390 & -37.038 & 14.509 \\\\\n",
+ "\\textbf{x7} & -11.8760 & 12.750 & -0.931 & 0.352 & -36.969 & 13.217 \\\\\n",
+ "\\bottomrule\n",
+ "\\end{tabular}\n",
+ "\\begin{tabular}{lclc}\n",
+ "\\textbf{Omnibus:} & 48.909 & \\textbf{ Durbin-Watson: } & 2.174 \\\\\n",
+ "\\textbf{Prob(Omnibus):} & 0.000 & \\textbf{ Jarque-Bera (JB): } & 339.334 \\\\\n",
+ "\\textbf{Skew:} & 0.377 & \\textbf{ Prob(JB): } & 2.06e-74 \\\\\n",
+ "\\textbf{Kurtosis:} & 8.138 & \\textbf{ Cond. No. } & 1.70 \\\\\n",
+ "\\bottomrule\n",
+ "\\end{tabular}\n",
+ "%\\caption{OLS Regression Results}\n",
+ "\\end{center}\n",
+ "\n",
+ "Notes: \\newline\n",
+ " [1] Standard Errors assume that the covariance matrix of the errors is correctly specified."
+ ],
+ "text/plain": [
+ "\n",
+ "\"\"\"\n",
+ " OLS Regression Results \n",
+ "==============================================================================\n",
+ "Dep. Variable: total_claim_amount R-squared: 0.543\n",
+ "Model: OLS Adj. R-squared: 0.532\n",
+ "Method: Least Squares F-statistic: 49.91\n",
+ "Date: Mon, 16 Oct 2023 Prob (F-statistic): 1.79e-46\n",
+ "Time: 21:16:04 Log-Likelihood: -2052.0\n",
+ "No. Observations: 302 AIC: 4120.\n",
+ "Df Residuals: 294 BIC: 4150.\n",
+ "Df Model: 7 \n",
+ "Covariance Type: nonrobust \n",
+ "==============================================================================\n",
+ " coef std err t P>|t| [0.025 0.975]\n",
+ "------------------------------------------------------------------------------\n",
+ "const 440.7154 12.605 34.963 0.000 415.907 465.523\n",
+ "x1 -2.3298 14.262 -0.163 0.870 -30.399 25.740\n",
+ "x2 -90.4006 12.856 -7.032 0.000 -115.701 -65.100\n",
+ "x3 210.0406 14.214 14.777 0.000 182.067 238.014\n",
+ "x4 -3.3836 12.752 -0.265 0.791 -28.480 21.713\n",
+ "x5 -22.0200 12.883 -1.709 0.088 -47.374 3.334\n",
+ "x6 -11.2644 13.096 -0.860 0.390 -37.038 14.509\n",
+ "x7 -11.8760 12.750 -0.931 0.352 -36.969 13.217\n",
+ "==============================================================================\n",
+ "Omnibus: 48.909 Durbin-Watson: 2.174\n",
+ "Prob(Omnibus): 0.000 Jarque-Bera (JB): 339.334\n",
+ "Skew: 0.377 Prob(JB): 2.06e-74\n",
+ "Kurtosis: 8.138 Cond. No. 1.70\n",
+ "==============================================================================\n",
+ "\n",
+ "Notes:\n",
+ "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
+ "\"\"\""
+ ]
+ },
+ "execution_count": 33,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# X-y split. using SKlearn\n",
+ "\n",
+ "from sklearn.preprocessing import StandardScaler\n",
+ "\n",
+ "Y = mark_cust_A['total_claim_amount']\n",
+ "\n",
+ "X = Numerical.drop('total_claim_amount', axis=1)\n",
+ "# nomralize the data using standard scaler \n",
+ "scaler = StandardScaler()\n",
+ "X = scaler.fit_transform(X)\n",
+ "\n",
+ "X = sm.add_constant(X)\n",
+ "\n",
+ "model = sm.OLS(Y, X).fit()\n",
+ "\n",
+ "model.summary()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " customer_lifetime_value | \n",
+ " income | \n",
+ " monthly_premium_auto | \n",
+ " months_since_last_claim | \n",
+ " months_since_policy_inception | \n",
+ " number_of_open_complaints | \n",
+ " number_of_policies | \n",
+ " total_claim_amount | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | customer_lifetime_value | \n",
+ " 1.000000 | \n",
+ " 0.040115 | \n",
+ " 0.440848 | \n",
+ " 0.070187 | \n",
+ " -0.020998 | \n",
+ " -0.167398 | \n",
+ " -0.047100 | \n",
+ " 0.279324 | \n",
+ "
\n",
+ " \n",
+ " | income | \n",
+ " 0.040115 | \n",
+ " 1.000000 | \n",
+ " -0.083673 | \n",
+ " 0.014751 | \n",
+ " -0.075638 | \n",
+ " -0.105281 | \n",
+ " -0.098446 | \n",
+ " -0.325577 | \n",
+ "
\n",
+ " \n",
+ " | monthly_premium_auto | \n",
+ " 0.440848 | \n",
+ " -0.083673 | \n",
+ " 1.000000 | \n",
+ " 0.111741 | \n",
+ " -0.024951 | \n",
+ " -0.084278 | \n",
+ " 0.018489 | \n",
+ " 0.680185 | \n",
+ "
\n",
+ " \n",
+ " | months_since_last_claim | \n",
+ " 0.070187 | \n",
+ " 0.014751 | \n",
+ " 0.111741 | \n",
+ " 1.000000 | \n",
+ " -0.000785 | \n",
+ " 0.065477 | \n",
+ " -0.052498 | \n",
+ " 0.057838 | \n",
+ "
\n",
+ " \n",
+ " | months_since_policy_inception | \n",
+ " -0.020998 | \n",
+ " -0.075638 | \n",
+ " -0.024951 | \n",
+ " -0.000785 | \n",
+ " 1.000000 | \n",
+ " 0.179870 | \n",
+ " -0.076588 | \n",
+ " -0.067206 | \n",
+ "
\n",
+ " \n",
+ " | number_of_open_complaints | \n",
+ " -0.167398 | \n",
+ " -0.105281 | \n",
+ " -0.084278 | \n",
+ " 0.065477 | \n",
+ " 0.179870 | \n",
+ " 1.000000 | \n",
+ " -0.016099 | \n",
+ " -0.072091 | \n",
+ "
\n",
+ " \n",
+ " | number_of_policies | \n",
+ " -0.047100 | \n",
+ " -0.098446 | \n",
+ " 0.018489 | \n",
+ " -0.052498 | \n",
+ " -0.076588 | \n",
+ " -0.016099 | \n",
+ " 1.000000 | \n",
+ " 0.009577 | \n",
+ "
\n",
+ " \n",
+ " | total_claim_amount | \n",
+ " 0.279324 | \n",
+ " -0.325577 | \n",
+ " 0.680185 | \n",
+ " 0.057838 | \n",
+ " -0.067206 | \n",
+ " -0.072091 | \n",
+ " 0.009577 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " customer_lifetime_value income \\\n",
+ "customer_lifetime_value 1.000000 0.040115 \n",
+ "income 0.040115 1.000000 \n",
+ "monthly_premium_auto 0.440848 -0.083673 \n",
+ "months_since_last_claim 0.070187 0.014751 \n",
+ "months_since_policy_inception -0.020998 -0.075638 \n",
+ "number_of_open_complaints -0.167398 -0.105281 \n",
+ "number_of_policies -0.047100 -0.098446 \n",
+ "total_claim_amount 0.279324 -0.325577 \n",
+ "\n",
+ " monthly_premium_auto months_since_last_claim \\\n",
+ "customer_lifetime_value 0.440848 0.070187 \n",
+ "income -0.083673 0.014751 \n",
+ "monthly_premium_auto 1.000000 0.111741 \n",
+ "months_since_last_claim 0.111741 1.000000 \n",
+ "months_since_policy_inception -0.024951 -0.000785 \n",
+ "number_of_open_complaints -0.084278 0.065477 \n",
+ "number_of_policies 0.018489 -0.052498 \n",
+ "total_claim_amount 0.680185 0.057838 \n",
+ "\n",
+ " months_since_policy_inception \\\n",
+ "customer_lifetime_value -0.020998 \n",
+ "income -0.075638 \n",
+ "monthly_premium_auto -0.024951 \n",
+ "months_since_last_claim -0.000785 \n",
+ "months_since_policy_inception 1.000000 \n",
+ "number_of_open_complaints 0.179870 \n",
+ "number_of_policies -0.076588 \n",
+ "total_claim_amount -0.067206 \n",
+ "\n",
+ " number_of_open_complaints number_of_policies \\\n",
+ "customer_lifetime_value -0.167398 -0.047100 \n",
+ "income -0.105281 -0.098446 \n",
+ "monthly_premium_auto -0.084278 0.018489 \n",
+ "months_since_last_claim 0.065477 -0.052498 \n",
+ "months_since_policy_inception 0.179870 -0.076588 \n",
+ "number_of_open_complaints 1.000000 -0.016099 \n",
+ "number_of_policies -0.016099 1.000000 \n",
+ "total_claim_amount -0.072091 0.009577 \n",
+ "\n",
+ " total_claim_amount \n",
+ "customer_lifetime_value 0.279324 \n",
+ "income -0.325577 \n",
+ "monthly_premium_auto 0.680185 \n",
+ "months_since_last_claim 0.057838 \n",
+ "months_since_policy_inception -0.067206 \n",
+ "number_of_open_complaints -0.072091 \n",
+ "number_of_policies 0.009577 \n",
+ "total_claim_amount 1.000000 "
+ ]
+ },
+ "execution_count": 38,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# normalising the numerical Data\n",
+ "\n",
+ "correlations_matrix = Numerical.corr()\n",
+ "\n",
+ "correlations_matrix"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "base",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.4"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/marketing_customer_analysis.xlsx b/marketing_customer_analysis.xlsx
new file mode 100644
index 0000000..ad13851
Binary files /dev/null and b/marketing_customer_analysis.xlsx differ