diff --git a/[lab-customer-analysis-round-5] Mickael Jossier.ipynb b/[lab-customer-analysis-round-5] Mickael Jossier.ipynb new file mode 100644 index 0000000..43909f8 --- /dev/null +++ b/[lab-customer-analysis-round-5] Mickael Jossier.ipynb @@ -0,0 +1,1281 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstatecustomer_lifetime_valueresponsecoverageeducationeffective_to_dateemploymentstatusgenderincome...number_of_policiespolicy_typepolicyrenew_offer_typesales_channeltotal_claim_amountvehicle_classvehicle_sizedatemonth
87QC35222California2683.470677NoBasicBachelor1/1/11EmployedF48269...1Corporate AutoCorporate L2Offer3Web282.151207Four-Door CarMedsize2011-01-01Jan
271AE98193Washington7859.414569NoBasicHigh School or Below1/1/11UnemployedM0...7Personal AutoPersonal L1Offer1Branch813.600000SUVMedsize2011-01-01Jan
307TM23514Oregon10272.608200NoExtendedCollege1/1/11EmployedM60145...3Personal AutoPersonal L3Offer3Web580.473259SUVMedsize2011-01-01Jan
355WB38524California2969.593296NoBasicHigh School or Below1/1/11EmployedM46131...1Personal AutoPersonal L3Offer2Branch355.200000Two-Door CarSmall2011-01-01Jan
479QZ42725Washington2310.882998NoBasicBachelor1/1/11UnemployedF0...1Personal AutoPersonal L3Offer2Agent460.800000Four-Door CarMedsize2011-01-01Jan
..................................................................
8914OH83983California3528.044252YesExtendedDoctor2/1/11EmployedM31278...1Personal AutoPersonal L2Offer1Branch441.600000Two-Door CarMedsize2011-02-01Feb
8938CV70978California7756.437921NoBasicBachelor2/1/11EmployedM97592...2Personal AutoPersonal L2Offer4Agent312.000000Four-Door CarMedsize2011-02-01Feb
8995LX30483California3371.528475NoBasicCollege2/1/11UnemployedF0...1Personal AutoPersonal L2Offer1Agent489.600000SUVMedsize2011-02-01Feb
9075IX18485California9594.248898NoExtendedHigh School or Below2/1/11RetiredF27443...2Personal AutoPersonal L2Offer1Agent685.048914Four-Door CarMedsize2011-02-01Feb
9122FH43628California25464.820590YesExtendedCollege2/1/11RetiredF13663...2Personal AutoPersonal L2Offer1Branch465.600000Four-Door CarSmall2011-02-01Feb
\n", + "

302 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " customer state customer_lifetime_value response coverage \\\n", + "87 QC35222 California 2683.470677 No Basic \n", + "271 AE98193 Washington 7859.414569 No Basic \n", + "307 TM23514 Oregon 10272.608200 No Extended \n", + "355 WB38524 California 2969.593296 No Basic \n", + "479 QZ42725 Washington 2310.882998 No Basic \n", + "... ... ... ... ... ... \n", + "8914 OH83983 California 3528.044252 Yes Extended \n", + "8938 CV70978 California 7756.437921 No Basic \n", + "8995 LX30483 California 3371.528475 No Basic \n", + "9075 IX18485 California 9594.248898 No Extended \n", + "9122 FH43628 California 25464.820590 Yes Extended \n", + "\n", + " education effective_to_date employmentstatus gender income \\\n", + "87 Bachelor 1/1/11 Employed F 48269 \n", + "271 High School or Below 1/1/11 Unemployed M 0 \n", + "307 College 1/1/11 Employed M 60145 \n", + "355 High School or Below 1/1/11 Employed M 46131 \n", + "479 Bachelor 1/1/11 Unemployed F 0 \n", + "... ... ... ... ... ... \n", + "8914 Doctor 2/1/11 Employed M 31278 \n", + "8938 Bachelor 2/1/11 Employed M 97592 \n", + "8995 College 2/1/11 Unemployed F 0 \n", + "9075 High School or Below 2/1/11 Retired F 27443 \n", + "9122 College 2/1/11 Retired F 13663 \n", + "\n", + " ... number_of_policies policy_type policy renew_offer_type \\\n", + "87 ... 1 Corporate Auto Corporate L2 Offer3 \n", + "271 ... 7 Personal Auto Personal L1 Offer1 \n", + "307 ... 3 Personal Auto Personal L3 Offer3 \n", + "355 ... 1 Personal Auto Personal L3 Offer2 \n", + "479 ... 1 Personal Auto Personal L3 Offer2 \n", + "... ... ... ... ... ... \n", + "8914 ... 1 Personal Auto Personal L2 Offer1 \n", + "8938 ... 2 Personal Auto Personal L2 Offer4 \n", + "8995 ... 1 Personal Auto Personal L2 Offer1 \n", + "9075 ... 2 Personal Auto Personal L2 Offer1 \n", + "9122 ... 2 Personal Auto Personal L2 Offer1 \n", + "\n", + " sales_channel total_claim_amount vehicle_class vehicle_size \\\n", + "87 Web 282.151207 Four-Door Car Medsize \n", + "271 Branch 813.600000 SUV Medsize \n", + "307 Web 580.473259 SUV Medsize \n", + "355 Branch 355.200000 Two-Door Car Small \n", + "479 Agent 460.800000 Four-Door Car Medsize \n", + "... ... ... ... ... \n", + "8914 Branch 441.600000 Two-Door Car Medsize \n", + "8938 Agent 312.000000 Four-Door Car Medsize \n", + "8995 Agent 489.600000 SUV Medsize \n", + "9075 Agent 685.048914 Four-Door Car Medsize \n", + "9122 Branch 465.600000 Four-Door Car Small \n", + "\n", + " date month \n", + "87 2011-01-01 Jan \n", + "271 2011-01-01 Jan \n", + "307 2011-01-01 Jan \n", + "355 2011-01-01 Jan \n", + "479 2011-01-01 Jan \n", + "... ... ... \n", + "8914 2011-02-01 Feb \n", + "8938 2011-02-01 Feb \n", + "8995 2011-02-01 Feb \n", + "9075 2011-02-01 Feb \n", + "9122 2011-02-01 Feb \n", + "\n", + "[302 rows x 26 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "import pandas as pd\n", + "\n", + "\n", + "mark_cust_A = pd.read_excel(\"marketing_customer_analysis.xlsx\")\n", + "# fonction processing all the steps to make the data clean for our file\n", + "def Data_quarter_Y(mark_cust_A):\n", + " # creation of a list to store columns name after lower and removing spaces\n", + " cols_st = []\n", + " # \n", + " for i in range(len(mark_cust_A.columns)):\n", + " cols_st.append(mark_cust_A.columns[i].lower().replace(\" \",\"_\"))\n", + " mark_cust_A.columns = cols_st\n", + " \n", + " mark_cust_A = mark_cust_A.fillna({'response':'No','number_of_open_complaints': 0,'vehicle_type':'Other vehicule'})\n", + " mark_cust_A['date'] = pd.to_datetime(mark_cust_A['effective_to_date'])\n", + " mark_cust_A['month'] = mark_cust_A['date'].dt.to_period('M')\n", + " \n", + " condition1 = (mark_cust_A['date'] == '2011-01')\n", + " condition2 = (mark_cust_A['date'] == '2011-02')\n", + " condition3 = (mark_cust_A['date'] == '2011-03')\n", + " \n", + " data_for_jan = mark_cust_A[condition1].copy()\n", + " data_for_jan['month'] = 'Jan'\n", + "\n", + " data_for_fev = mark_cust_A[condition2].copy()\n", + " data_for_fev['month'] = 'Feb'\n", + "\n", + " data_for_march = mark_cust_A[condition3].copy()\n", + " data_for_march['month'] = 'March'\n", + " \n", + " return pd.concat([data_for_jan, data_for_fev, data_for_march], axis=0) \n", + "\n", + "mark_cust_A = Data_quarter_Y(mark_cust_A)\n", + "\n", + "mark_cust_A" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_lifetime_valueincomemonthly_premium_automonths_since_last_claimmonths_since_policy_inceptionnumber_of_open_complaintsnumber_of_policiestotal_claim_amount
872683.470677482696937931282.151207
2717859.4145690113191007813.600000
30710272.6082006014513282803580.473259
3552969.5932964613174272801355.200000
4792310.882998064122401460.800000
...........................
89143528.0442523127892249541441.600000
89387756.437921975926510702312.000000
89953371.528475010223631489.600000
90759594.24889827443863122685.048914
912225464.820590136639716602465.600000
\n", + "

302 rows × 8 columns

\n", + "
" + ], + "text/plain": [ + " customer_lifetime_value income monthly_premium_auto \\\n", + "87 2683.470677 48269 69 \n", + "271 7859.414569 0 113 \n", + "307 10272.608200 60145 132 \n", + "355 2969.593296 46131 74 \n", + "479 2310.882998 0 64 \n", + "... ... ... ... \n", + "8914 3528.044252 31278 92 \n", + "8938 7756.437921 97592 65 \n", + "8995 3371.528475 0 102 \n", + "9075 9594.248898 27443 86 \n", + "9122 25464.820590 13663 97 \n", + "\n", + " months_since_last_claim months_since_policy_inception \\\n", + "87 3 79 \n", + "271 19 10 \n", + "307 8 28 \n", + "355 27 28 \n", + "479 12 24 \n", + "... ... ... \n", + "8914 24 95 \n", + "8938 10 7 \n", + "8995 2 36 \n", + "9075 3 1 \n", + "9122 1 66 \n", + "\n", + " number_of_open_complaints number_of_policies total_claim_amount \n", + "87 3 1 282.151207 \n", + "271 0 7 813.600000 \n", + "307 0 3 580.473259 \n", + "355 0 1 355.200000 \n", + "479 0 1 460.800000 \n", + "... ... ... ... \n", + "8914 4 1 441.600000 \n", + "8938 0 2 312.000000 \n", + "8995 3 1 489.600000 \n", + "9075 2 2 685.048914 \n", + "9122 0 2 465.600000 \n", + "\n", + "[302 rows x 8 columns]" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Numerical = mark_cust_A.select_dtypes(include='number')\n", + "Numerical" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "
OLS Regression Results
Dep. Variable: total_claim_amount R-squared: 0.543
Model: OLS Adj. R-squared: 0.532
Method: Least Squares F-statistic: 49.91
Date: Mon, 16 Oct 2023 Prob (F-statistic): 1.79e-46
Time: 21:07:21 Log-Likelihood: -2052.0
No. Observations: 302 AIC: 4120.
Df Residuals: 294 BIC: 4150.
Df Model: 7
Covariance Type: nonrobust
\n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "
coef std err t P>|t| [0.025 0.975]
const 101.6399 51.578 1.971 0.050 0.130 203.150
customer_lifetime_value -0.0003 0.002 -0.163 0.870 -0.004 0.003
income -0.0030 0.000 -7.032 0.000 -0.004 -0.002
monthly_premium_auto 5.5780 0.377 14.777 0.000 4.835 6.321
months_since_last_claim -0.3473 1.309 -0.265 0.791 -2.923 2.228
months_since_policy_inception -0.7589 0.444 -1.709 0.088 -1.633 0.115
number_of_open_complaints -11.8580 13.786 -0.860 0.390 -38.989 15.273
number_of_policies -4.9817 5.348 -0.931 0.352 -15.508 5.544
\n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "
Omnibus: 48.909 Durbin-Watson: 2.174
Prob(Omnibus): 0.000 Jarque-Bera (JB): 339.334
Skew: 0.377 Prob(JB): 2.06e-74
Kurtosis: 8.138 Cond. No. 2.06e+05


Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 2.06e+05. This might indicate that there are
strong multicollinearity or other numerical problems." + ], + "text/latex": [ + "\\begin{center}\n", + "\\begin{tabular}{lclc}\n", + "\\toprule\n", + "\\textbf{Dep. Variable:} & total\\_claim\\_amount & \\textbf{ R-squared: } & 0.543 \\\\\n", + "\\textbf{Model:} & OLS & \\textbf{ Adj. R-squared: } & 0.532 \\\\\n", + "\\textbf{Method:} & Least Squares & \\textbf{ F-statistic: } & 49.91 \\\\\n", + "\\textbf{Date:} & Mon, 16 Oct 2023 & \\textbf{ Prob (F-statistic):} & 1.79e-46 \\\\\n", + "\\textbf{Time:} & 21:07:21 & \\textbf{ Log-Likelihood: } & -2052.0 \\\\\n", + "\\textbf{No. Observations:} & 302 & \\textbf{ AIC: } & 4120. \\\\\n", + "\\textbf{Df Residuals:} & 294 & \\textbf{ BIC: } & 4150. \\\\\n", + "\\textbf{Df Model:} & 7 & \\textbf{ } & \\\\\n", + "\\textbf{Covariance Type:} & nonrobust & \\textbf{ } & \\\\\n", + "\\bottomrule\n", + "\\end{tabular}\n", + "\\begin{tabular}{lcccccc}\n", + " & \\textbf{coef} & \\textbf{std err} & \\textbf{t} & \\textbf{P$> |$t$|$} & \\textbf{[0.025} & \\textbf{0.975]} \\\\\n", + "\\midrule\n", + "\\textbf{const} & 101.6399 & 51.578 & 1.971 & 0.050 & 0.130 & 203.150 \\\\\n", + "\\textbf{customer\\_lifetime\\_value} & -0.0003 & 0.002 & -0.163 & 0.870 & -0.004 & 0.003 \\\\\n", + "\\textbf{income} & -0.0030 & 0.000 & -7.032 & 0.000 & -0.004 & -0.002 \\\\\n", + "\\textbf{monthly\\_premium\\_auto} & 5.5780 & 0.377 & 14.777 & 0.000 & 4.835 & 6.321 \\\\\n", + "\\textbf{months\\_since\\_last\\_claim} & -0.3473 & 1.309 & -0.265 & 0.791 & -2.923 & 2.228 \\\\\n", + "\\textbf{months\\_since\\_policy\\_inception} & -0.7589 & 0.444 & -1.709 & 0.088 & -1.633 & 0.115 \\\\\n", + "\\textbf{number\\_of\\_open\\_complaints} & -11.8580 & 13.786 & -0.860 & 0.390 & -38.989 & 15.273 \\\\\n", + "\\textbf{number\\_of\\_policies} & -4.9817 & 5.348 & -0.931 & 0.352 & -15.508 & 5.544 \\\\\n", + "\\bottomrule\n", + "\\end{tabular}\n", + "\\begin{tabular}{lclc}\n", + "\\textbf{Omnibus:} & 48.909 & \\textbf{ Durbin-Watson: } & 2.174 \\\\\n", + "\\textbf{Prob(Omnibus):} & 0.000 & \\textbf{ Jarque-Bera (JB): } & 339.334 \\\\\n", + "\\textbf{Skew:} & 0.377 & \\textbf{ Prob(JB): } & 2.06e-74 \\\\\n", + "\\textbf{Kurtosis:} & 8.138 & \\textbf{ Cond. No. } & 2.06e+05 \\\\\n", + "\\bottomrule\n", + "\\end{tabular}\n", + "%\\caption{OLS Regression Results}\n", + "\\end{center}\n", + "\n", + "Notes: \\newline\n", + " [1] Standard Errors assume that the covariance matrix of the errors is correctly specified. \\newline\n", + " [2] The condition number is large, 2.06e+05. This might indicate that there are \\newline\n", + " strong multicollinearity or other numerical problems." + ], + "text/plain": [ + "\n", + "\"\"\"\n", + " OLS Regression Results \n", + "==============================================================================\n", + "Dep. Variable: total_claim_amount R-squared: 0.543\n", + "Model: OLS Adj. R-squared: 0.532\n", + "Method: Least Squares F-statistic: 49.91\n", + "Date: Mon, 16 Oct 2023 Prob (F-statistic): 1.79e-46\n", + "Time: 21:07:21 Log-Likelihood: -2052.0\n", + "No. Observations: 302 AIC: 4120.\n", + "Df Residuals: 294 BIC: 4150.\n", + "Df Model: 7 \n", + "Covariance Type: nonrobust \n", + "=================================================================================================\n", + " coef std err t P>|t| [0.025 0.975]\n", + "-------------------------------------------------------------------------------------------------\n", + "const 101.6399 51.578 1.971 0.050 0.130 203.150\n", + "customer_lifetime_value -0.0003 0.002 -0.163 0.870 -0.004 0.003\n", + "income -0.0030 0.000 -7.032 0.000 -0.004 -0.002\n", + "monthly_premium_auto 5.5780 0.377 14.777 0.000 4.835 6.321\n", + "months_since_last_claim -0.3473 1.309 -0.265 0.791 -2.923 2.228\n", + "months_since_policy_inception -0.7589 0.444 -1.709 0.088 -1.633 0.115\n", + "number_of_open_complaints -11.8580 13.786 -0.860 0.390 -38.989 15.273\n", + "number_of_policies -4.9817 5.348 -0.931 0.352 -15.508 5.544\n", + "==============================================================================\n", + "Omnibus: 48.909 Durbin-Watson: 2.174\n", + "Prob(Omnibus): 0.000 Jarque-Bera (JB): 339.334\n", + "Skew: 0.377 Prob(JB): 2.06e-74\n", + "Kurtosis: 8.138 Cond. No. 2.06e+05\n", + "==============================================================================\n", + "\n", + "Notes:\n", + "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n", + "[2] The condition number is large, 2.06e+05. This might indicate that there are\n", + "strong multicollinearity or other numerical problems.\n", + "\"\"\"" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn import linear_model\n", + "from sklearn.metrics import mean_squared_error,r2_score\n", + "\n", + "import statsmodels.api as sm\n", + "from statsmodels.formula.api import ols\n", + "\n", + "Y = Numerical['total_claim_amount']\n", + "X = Numerical.drop(['total_claim_amount'], axis = 1)\n", + "# the mod el = Y = inter + coef. * X\n", + "\n", + "X = sm.add_constant(X)\n", + "\n", + "# ordinary least square\n", + "\n", + "model = sm.OLS(Y,X).fit()\n", + "model.summary()" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "
OLS Regression Results
Dep. Variable: total_claim_amount R-squared: 0.543
Model: OLS Adj. R-squared: 0.532
Method: Least Squares F-statistic: 49.91
Date: Mon, 16 Oct 2023 Prob (F-statistic): 1.79e-46
Time: 21:16:04 Log-Likelihood: -2052.0
No. Observations: 302 AIC: 4120.
Df Residuals: 294 BIC: 4150.
Df Model: 7
Covariance Type: nonrobust
\n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "
coef std err t P>|t| [0.025 0.975]
const 440.7154 12.605 34.963 0.000 415.907 465.523
x1 -2.3298 14.262 -0.163 0.870 -30.399 25.740
x2 -90.4006 12.856 -7.032 0.000 -115.701 -65.100
x3 210.0406 14.214 14.777 0.000 182.067 238.014
x4 -3.3836 12.752 -0.265 0.791 -28.480 21.713
x5 -22.0200 12.883 -1.709 0.088 -47.374 3.334
x6 -11.2644 13.096 -0.860 0.390 -37.038 14.509
x7 -11.8760 12.750 -0.931 0.352 -36.969 13.217
\n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "
Omnibus: 48.909 Durbin-Watson: 2.174
Prob(Omnibus): 0.000 Jarque-Bera (JB): 339.334
Skew: 0.377 Prob(JB): 2.06e-74
Kurtosis: 8.138 Cond. No. 1.70


Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified." + ], + "text/latex": [ + "\\begin{center}\n", + "\\begin{tabular}{lclc}\n", + "\\toprule\n", + "\\textbf{Dep. Variable:} & total\\_claim\\_amount & \\textbf{ R-squared: } & 0.543 \\\\\n", + "\\textbf{Model:} & OLS & \\textbf{ Adj. R-squared: } & 0.532 \\\\\n", + "\\textbf{Method:} & Least Squares & \\textbf{ F-statistic: } & 49.91 \\\\\n", + "\\textbf{Date:} & Mon, 16 Oct 2023 & \\textbf{ Prob (F-statistic):} & 1.79e-46 \\\\\n", + "\\textbf{Time:} & 21:16:04 & \\textbf{ Log-Likelihood: } & -2052.0 \\\\\n", + "\\textbf{No. Observations:} & 302 & \\textbf{ AIC: } & 4120. \\\\\n", + "\\textbf{Df Residuals:} & 294 & \\textbf{ BIC: } & 4150. \\\\\n", + "\\textbf{Df Model:} & 7 & \\textbf{ } & \\\\\n", + "\\textbf{Covariance Type:} & nonrobust & \\textbf{ } & \\\\\n", + "\\bottomrule\n", + "\\end{tabular}\n", + "\\begin{tabular}{lcccccc}\n", + " & \\textbf{coef} & \\textbf{std err} & \\textbf{t} & \\textbf{P$> |$t$|$} & \\textbf{[0.025} & \\textbf{0.975]} \\\\\n", + "\\midrule\n", + "\\textbf{const} & 440.7154 & 12.605 & 34.963 & 0.000 & 415.907 & 465.523 \\\\\n", + "\\textbf{x1} & -2.3298 & 14.262 & -0.163 & 0.870 & -30.399 & 25.740 \\\\\n", + "\\textbf{x2} & -90.4006 & 12.856 & -7.032 & 0.000 & -115.701 & -65.100 \\\\\n", + "\\textbf{x3} & 210.0406 & 14.214 & 14.777 & 0.000 & 182.067 & 238.014 \\\\\n", + "\\textbf{x4} & -3.3836 & 12.752 & -0.265 & 0.791 & -28.480 & 21.713 \\\\\n", + "\\textbf{x5} & -22.0200 & 12.883 & -1.709 & 0.088 & -47.374 & 3.334 \\\\\n", + "\\textbf{x6} & -11.2644 & 13.096 & -0.860 & 0.390 & -37.038 & 14.509 \\\\\n", + "\\textbf{x7} & -11.8760 & 12.750 & -0.931 & 0.352 & -36.969 & 13.217 \\\\\n", + "\\bottomrule\n", + "\\end{tabular}\n", + "\\begin{tabular}{lclc}\n", + "\\textbf{Omnibus:} & 48.909 & \\textbf{ Durbin-Watson: } & 2.174 \\\\\n", + "\\textbf{Prob(Omnibus):} & 0.000 & \\textbf{ Jarque-Bera (JB): } & 339.334 \\\\\n", + "\\textbf{Skew:} & 0.377 & \\textbf{ Prob(JB): } & 2.06e-74 \\\\\n", + "\\textbf{Kurtosis:} & 8.138 & \\textbf{ Cond. No. } & 1.70 \\\\\n", + "\\bottomrule\n", + "\\end{tabular}\n", + "%\\caption{OLS Regression Results}\n", + "\\end{center}\n", + "\n", + "Notes: \\newline\n", + " [1] Standard Errors assume that the covariance matrix of the errors is correctly specified." + ], + "text/plain": [ + "\n", + "\"\"\"\n", + " OLS Regression Results \n", + "==============================================================================\n", + "Dep. Variable: total_claim_amount R-squared: 0.543\n", + "Model: OLS Adj. R-squared: 0.532\n", + "Method: Least Squares F-statistic: 49.91\n", + "Date: Mon, 16 Oct 2023 Prob (F-statistic): 1.79e-46\n", + "Time: 21:16:04 Log-Likelihood: -2052.0\n", + "No. Observations: 302 AIC: 4120.\n", + "Df Residuals: 294 BIC: 4150.\n", + "Df Model: 7 \n", + "Covariance Type: nonrobust \n", + "==============================================================================\n", + " coef std err t P>|t| [0.025 0.975]\n", + "------------------------------------------------------------------------------\n", + "const 440.7154 12.605 34.963 0.000 415.907 465.523\n", + "x1 -2.3298 14.262 -0.163 0.870 -30.399 25.740\n", + "x2 -90.4006 12.856 -7.032 0.000 -115.701 -65.100\n", + "x3 210.0406 14.214 14.777 0.000 182.067 238.014\n", + "x4 -3.3836 12.752 -0.265 0.791 -28.480 21.713\n", + "x5 -22.0200 12.883 -1.709 0.088 -47.374 3.334\n", + "x6 -11.2644 13.096 -0.860 0.390 -37.038 14.509\n", + "x7 -11.8760 12.750 -0.931 0.352 -36.969 13.217\n", + "==============================================================================\n", + "Omnibus: 48.909 Durbin-Watson: 2.174\n", + "Prob(Omnibus): 0.000 Jarque-Bera (JB): 339.334\n", + "Skew: 0.377 Prob(JB): 2.06e-74\n", + "Kurtosis: 8.138 Cond. No. 1.70\n", + "==============================================================================\n", + "\n", + "Notes:\n", + "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n", + "\"\"\"" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# X-y split. using SKlearn\n", + "\n", + "from sklearn.preprocessing import StandardScaler\n", + "\n", + "Y = mark_cust_A['total_claim_amount']\n", + "\n", + "X = Numerical.drop('total_claim_amount', axis=1)\n", + "# nomralize the data using standard scaler \n", + "scaler = StandardScaler()\n", + "X = scaler.fit_transform(X)\n", + "\n", + "X = sm.add_constant(X)\n", + "\n", + "model = sm.OLS(Y, X).fit()\n", + "\n", + "model.summary()" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_lifetime_valueincomemonthly_premium_automonths_since_last_claimmonths_since_policy_inceptionnumber_of_open_complaintsnumber_of_policiestotal_claim_amount
customer_lifetime_value1.0000000.0401150.4408480.070187-0.020998-0.167398-0.0471000.279324
income0.0401151.000000-0.0836730.014751-0.075638-0.105281-0.098446-0.325577
monthly_premium_auto0.440848-0.0836731.0000000.111741-0.024951-0.0842780.0184890.680185
months_since_last_claim0.0701870.0147510.1117411.000000-0.0007850.065477-0.0524980.057838
months_since_policy_inception-0.020998-0.075638-0.024951-0.0007851.0000000.179870-0.076588-0.067206
number_of_open_complaints-0.167398-0.105281-0.0842780.0654770.1798701.000000-0.016099-0.072091
number_of_policies-0.047100-0.0984460.018489-0.052498-0.076588-0.0160991.0000000.009577
total_claim_amount0.279324-0.3255770.6801850.057838-0.067206-0.0720910.0095771.000000
\n", + "
" + ], + "text/plain": [ + " customer_lifetime_value income \\\n", + "customer_lifetime_value 1.000000 0.040115 \n", + "income 0.040115 1.000000 \n", + "monthly_premium_auto 0.440848 -0.083673 \n", + "months_since_last_claim 0.070187 0.014751 \n", + "months_since_policy_inception -0.020998 -0.075638 \n", + "number_of_open_complaints -0.167398 -0.105281 \n", + "number_of_policies -0.047100 -0.098446 \n", + "total_claim_amount 0.279324 -0.325577 \n", + "\n", + " monthly_premium_auto months_since_last_claim \\\n", + "customer_lifetime_value 0.440848 0.070187 \n", + "income -0.083673 0.014751 \n", + "monthly_premium_auto 1.000000 0.111741 \n", + "months_since_last_claim 0.111741 1.000000 \n", + "months_since_policy_inception -0.024951 -0.000785 \n", + "number_of_open_complaints -0.084278 0.065477 \n", + "number_of_policies 0.018489 -0.052498 \n", + "total_claim_amount 0.680185 0.057838 \n", + "\n", + " months_since_policy_inception \\\n", + "customer_lifetime_value -0.020998 \n", + "income -0.075638 \n", + "monthly_premium_auto -0.024951 \n", + "months_since_last_claim -0.000785 \n", + "months_since_policy_inception 1.000000 \n", + "number_of_open_complaints 0.179870 \n", + "number_of_policies -0.076588 \n", + "total_claim_amount -0.067206 \n", + "\n", + " number_of_open_complaints number_of_policies \\\n", + "customer_lifetime_value -0.167398 -0.047100 \n", + "income -0.105281 -0.098446 \n", + "monthly_premium_auto -0.084278 0.018489 \n", + "months_since_last_claim 0.065477 -0.052498 \n", + "months_since_policy_inception 0.179870 -0.076588 \n", + "number_of_open_complaints 1.000000 -0.016099 \n", + "number_of_policies -0.016099 1.000000 \n", + "total_claim_amount -0.072091 0.009577 \n", + "\n", + " total_claim_amount \n", + "customer_lifetime_value 0.279324 \n", + "income -0.325577 \n", + "monthly_premium_auto 0.680185 \n", + "months_since_last_claim 0.057838 \n", + "months_since_policy_inception -0.067206 \n", + "number_of_open_complaints -0.072091 \n", + "number_of_policies 0.009577 \n", + "total_claim_amount 1.000000 " + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# normalising the numerical Data\n", + "\n", + "correlations_matrix = Numerical.corr()\n", + "\n", + "correlations_matrix" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/marketing_customer_analysis.xlsx b/marketing_customer_analysis.xlsx new file mode 100644 index 0000000..ad13851 Binary files /dev/null and b/marketing_customer_analysis.xlsx differ