diff --git a/files_for_lab/[Davi]lab-customer-analysis-round-5.ipynb b/files_for_lab/[Davi]lab-customer-analysis-round-5.ipynb
new file mode 100644
index 0000000..dcd3c6a
--- /dev/null
+++ b/files_for_lab/[Davi]lab-customer-analysis-round-5.ipynb
@@ -0,0 +1,637 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "\n",
+ "import warnings\n",
+ "warnings.filterwarnings('ignore')\n",
+ "\n",
+ "import matplotlib.pyplot as plt\n",
+ "%matplotlib inline\n",
+ "\n",
+ "import seaborn as sns\n",
+ "from sklearn import linear_model\n",
+ "from sklearn.metrics import mean_squared_error, r2_score\n",
+ "\n",
+ "import statsmodels.api as sm\n",
+ "from statsmodels.formula.api import ols"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " customer | \n",
+ " state | \n",
+ " customer_lifetime_value | \n",
+ " response | \n",
+ " coverage | \n",
+ " education | \n",
+ " effective_to_date | \n",
+ " employmentstatus | \n",
+ " gender | \n",
+ " income | \n",
+ " ... | \n",
+ " months_since_policy_inception | \n",
+ " number_of_open_complaints | \n",
+ " number_of_policies | \n",
+ " policy_type | \n",
+ " policy | \n",
+ " renew_offer_type | \n",
+ " sales_channel | \n",
+ " total_claim_amount | \n",
+ " vehicle_class | \n",
+ " vehicle_size | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " BU79786 | \n",
+ " Washington | \n",
+ " 2763.519279 | \n",
+ " No | \n",
+ " Basic | \n",
+ " Bachelor | \n",
+ " 2/24/11 | \n",
+ " Employed | \n",
+ " F | \n",
+ " 56274 | \n",
+ " ... | \n",
+ " 5 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer1 | \n",
+ " Agent | \n",
+ " 384.811147 | \n",
+ " Two-Door Car | \n",
+ " Medsize | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " QZ44356 | \n",
+ " Arizona | \n",
+ " 6979.535903 | \n",
+ " No | \n",
+ " Extended | \n",
+ " Bachelor | \n",
+ " 1/31/11 | \n",
+ " Unemployed | \n",
+ " F | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 42 | \n",
+ " 0 | \n",
+ " 8 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer3 | \n",
+ " Agent | \n",
+ " 1131.464935 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " AI49188 | \n",
+ " Nevada | \n",
+ " 12887.431650 | \n",
+ " No | \n",
+ " Premium | \n",
+ " Bachelor | \n",
+ " 2/19/11 | \n",
+ " Employed | \n",
+ " F | \n",
+ " 48767 | \n",
+ " ... | \n",
+ " 38 | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " Personal Auto | \n",
+ " Personal L3 | \n",
+ " Offer1 | \n",
+ " Agent | \n",
+ " 566.472247 | \n",
+ " Two-Door Car | \n",
+ " Medsize | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " WW63253 | \n",
+ " California | \n",
+ " 7645.861827 | \n",
+ " No | \n",
+ " Basic | \n",
+ " Bachelor | \n",
+ " 1/20/11 | \n",
+ " Unemployed | \n",
+ " M | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 65 | \n",
+ " 0 | \n",
+ " 7 | \n",
+ " Corporate Auto | \n",
+ " Corporate L2 | \n",
+ " Offer1 | \n",
+ " Call Center | \n",
+ " 529.881344 | \n",
+ " SUV | \n",
+ " Medsize | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " HB64268 | \n",
+ " Washington | \n",
+ " 2813.692575 | \n",
+ " No | \n",
+ " Basic | \n",
+ " Bachelor | \n",
+ " 2/3/11 | \n",
+ " Employed | \n",
+ " M | \n",
+ " 43836 | \n",
+ " ... | \n",
+ " 44 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer1 | \n",
+ " Agent | \n",
+ " 138.130879 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 9129 | \n",
+ " LA72316 | \n",
+ " California | \n",
+ " 23405.987980 | \n",
+ " No | \n",
+ " Basic | \n",
+ " Bachelor | \n",
+ " 2/10/11 | \n",
+ " Employed | \n",
+ " M | \n",
+ " 71941 | \n",
+ " ... | \n",
+ " 89 | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " Personal Auto | \n",
+ " Personal L1 | \n",
+ " Offer2 | \n",
+ " Web | \n",
+ " 198.234764 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ "
\n",
+ " \n",
+ " | 9130 | \n",
+ " PK87824 | \n",
+ " California | \n",
+ " 3096.511217 | \n",
+ " Yes | \n",
+ " Extended | \n",
+ " College | \n",
+ " 2/12/11 | \n",
+ " Employed | \n",
+ " F | \n",
+ " 21604 | \n",
+ " ... | \n",
+ " 28 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer1 | \n",
+ " Branch | \n",
+ " 379.200000 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ "
\n",
+ " \n",
+ " | 9131 | \n",
+ " TD14365 | \n",
+ " California | \n",
+ " 8163.890428 | \n",
+ " No | \n",
+ " Extended | \n",
+ " Bachelor | \n",
+ " 2/6/11 | \n",
+ " Unemployed | \n",
+ " M | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 37 | \n",
+ " 3 | \n",
+ " 2 | \n",
+ " Corporate Auto | \n",
+ " Corporate L2 | \n",
+ " Offer1 | \n",
+ " Branch | \n",
+ " 790.784983 | \n",
+ " Four-Door Car | \n",
+ " Medsize | \n",
+ "
\n",
+ " \n",
+ " | 9132 | \n",
+ " UP19263 | \n",
+ " California | \n",
+ " 7524.442436 | \n",
+ " No | \n",
+ " Extended | \n",
+ " College | \n",
+ " 2/3/11 | \n",
+ " Employed | \n",
+ " M | \n",
+ " 21941 | \n",
+ " ... | \n",
+ " 3 | \n",
+ " 0 | \n",
+ " 3 | \n",
+ " Personal Auto | \n",
+ " Personal L2 | \n",
+ " Offer3 | \n",
+ " Branch | \n",
+ " 691.200000 | \n",
+ " Four-Door Car | \n",
+ " Large | \n",
+ "
\n",
+ " \n",
+ " | 9133 | \n",
+ " Y167826 | \n",
+ " California | \n",
+ " 2611.836866 | \n",
+ " No | \n",
+ " Extended | \n",
+ " College | \n",
+ " 2/14/11 | \n",
+ " Unemployed | \n",
+ " M | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 90 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " Corporate Auto | \n",
+ " Corporate L3 | \n",
+ " Offer4 | \n",
+ " Call Center | \n",
+ " 369.600000 | \n",
+ " Two-Door Car | \n",
+ " Medsize | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
9134 rows × 24 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " customer state customer_lifetime_value response coverage \\\n",
+ "0 BU79786 Washington 2763.519279 No Basic \n",
+ "1 QZ44356 Arizona 6979.535903 No Extended \n",
+ "2 AI49188 Nevada 12887.431650 No Premium \n",
+ "3 WW63253 California 7645.861827 No Basic \n",
+ "4 HB64268 Washington 2813.692575 No Basic \n",
+ "... ... ... ... ... ... \n",
+ "9129 LA72316 California 23405.987980 No Basic \n",
+ "9130 PK87824 California 3096.511217 Yes Extended \n",
+ "9131 TD14365 California 8163.890428 No Extended \n",
+ "9132 UP19263 California 7524.442436 No Extended \n",
+ "9133 Y167826 California 2611.836866 No Extended \n",
+ "\n",
+ " education effective_to_date employmentstatus gender income ... \\\n",
+ "0 Bachelor 2/24/11 Employed F 56274 ... \n",
+ "1 Bachelor 1/31/11 Unemployed F 0 ... \n",
+ "2 Bachelor 2/19/11 Employed F 48767 ... \n",
+ "3 Bachelor 1/20/11 Unemployed M 0 ... \n",
+ "4 Bachelor 2/3/11 Employed M 43836 ... \n",
+ "... ... ... ... ... ... ... \n",
+ "9129 Bachelor 2/10/11 Employed M 71941 ... \n",
+ "9130 College 2/12/11 Employed F 21604 ... \n",
+ "9131 Bachelor 2/6/11 Unemployed M 0 ... \n",
+ "9132 College 2/3/11 Employed M 21941 ... \n",
+ "9133 College 2/14/11 Unemployed M 0 ... \n",
+ "\n",
+ " months_since_policy_inception number_of_open_complaints \\\n",
+ "0 5 0 \n",
+ "1 42 0 \n",
+ "2 38 0 \n",
+ "3 65 0 \n",
+ "4 44 0 \n",
+ "... ... ... \n",
+ "9129 89 0 \n",
+ "9130 28 0 \n",
+ "9131 37 3 \n",
+ "9132 3 0 \n",
+ "9133 90 0 \n",
+ "\n",
+ " number_of_policies policy_type policy renew_offer_type \\\n",
+ "0 1 Corporate Auto Corporate L3 Offer1 \n",
+ "1 8 Personal Auto Personal L3 Offer3 \n",
+ "2 2 Personal Auto Personal L3 Offer1 \n",
+ "3 7 Corporate Auto Corporate L2 Offer1 \n",
+ "4 1 Personal Auto Personal L1 Offer1 \n",
+ "... ... ... ... ... \n",
+ "9129 2 Personal Auto Personal L1 Offer2 \n",
+ "9130 1 Corporate Auto Corporate L3 Offer1 \n",
+ "9131 2 Corporate Auto Corporate L2 Offer1 \n",
+ "9132 3 Personal Auto Personal L2 Offer3 \n",
+ "9133 1 Corporate Auto Corporate L3 Offer4 \n",
+ "\n",
+ " sales_channel total_claim_amount vehicle_class vehicle_size \n",
+ "0 Agent 384.811147 Two-Door Car Medsize \n",
+ "1 Agent 1131.464935 Four-Door Car Medsize \n",
+ "2 Agent 566.472247 Two-Door Car Medsize \n",
+ "3 Call Center 529.881344 SUV Medsize \n",
+ "4 Agent 138.130879 Four-Door Car Medsize \n",
+ "... ... ... ... ... \n",
+ "9129 Web 198.234764 Four-Door Car Medsize \n",
+ "9130 Branch 379.200000 Four-Door Car Medsize \n",
+ "9131 Branch 790.784983 Four-Door Car Medsize \n",
+ "9132 Branch 691.200000 Four-Door Car Large \n",
+ "9133 Call Center 369.600000 Two-Door Car Medsize \n",
+ "\n",
+ "[9134 rows x 24 columns]"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Forward slashes resolve on Windows, macOS and Linux alike.\n",
+ "df = pd.read_csv('csv_files/marketing_customer_analysis.csv')\n",
+ "\n",
+ "# Normalise headers to snake_case: lower-case, spaces -> underscores.\n",
+ "df.columns = df.columns.str.lower().str.replace(' ', '_', regex=False)\n",
+ "df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "customer object\n",
+ "state object\n",
+ "customer_lifetime_value float64\n",
+ "response object\n",
+ "coverage object\n",
+ "education object\n",
+ "effective_to_date object\n",
+ "employmentstatus object\n",
+ "gender object\n",
+ "income int64\n",
+ "location_code object\n",
+ "marital_status object\n",
+ "monthly_premium_auto int64\n",
+ "months_since_last_claim int64\n",
+ "months_since_policy_inception int64\n",
+ "number_of_open_complaints int64\n",
+ "number_of_policies int64\n",
+ "policy_type object\n",
+ "policy object\n",
+ "renew_offer_type object\n",
+ "sales_channel object\n",
+ "total_claim_amount float64\n",
+ "vehicle_class object\n",
+ "vehicle_size object\n",
+ "dtype: object"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Show the dtype of every column of df.\n",
+ "df.dtypes"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "OLS Regression Results\n",
+ "\n",
+ " | Dep. Variable: | total_claim_amount | R-squared (uncentered): | 0.508 | \n",
+ "
\n",
+ "\n",
+ " | Model: | OLS | Adj. R-squared (uncentered): | 0.508 | \n",
+ "
\n",
+ "\n",
+ " | Method: | Least Squares | F-statistic: | 9426. | \n",
+ "
\n",
+ "\n",
+ " | Date: | Tue, 02 Apr 2024 | Prob (F-statistic): | 0.00 | \n",
+ "
\n",
+ "\n",
+ " | Time: | 21:50:34 | Log-Likelihood: | -66885. | \n",
+ "
\n",
+ "\n",
+ " | No. Observations: | 9134 | AIC: | 1.338e+05 | \n",
+ "
\n",
+ "\n",
+ " | Df Residuals: | 9133 | BIC: | 1.338e+05 | \n",
+ "
\n",
+ "\n",
+ " | Df Model: | 1 | | | \n",
+ "
\n",
+ "\n",
+ " | Covariance Type: | nonrobust | | | \n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ " | coef | std err | t | P>|t| | [0.025 | 0.975] | \n",
+ "
\n",
+ "\n",
+ " | customer_lifetime_value | 0.0353 | 0.000 | 97.089 | 0.000 | 0.035 | 0.036 | \n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ " | Omnibus: | 843.685 | Durbin-Watson: | 1.647 | \n",
+ "
\n",
+ "\n",
+ " | Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 6058.287 | \n",
+ "
\n",
+ "\n",
+ " | Skew: | -0.059 | Prob(JB): | 0.00 | \n",
+ "
\n",
+ "\n",
+ " | Kurtosis: | 6.988 | Cond. No. | 1.00 | \n",
+ "
\n",
+ "
Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified."
+ ],
+ "text/latex": [
+ "\\begin{center}\n",
+ "\\begin{tabular}{lclc}\n",
+ "\\toprule\n",
+ "\\textbf{Dep. Variable:} & total\\_claim\\_amount & \\textbf{ R-squared (uncentered):} & 0.508 \\\\\n",
+ "\\textbf{Model:} & OLS & \\textbf{ Adj. R-squared (uncentered):} & 0.508 \\\\\n",
+ "\\textbf{Method:} & Least Squares & \\textbf{ F-statistic: } & 9426. \\\\\n",
+ "\\textbf{Date:} & Tue, 02 Apr 2024 & \\textbf{ Prob (F-statistic):} & 0.00 \\\\\n",
+ "\\textbf{Time:} & 21:50:34 & \\textbf{ Log-Likelihood: } & -66885. \\\\\n",
+ "\\textbf{No. Observations:} & 9134 & \\textbf{ AIC: } & 1.338e+05 \\\\\n",
+ "\\textbf{Df Residuals:} & 9133 & \\textbf{ BIC: } & 1.338e+05 \\\\\n",
+ "\\textbf{Df Model:} & 1 & \\textbf{ } & \\\\\n",
+ "\\textbf{Covariance Type:} & nonrobust & \\textbf{ } & \\\\\n",
+ "\\bottomrule\n",
+ "\\end{tabular}\n",
+ "\\begin{tabular}{lcccccc}\n",
+ " & \\textbf{coef} & \\textbf{std err} & \\textbf{t} & \\textbf{P$> |$t$|$} & \\textbf{[0.025} & \\textbf{0.975]} \\\\\n",
+ "\\midrule\n",
+ "\\textbf{customer\\_lifetime\\_value} & 0.0353 & 0.000 & 97.089 & 0.000 & 0.035 & 0.036 \\\\\n",
+ "\\bottomrule\n",
+ "\\end{tabular}\n",
+ "\\begin{tabular}{lclc}\n",
+ "\\textbf{Omnibus:} & 843.685 & \\textbf{ Durbin-Watson: } & 1.647 \\\\\n",
+ "\\textbf{Prob(Omnibus):} & 0.000 & \\textbf{ Jarque-Bera (JB): } & 6058.287 \\\\\n",
+ "\\textbf{Skew:} & -0.059 & \\textbf{ Prob(JB): } & 0.00 \\\\\n",
+ "\\textbf{Kurtosis:} & 6.988 & \\textbf{ Cond. No. } & 1.00 \\\\\n",
+ "\\bottomrule\n",
+ "\\end{tabular}\n",
+ "%\\caption{OLS Regression Results}\n",
+ "\\end{center}\n",
+ "\n",
+ "Notes: \\newline\n",
+ " [1] R² is computed without centering (uncentered) since the model does not contain a constant. \\newline\n",
+ " [2] Standard Errors assume that the covariance matrix of the errors is correctly specified."
+ ],
+ "text/plain": [
+ "\n",
+ "\"\"\"\n",
+ " OLS Regression Results \n",
+ "=======================================================================================\n",
+ "Dep. Variable: total_claim_amount R-squared (uncentered): 0.508\n",
+ "Model: OLS Adj. R-squared (uncentered): 0.508\n",
+ "Method: Least Squares F-statistic: 9426.\n",
+ "Date: Tue, 02 Apr 2024 Prob (F-statistic): 0.00\n",
+ "Time: 21:50:34 Log-Likelihood: -66885.\n",
+ "No. Observations: 9134 AIC: 1.338e+05\n",
+ "Df Residuals: 9133 BIC: 1.338e+05\n",
+ "Df Model: 1 \n",
+ "Covariance Type: nonrobust \n",
+ "===========================================================================================\n",
+ " coef std err t P>|t| [0.025 0.975]\n",
+ "-------------------------------------------------------------------------------------------\n",
+ "customer_lifetime_value 0.0353 0.000 97.089 0.000 0.035 0.036\n",
+ "==============================================================================\n",
+ "Omnibus: 843.685 Durbin-Watson: 1.647\n",
+ "Prob(Omnibus): 0.000 Jarque-Bera (JB): 6058.287\n",
+ "Skew: -0.059 Prob(JB): 0.00\n",
+ "Kurtosis: 6.988 Cond. No. 1.00\n",
+ "==============================================================================\n",
+ "\n",
+ "Notes:\n",
+ "[1] R² is computed without centering (uncentered) since the model does not contain a constant.\n",
+ "[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
+ "\"\"\""
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# X-y split: regress total_claim_amount on customer_lifetime_value.\n",
+ "Y = df['total_claim_amount']\n",
+ "# sm.OLS fits through the origin unless an intercept column is supplied.\n",
+ "X = sm.add_constant(df[['customer_lifetime_value']])\n",
+ "\n",
+ "# NOTE(review): saved output predates the intercept; re-run to refresh.\n",
+ "model = sm.OLS(Y, X).fit()\n",
+ "model.summary()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "base",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.8"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}