Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
371 changes: 371 additions & 0 deletions LAB customer analysis Round 5 DAVID MARTINS.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,371 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 47,
"id": "cd198984",
"metadata": {},
"outputs": [
{
"ename": "SyntaxError",
"evalue": "invalid character '´' (U+00B4) (917027426.py, line 1)",
"output_type": "error",
"traceback": [
"\u001b[1;36m Cell \u001b[1;32mIn[47], line 1\u001b[1;36m\u001b[0m\n\u001b[1;33m ´# These are the normal libraries\u001b[0m\n\u001b[1;37m ^\u001b[0m\n\u001b[1;31mSyntaxError\u001b[0m\u001b[1;31m:\u001b[0m invalid character '´' (U+00B4)\n"
]
}
],
"source": [
"´# These are the normal libraries\n",
"import pandas as pd\n",
"import numpy as np\n",
"\n",
"# This is just so that we don't get annoying warnings\n",
"import warnings\n",
"warnings.filterwarnings('ignore')\n",
"\n",
"# This is the most common viz library in python\n",
"import matplotlib.pyplot as plt\n",
"%matplotlib inline\n",
"\n",
"# This one is the above on steroids\n",
"import seaborn as sns\n",
"\n",
"from sklearn import linear_model\n",
"from sklearn.metrics import mean_squared_error, r2_score\n",
"\n",
"# These Libs are for stats\n",
"import statsmodels.api as sm\n",
"from statsmodels.formula.api import ols\n",
"\n",
"customer = pd.read_csv(r'C:\\Users\\david\\OneDrive\\Ambiente de Trabalho\\Iron Hack\\ficheiros para LAB PANDA 2\\marketing_customer_analysis.csv')\n",
"customer\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 48,
"id": "cab4f9df",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"unnamed:_0 int64\n",
"customer object\n",
"state object\n",
"customer_lifetime_value float64\n",
"response object\n",
"coverage object\n",
"education object\n",
"effective_to_date object\n",
"employmentstatus object\n",
"gender object\n",
"income int64\n",
"location_code object\n",
"marital_status object\n",
"monthly_premium_auto int64\n",
"months_since_last_claim float64\n",
"months_since_policy_inception int64\n",
"number_of_open_complaints float64\n",
"number_of_policies int64\n",
"policy_type object\n",
"policy object\n",
"renew_offer_type object\n",
"sales_channel object\n",
"total_claim_amount float64\n",
"vehicle_class object\n",
"vehicle_size object\n",
"vehicle_type object\n",
"dtype: object"
]
},
"execution_count": 48,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"customer.dtypes"
]
},
{
"cell_type": "code",
"execution_count": 49,
"id": "7bbae7ea",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"unnamed:_0 int64\n",
"customer object\n",
"state object\n",
"customer_lifetime_value float64\n",
"response object\n",
"coverage object\n",
"education object\n",
"effective_to_date object\n",
"employmentstatus object\n",
"gender object\n",
"income int64\n",
"location_code object\n",
"marital_status object\n",
"monthly_premium_auto int64\n",
"months_since_last_claim float64\n",
"months_since_policy_inception int64\n",
"number_of_open_complaints float64\n",
"number_of_policies int64\n",
"policy_type object\n",
"policy object\n",
"renew_offer_type object\n",
"sales_channel object\n",
"total_claim_amount float64\n",
"vehicle_class object\n",
"vehicle_size object\n",
"vehicle_type object\n",
"dtype: object"
]
},
"execution_count": 49,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"# STANDARDIZING\n",
"\n",
"cols = []\n",
"for i in range(len(customer.columns)): \n",
" cols.append(customer.columns[i].lower().replace(' ', '_')) \n",
"customer.columns = cols\n",
"\n",
"customer\n",
"\n",
"customer.dtypes\n",
"\n",
"\n",
" \n"
]
},
{
"cell_type": "code",
"execution_count": 60,
"id": "1b6785a0",
"metadata": {},
"outputs": [],
"source": [
"# Normalize (numerical).\n",
"\n",
"\n",
"def normalize_dataframe(customer, columns_to_normalize=None):\n",
" if columns_to_normalize is None:\n",
" \n",
" numerical_columns = customer.select_dtypes(include=['int64', 'float64']).columns\n",
" columns_to_normalize = list(numerical_columns)\n",
" \n",
" customer_normalized = customer.copy()\n",
" \n",
" scaler = MinMaxScaler()\n",
" \n",
" customer_normalized[columns_to_normalize] = scaler.fit_transform(customer_normalized[columns_to_normalize])\n",
" \n",
" return customer_normalized\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 61,
"id": "8e64add6",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table class=\"simpletable\">\n",
"<caption>OLS Regression Results</caption>\n",
"<tr>\n",
" <th>Dep. Variable:</th> <td>total_claim_amount</td> <th> R-squared (uncentered):</th> <td> 0.510</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Model:</th> <td>OLS</td> <th> Adj. R-squared (uncentered):</th> <td> 0.510</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Method:</th> <td>Least Squares</td> <th> F-statistic: </th> <td>1.136e+04</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Date:</th> <td>Mon, 16 Oct 2023</td> <th> Prob (F-statistic):</th> <td> 0.00</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Time:</th> <td>20:58:47</td> <th> Log-Likelihood: </th> <td> -79898.</td> \n",
"</tr>\n",
"<tr>\n",
" <th>No. Observations:</th> <td> 10910</td> <th> AIC: </th> <td>1.598e+05</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Df Residuals:</th> <td> 10909</td> <th> BIC: </th> <td>1.598e+05</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Df Model:</th> <td> 1</td> <th> </th> <td> </td> \n",
"</tr>\n",
"<tr>\n",
" <th>Covariance Type:</th> <td>nonrobust</td> <th> </th> <td> </td> \n",
"</tr>\n",
"</table>\n",
"<table class=\"simpletable\">\n",
"<tr>\n",
" <td></td> <th>coef</th> <th>std err</th> <th>t</th> <th>P>|t|</th> <th>[0.025</th> <th>0.975]</th> \n",
"</tr>\n",
"<tr>\n",
" <th>customer_lifetime_value</th> <td> 0.0354</td> <td> 0.000</td> <td> 106.598</td> <td> 0.000</td> <td> 0.035</td> <td> 0.036</td>\n",
"</tr>\n",
"</table>\n",
"<table class=\"simpletable\">\n",
"<tr>\n",
" <th>Omnibus:</th> <td>977.749</td> <th> Durbin-Watson: </th> <td> 1.628</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Prob(Omnibus):</th> <td> 0.000</td> <th> Jarque-Bera (JB): </th> <td>6781.308</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Skew:</th> <td>-0.053</td> <th> Prob(JB): </th> <td> 0.00</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Kurtosis:</th> <td> 6.861</td> <th> Cond. No. </th> <td> 1.00</td>\n",
"</tr>\n",
"</table><br/><br/>Notes:<br/>[1] R² is computed without centering (uncentered) since the model does not contain a constant.<br/>[2] Standard Errors assume that the covariance matrix of the errors is correctly specified."
],
"text/latex": [
"\\begin{center}\n",
"\\begin{tabular}{lclc}\n",
"\\toprule\n",
"\\textbf{Dep. Variable:} & total\\_claim\\_amount & \\textbf{ R-squared (uncentered):} & 0.510 \\\\\n",
"\\textbf{Model:} & OLS & \\textbf{ Adj. R-squared (uncentered):} & 0.510 \\\\\n",
"\\textbf{Method:} & Least Squares & \\textbf{ F-statistic: } & 1.136e+04 \\\\\n",
"\\textbf{Date:} & Mon, 16 Oct 2023 & \\textbf{ Prob (F-statistic):} & 0.00 \\\\\n",
"\\textbf{Time:} & 20:58:47 & \\textbf{ Log-Likelihood: } & -79898. \\\\\n",
"\\textbf{No. Observations:} & 10910 & \\textbf{ AIC: } & 1.598e+05 \\\\\n",
"\\textbf{Df Residuals:} & 10909 & \\textbf{ BIC: } & 1.598e+05 \\\\\n",
"\\textbf{Df Model:} & 1 & \\textbf{ } & \\\\\n",
"\\textbf{Covariance Type:} & nonrobust & \\textbf{ } & \\\\\n",
"\\bottomrule\n",
"\\end{tabular}\n",
"\\begin{tabular}{lcccccc}\n",
" & \\textbf{coef} & \\textbf{std err} & \\textbf{t} & \\textbf{P$> |$t$|$} & \\textbf{[0.025} & \\textbf{0.975]} \\\\\n",
"\\midrule\n",
"\\textbf{customer\\_lifetime\\_value} & 0.0354 & 0.000 & 106.598 & 0.000 & 0.035 & 0.036 \\\\\n",
"\\bottomrule\n",
"\\end{tabular}\n",
"\\begin{tabular}{lclc}\n",
"\\textbf{Omnibus:} & 977.749 & \\textbf{ Durbin-Watson: } & 1.628 \\\\\n",
"\\textbf{Prob(Omnibus):} & 0.000 & \\textbf{ Jarque-Bera (JB): } & 6781.308 \\\\\n",
"\\textbf{Skew:} & -0.053 & \\textbf{ Prob(JB): } & 0.00 \\\\\n",
"\\textbf{Kurtosis:} & 6.861 & \\textbf{ Cond. No. } & 1.00 \\\\\n",
"\\bottomrule\n",
"\\end{tabular}\n",
"%\\caption{OLS Regression Results}\n",
"\\end{center}\n",
"\n",
"Notes: \\newline\n",
" [1] R² is computed without centering (uncentered) since the model does not contain a constant. \\newline\n",
" [2] Standard Errors assume that the covariance matrix of the errors is correctly specified."
],
"text/plain": [
"<class 'statsmodels.iolib.summary.Summary'>\n",
"\"\"\"\n",
" OLS Regression Results \n",
"=======================================================================================\n",
"Dep. Variable: total_claim_amount R-squared (uncentered): 0.510\n",
"Model: OLS Adj. R-squared (uncentered): 0.510\n",
"Method: Least Squares F-statistic: 1.136e+04\n",
"Date: Mon, 16 Oct 2023 Prob (F-statistic): 0.00\n",
"Time: 20:58:47 Log-Likelihood: -79898.\n",
"No. Observations: 10910 AIC: 1.598e+05\n",
"Df Residuals: 10909 BIC: 1.598e+05\n",
"Df Model: 1 \n",
"Covariance Type: nonrobust \n",
"===========================================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"-------------------------------------------------------------------------------------------\n",
"customer_lifetime_value 0.0354 0.000 106.598 0.000 0.035 0.036\n",
"==============================================================================\n",
"Omnibus: 977.749 Durbin-Watson: 1.628\n",
"Prob(Omnibus): 0.000 Jarque-Bera (JB): 6781.308\n",
"Skew: -0.053 Prob(JB): 0.00\n",
"Kurtosis: 6.861 Cond. No. 1.00\n",
"==============================================================================\n",
"\n",
"Notes:\n",
"[1] R² is computed without centering (uncentered) since the model does not contain a constant.\n",
"[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
"\"\"\""
]
},
"execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# X-y split.\n",
"\n",
"Y = customer['total_claim_amount']\n",
"X = customer['customer_lifetime_value']\n",
"\n",
"# apply linear regression on the following data\n",
"\n",
"model = sm.OLS(Y,X).fit()\n",
"\n",
"model.summary()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "830d4a01",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "4188ab5c",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "cf896d89",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
}
},
"nbformat": 4,
"nbformat_minor": 5
}