diff --git a/LAB customer analysis Round 5 DAVID MARTINS.ipynb b/LAB customer analysis Round 5 DAVID MARTINS.ipynb new file mode 100644 index 0000000..fc53989 --- /dev/null +++ b/LAB customer analysis Round 5 DAVID MARTINS.ipynb @@ -0,0 +1,371 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 47, + "id": "cd198984", + "metadata": {}, + "outputs": [ + { + "ename": "SyntaxError", + "evalue": "invalid character '´' (U+00B4) (917027426.py, line 1)", + "output_type": "error", + "traceback": [ + "\u001b[1;36m Cell \u001b[1;32mIn[47], line 1\u001b[1;36m\u001b[0m\n\u001b[1;33m ´# These are the normal libraries\u001b[0m\n\u001b[1;37m ^\u001b[0m\n\u001b[1;31mSyntaxError\u001b[0m\u001b[1;31m:\u001b[0m invalid character '´' (U+00B4)\n" + ] + } + ], + "source": [ + "# These are the normal libraries\n", + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "# This is just so that we don't get annoying warnings\n", + "import warnings\n", + "warnings.filterwarnings('ignore')\n", + "\n", + "# This is the most common viz library in python\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline\n", + "\n", + "# This one is the above on steroids\n", + "import seaborn as sns\n", + "\n", + "from sklearn import linear_model\n", + "from sklearn.metrics import mean_squared_error, r2_score\n", + "\n", + "# These Libs are for stats\n", + "import statsmodels.api as sm\n", + "from statsmodels.formula.api import ols\n", + "\n", + "customer = pd.read_csv(r'C:\\Users\\david\\OneDrive\\Ambiente de Trabalho\\Iron Hack\\ficheiros para LAB PANDA 2\\marketing_customer_analysis.csv')\n", + "customer\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "id": "cab4f9df", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "unnamed:_0 int64\n", + "customer object\n", + "state object\n", + "customer_lifetime_value float64\n", + "response object\n", + "coverage object\n", + "education object\n", + "effective_to_date object\n", + 
"employmentstatus object\n", + "gender object\n", + "income int64\n", + "location_code object\n", + "marital_status object\n", + "monthly_premium_auto int64\n", + "months_since_last_claim float64\n", + "months_since_policy_inception int64\n", + "number_of_open_complaints float64\n", + "number_of_policies int64\n", + "policy_type object\n", + "policy object\n", + "renew_offer_type object\n", + "sales_channel object\n", + "total_claim_amount float64\n", + "vehicle_class object\n", + "vehicle_size object\n", + "vehicle_type object\n", + "dtype: object" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "customer.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "id": "7bbae7ea", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "unnamed:_0 int64\n", + "customer object\n", + "state object\n", + "customer_lifetime_value float64\n", + "response object\n", + "coverage object\n", + "education object\n", + "effective_to_date object\n", + "employmentstatus object\n", + "gender object\n", + "income int64\n", + "location_code object\n", + "marital_status object\n", + "monthly_premium_auto int64\n", + "months_since_last_claim float64\n", + "months_since_policy_inception int64\n", + "number_of_open_complaints float64\n", + "number_of_policies int64\n", + "policy_type object\n", + "policy object\n", + "renew_offer_type object\n", + "sales_channel object\n", + "total_claim_amount float64\n", + "vehicle_class object\n", + "vehicle_size object\n", + "vehicle_type object\n", + "dtype: object" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "# STANDARDIZING\n", + "\n", + "cols = []\n", + "for i in range(len(customer.columns)): \n", + " cols.append(customer.columns[i].lower().replace(' ', '_')) \n", + "customer.columns = cols\n", + "\n", + "customer\n", + "\n", + "customer.dtypes\n", + "\n", + "\n", + " \n" + ] + }, + 
{ + "cell_type": "code", + "execution_count": 60, + "id": "1b6785a0", + "metadata": {}, + "outputs": [], + "source": [ + "# Normalize (numerical).\n", + "\n", + "\n", + "def normalize_dataframe(customer, columns_to_normalize=None):\n", + " if columns_to_normalize is None:\n", + " \n", + " numerical_columns = customer.select_dtypes(include=['int64', 'float64']).columns\n", + " columns_to_normalize = list(numerical_columns)\n", + " \n", + " customer_normalized = customer.copy()\n", + " \n", + " scaler = MinMaxScaler()\n", + " \n", + " customer_normalized[columns_to_normalize] = scaler.fit_transform(customer_normalized[columns_to_normalize])\n", + " \n", + " return customer_normalized\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "id": "8e64add6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "
OLS Regression Results
Dep. Variable: total_claim_amount R-squared (uncentered): 0.510
Model: OLS Adj. R-squared (uncentered): 0.510
Method: Least Squares F-statistic: 1.136e+04
Date: Mon, 16 Oct 2023 Prob (F-statistic): 0.00
Time: 20:58:47 Log-Likelihood: -79898.
No. Observations: 10910 AIC: 1.598e+05
Df Residuals: 10909 BIC: 1.598e+05
Df Model: 1
Covariance Type: nonrobust
\n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "
coef std err t P>|t| [0.025 0.975]
customer_lifetime_value 0.0354 0.000 106.598 0.000 0.035 0.036
\n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "
Omnibus: 977.749 Durbin-Watson: 1.628
Prob(Omnibus): 0.000 Jarque-Bera (JB): 6781.308
Skew: -0.053 Prob(JB): 0.00
Kurtosis: 6.861 Cond. No. 1.00


Notes:
[1] R² is computed without centering (uncentered) since the model does not contain a constant.
[2] Standard Errors assume that the covariance matrix of the errors is correctly specified." + ], + "text/latex": [ + "\\begin{center}\n", + "\\begin{tabular}{lclc}\n", + "\\toprule\n", + "\\textbf{Dep. Variable:} & total\\_claim\\_amount & \\textbf{ R-squared (uncentered):} & 0.510 \\\\\n", + "\\textbf{Model:} & OLS & \\textbf{ Adj. R-squared (uncentered):} & 0.510 \\\\\n", + "\\textbf{Method:} & Least Squares & \\textbf{ F-statistic: } & 1.136e+04 \\\\\n", + "\\textbf{Date:} & Mon, 16 Oct 2023 & \\textbf{ Prob (F-statistic):} & 0.00 \\\\\n", + "\\textbf{Time:} & 20:58:47 & \\textbf{ Log-Likelihood: } & -79898. \\\\\n", + "\\textbf{No. Observations:} & 10910 & \\textbf{ AIC: } & 1.598e+05 \\\\\n", + "\\textbf{Df Residuals:} & 10909 & \\textbf{ BIC: } & 1.598e+05 \\\\\n", + "\\textbf{Df Model:} & 1 & \\textbf{ } & \\\\\n", + "\\textbf{Covariance Type:} & nonrobust & \\textbf{ } & \\\\\n", + "\\bottomrule\n", + "\\end{tabular}\n", + "\\begin{tabular}{lcccccc}\n", + " & \\textbf{coef} & \\textbf{std err} & \\textbf{t} & \\textbf{P$> |$t$|$} & \\textbf{[0.025} & \\textbf{0.975]} \\\\\n", + "\\midrule\n", + "\\textbf{customer\\_lifetime\\_value} & 0.0354 & 0.000 & 106.598 & 0.000 & 0.035 & 0.036 \\\\\n", + "\\bottomrule\n", + "\\end{tabular}\n", + "\\begin{tabular}{lclc}\n", + "\\textbf{Omnibus:} & 977.749 & \\textbf{ Durbin-Watson: } & 1.628 \\\\\n", + "\\textbf{Prob(Omnibus):} & 0.000 & \\textbf{ Jarque-Bera (JB): } & 6781.308 \\\\\n", + "\\textbf{Skew:} & -0.053 & \\textbf{ Prob(JB): } & 0.00 \\\\\n", + "\\textbf{Kurtosis:} & 6.861 & \\textbf{ Cond. No. } & 1.00 \\\\\n", + "\\bottomrule\n", + "\\end{tabular}\n", + "%\\caption{OLS Regression Results}\n", + "\\end{center}\n", + "\n", + "Notes: \\newline\n", + " [1] R² is computed without centering (uncentered) since the model does not contain a constant. \\newline\n", + " [2] Standard Errors assume that the covariance matrix of the errors is correctly specified." 
+ ], + "text/plain": [ + "\n", + "\"\"\"\n", + " OLS Regression Results \n", + "=======================================================================================\n", + "Dep. Variable: total_claim_amount R-squared (uncentered): 0.510\n", + "Model: OLS Adj. R-squared (uncentered): 0.510\n", + "Method: Least Squares F-statistic: 1.136e+04\n", + "Date: Mon, 16 Oct 2023 Prob (F-statistic): 0.00\n", + "Time: 20:58:47 Log-Likelihood: -79898.\n", + "No. Observations: 10910 AIC: 1.598e+05\n", + "Df Residuals: 10909 BIC: 1.598e+05\n", + "Df Model: 1 \n", + "Covariance Type: nonrobust \n", + "===========================================================================================\n", + " coef std err t P>|t| [0.025 0.975]\n", + "-------------------------------------------------------------------------------------------\n", + "customer_lifetime_value 0.0354 0.000 106.598 0.000 0.035 0.036\n", + "==============================================================================\n", + "Omnibus: 977.749 Durbin-Watson: 1.628\n", + "Prob(Omnibus): 0.000 Jarque-Bera (JB): 6781.308\n", + "Skew: -0.053 Prob(JB): 0.00\n", + "Kurtosis: 6.861 Cond. No. 
1.00\n", + "==============================================================================\n", + "\n", + "Notes:\n", + "[1] R² is computed without centering (uncentered) since the model does not contain a constant.\n", + "[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n", + "\"\"\"" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# X-y split.\n", + "\n", + "Y = customer['total_claim_amount']\n", + "X = customer['customer_lifetime_value']\n", + "\n", + "# apply linear regression on the following data\n", + "\n", + "model = sm.OLS(Y,X).fit()\n", + "\n", + "model.summary()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "830d4a01", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4188ab5c", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cf896d89", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}