diff --git a/LAB customer analysis Round 5 DAVID MARTINS.ipynb b/LAB customer analysis Round 5 DAVID MARTINS.ipynb
new file mode 100644
index 0000000..fc53989
--- /dev/null
+++ b/LAB customer analysis Round 5 DAVID MARTINS.ipynb
@@ -0,0 +1,371 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 47,
+ "id": "cd198984",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# These are the normal libraries\n",
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "\n",
+ "# This is just so that we don't get annoying warnings\n",
+ "import warnings\n",
+ "warnings.filterwarnings('ignore')\n",
+ "\n",
+ "# This is the most common viz library in python\n",
+ "import matplotlib.pyplot as plt\n",
+ "%matplotlib inline\n",
+ "\n",
+ "# This one is the above on steroids\n",
+ "import seaborn as sns\n",
+ "\n",
+ "from sklearn import linear_model\n",
+ "from sklearn.metrics import mean_squared_error, r2_score\n",
+ "# Needed by normalize_dataframe further down the notebook\n",
+ "from sklearn.preprocessing import MinMaxScaler\n",
+ "\n",
+ "# These Libs are for stats\n",
+ "import statsmodels.api as sm\n",
+ "from statsmodels.formula.api import ols\n",
+ "\n",
+ "# NOTE(review): absolute, machine-specific path; the notebook only runs where this file exists.\n",
+ "# Consider a path relative to the notebook so Restart & Run All works elsewhere.\n",
+ "customer = pd.read_csv(r'C:\\Users\\david\\OneDrive\\Ambiente de Trabalho\\Iron Hack\\ficheiros para LAB PANDA 2\\marketing_customer_analysis.csv')\n",
+ "customer"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 48,
+ "id": "cab4f9df",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "unnamed:_0 int64\n",
+ "customer object\n",
+ "state object\n",
+ "customer_lifetime_value float64\n",
+ "response object\n",
+ "coverage object\n",
+ "education object\n",
+ "effective_to_date object\n",
+ "employmentstatus object\n",
+ "gender object\n",
+ "income int64\n",
+ "location_code object\n",
+ "marital_status object\n",
+ "monthly_premium_auto int64\n",
+ "months_since_last_claim float64\n",
+ "months_since_policy_inception int64\n",
+ "number_of_open_complaints float64\n",
+ "number_of_policies int64\n",
+ "policy_type object\n",
+ "policy object\n",
+ "renew_offer_type object\n",
+ "sales_channel object\n",
+ "total_claim_amount float64\n",
+ "vehicle_class object\n",
+ "vehicle_size object\n",
+ "vehicle_type object\n",
+ "dtype: object"
+ ]
+ },
+ "execution_count": 48,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Show the dtype pandas assigned to each column of the loaded frame.\n",
+ "customer.dtypes"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 49,
+ "id": "7bbae7ea",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "unnamed:_0 int64\n",
+ "customer object\n",
+ "state object\n",
+ "customer_lifetime_value float64\n",
+ "response object\n",
+ "coverage object\n",
+ "education object\n",
+ "effective_to_date object\n",
+ "employmentstatus object\n",
+ "gender object\n",
+ "income int64\n",
+ "location_code object\n",
+ "marital_status object\n",
+ "monthly_premium_auto int64\n",
+ "months_since_last_claim float64\n",
+ "months_since_policy_inception int64\n",
+ "number_of_open_complaints float64\n",
+ "number_of_policies int64\n",
+ "policy_type object\n",
+ "policy object\n",
+ "renew_offer_type object\n",
+ "sales_channel object\n",
+ "total_claim_amount float64\n",
+ "vehicle_class object\n",
+ "vehicle_size object\n",
+ "vehicle_type object\n",
+ "dtype: object"
+ ]
+ },
+ "execution_count": 49,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "\n",
+ "# STANDARDIZING\n",
+ "\n",
+ "# Lower-case every column label and swap spaces for underscores.\n",
+ "cols = [label.lower().replace(' ', '_') for label in customer.columns]\n",
+ "customer.columns = cols\n",
+ "\n",
+ "# Last expression of the cell: per-column dtypes under the new labels.\n",
+ "customer.dtypes"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 60,
+ "id": "1b6785a0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Normalize (numerical).\n",
+ "\n",
+ "\n",
+ "def normalize_dataframe(customer, columns_to_normalize=None):\n",
+ "    \"\"\"Return a copy of `customer` with the selected columns min-max scaled.\n",
+ "\n",
+ "    Parameters\n",
+ "    ----------\n",
+ "    customer : pandas.DataFrame\n",
+ "        Frame to normalize. It is copied, never modified.\n",
+ "    columns_to_normalize : list of column labels, optional\n",
+ "        Columns to rescale. When None, every int64/float64 column is used.\n",
+ "\n",
+ "    Returns\n",
+ "    -------\n",
+ "    pandas.DataFrame\n",
+ "        Copy of the input whose selected columns were transformed by a\n",
+ "        scikit-learn MinMaxScaler built with its default settings.\n",
+ "    \"\"\"\n",
+ "    # Imported locally so the function does not depend on another cell\n",
+ "    # having brought MinMaxScaler into scope (the bare name raised NameError).\n",
+ "    from sklearn.preprocessing import MinMaxScaler\n",
+ "\n",
+ "    if columns_to_normalize is None:\n",
+ "        numerical_columns = customer.select_dtypes(include=['int64', 'float64']).columns\n",
+ "        columns_to_normalize = list(numerical_columns)\n",
+ "\n",
+ "    customer_normalized = customer.copy()\n",
+ "\n",
+ "    # fit_transform rejects a frame with zero columns; with nothing selected\n",
+ "    # there is nothing to scale, so hand back the untouched copy.\n",
+ "    if len(columns_to_normalize) == 0:\n",
+ "        return customer_normalized\n",
+ "\n",
+ "    scaler = MinMaxScaler()\n",
+ "    customer_normalized[columns_to_normalize] = scaler.fit_transform(\n",
+ "        customer_normalized[columns_to_normalize]\n",
+ "    )\n",
+ "\n",
+ "    return customer_normalized"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 61,
+ "id": "8e64add6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# X-y split.\n",
+ "\n",
+ "# Target: the total claim amount of each customer.\n",
+ "Y = customer['total_claim_amount']\n",
+ "\n",
+ "# Single predictor. statsmodels' OLS does not add an intercept by itself;\n",
+ "# without the constant column the line is forced through the origin and\n",
+ "# the summary reports an uncentered R-squared.\n",
+ "X = sm.add_constant(customer[['customer_lifetime_value']])\n",
+ "\n",
+ "# apply linear regression on the following data\n",
+ "\n",
+ "model = sm.OLS(Y, X).fit()\n",
+ "\n",
+ "model.summary()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "830d4a01",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4188ab5c",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "cf896d89",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.4"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}