From 3ee162afbf8ceaf9c7a83aee2bde5cac154f985d Mon Sep 17 00:00:00 2001 From: Pedro G Afonso Date: Mon, 30 Oct 2023 03:19:54 +0000 Subject: [PATCH] commiting lab5 --- Lab_round5.ipynb | 482 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 482 insertions(+) create mode 100644 Lab_round5.ipynb diff --git a/Lab_round5.ipynb b/Lab_round5.ipynb new file mode 100644 index 0000000..0573c1a --- /dev/null +++ b/Lab_round5.ipynb @@ -0,0 +1,482 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "2efc0ce2", + "metadata": {}, + "outputs": [], + "source": [ + "#importing libraries\n", + "\n", + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "import warnings\n", + "warnings.filterwarnings('ignore')\n", + "\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline\n", + "\n", + "import seaborn as sns\n", + "\n", + "from sklearn import linear_model\n", + "from sklearn.metrics import mean_squared_error, r2_score\n", + "\n", + "import statsmodels.api as sm\n", + "from statsmodels.formula.api import ols" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "6c01a7a3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(9134, 24)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerstatecustomer_lifetime_valueresponsecoverageeducationeffective_to_dateemploymentstatusgenderincome...months_since_policy_inceptionnumber_of_open_complaintsnumber_of_policiespolicy_typepolicyrenew_offer_typesales_channeltotal_claim_amountvehicle_classvehicle_size
0BU79786Washington2763.519279NoBasicBachelor2/24/11EmployedF56274...501Corporate AutoCorporate L3Offer1Agent384.811147Two-Door CarMedsize
1QZ44356Arizona6979.535903NoExtendedBachelor1/31/11UnemployedF0...4208Personal AutoPersonal L3Offer3Agent1131.464935Four-Door CarMedsize
2AI49188Nevada12887.431650NoPremiumBachelor2/19/11EmployedF48767...3802Personal AutoPersonal L3Offer1Agent566.472247Two-Door CarMedsize
\n", + "

3 rows × 24 columns

\n", + "
" + ], + "text/plain": [ + " customer state customer_lifetime_value response coverage education \\\n", + "0 BU79786 Washington 2763.519279 No Basic Bachelor \n", + "1 QZ44356 Arizona 6979.535903 No Extended Bachelor \n", + "2 AI49188 Nevada 12887.431650 No Premium Bachelor \n", + "\n", + " effective_to_date employmentstatus gender income ... \\\n", + "0 2/24/11 Employed F 56274 ... \n", + "1 1/31/11 Unemployed F 0 ... \n", + "2 2/19/11 Employed F 48767 ... \n", + "\n", + " months_since_policy_inception number_of_open_complaints number_of_policies \\\n", + "0 5 0 1 \n", + "1 42 0 8 \n", + "2 38 0 2 \n", + "\n", + " policy_type policy renew_offer_type sales_channel \\\n", + "0 Corporate Auto Corporate L3 Offer1 Agent \n", + "1 Personal Auto Personal L3 Offer3 Agent \n", + "2 Personal Auto Personal L3 Offer1 Agent \n", + "\n", + " total_claim_amount vehicle_class vehicle_size \n", + "0 384.811147 Two-Door Car Medsize \n", + "1 1131.464935 Four-Door Car Medsize \n", + "2 566.472247 Two-Door Car Medsize \n", + "\n", + "[3 rows x 24 columns]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#importing dataset from csv\n", + "mkt = pd.read_csv(r\"C:\\Users\\pedro\\Desktop\\Ironhack\\lab-customer-analysis-round-5\\lab-customer-analysis-round-5\\files_for_lab\\csv_files\\marketing_customer_analysis.csv\")\n", + "print(mkt.shape)\n", + "\n", + "#standardization of columns names\n", + "cols = []\n", + "for i in range(len(mkt.columns)):\n", + " cols.append(mkt.columns[i].lower().replace(' ','_'))\n", + "\n", + "mkt.columns = cols\n", + "mkt.head(3)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "f59d5cec", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_lifetime_valueincomemonthly_premium_automonths_since_last_claimmonths_since_policy_inceptionnumber_of_open_complaintsnumber_of_policiestotal_claim_amount
00.0106290.5628470.0337550.9142860.0505050.00.0000.132974
10.0624060.0000000.1392410.3714290.4242420.00.8750.391051
20.1349600.4877630.1983120.5142860.3838380.00.1250.195764
30.0705890.0000000.1898730.5142860.6565660.00.7500.183117
40.0112450.4384430.0506330.3428570.4444440.00.0000.047710
...........................
91290.2641370.7195470.0506330.5142860.8989900.00.1250.068485
91300.0147190.2160810.0759490.4000000.2828280.00.0000.131034
91310.0769510.0000000.1012660.2571430.3737370.60.1250.273297
91320.0690980.2194520.1476790.9714290.0303030.00.2500.238876
91330.0087660.0000000.0675110.0857140.9090910.00.0000.127716
\n", + "

9134 rows × 8 columns

\n", + "
" + ], + "text/plain": [ + " customer_lifetime_value income monthly_premium_auto \\\n", + "0 0.010629 0.562847 0.033755 \n", + "1 0.062406 0.000000 0.139241 \n", + "2 0.134960 0.487763 0.198312 \n", + "3 0.070589 0.000000 0.189873 \n", + "4 0.011245 0.438443 0.050633 \n", + "... ... ... ... \n", + "9129 0.264137 0.719547 0.050633 \n", + "9130 0.014719 0.216081 0.075949 \n", + "9131 0.076951 0.000000 0.101266 \n", + "9132 0.069098 0.219452 0.147679 \n", + "9133 0.008766 0.000000 0.067511 \n", + "\n", + " months_since_last_claim months_since_policy_inception \\\n", + "0 0.914286 0.050505 \n", + "1 0.371429 0.424242 \n", + "2 0.514286 0.383838 \n", + "3 0.514286 0.656566 \n", + "4 0.342857 0.444444 \n", + "... ... ... \n", + "9129 0.514286 0.898990 \n", + "9130 0.400000 0.282828 \n", + "9131 0.257143 0.373737 \n", + "9132 0.971429 0.030303 \n", + "9133 0.085714 0.909091 \n", + "\n", + " number_of_open_complaints number_of_policies total_claim_amount \n", + "0 0.0 0.000 0.132974 \n", + "1 0.0 0.875 0.391051 \n", + "2 0.0 0.125 0.195764 \n", + "3 0.0 0.750 0.183117 \n", + "4 0.0 0.000 0.047710 \n", + "... ... ... ... \n", + "9129 0.0 0.125 0.068485 \n", + "9130 0.0 0.000 0.131034 \n", + "9131 0.6 0.125 0.273297 \n", + "9132 0.0 0.250 0.238876 \n", + "9133 0.0 0.000 0.127716 \n", + "\n", + "[9134 rows x 8 columns]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#numerical data into dataframe\n", + "mkt_numerical = mkt.select_dtypes(include=[np.number])\n", + "\n", + "#normalization of numerical variables\n", + "from sklearn.preprocessing import MinMaxScaler\n", + "\n", + "scaler = MinMaxScaler()\n", + "normalized_data = scaler.fit_transform(mkt_numerical)\n", + "normalized_data = pd.DataFrame(normalized_data, columns=mkt_numerical.columns)\n", + "normalized_data" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "01cb29cc", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(9134,) (9134, 7)\n" + ] + } + ], + "source": [ + "#X-y split\n", + "Y = mkt_numerical['total_claim_amount']\n", + "X = mkt_numerical.drop(['total_claim_amount'], axis=1)\n", + "\n", + "print(Y.shape,X.shape)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}