diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000..9249650 Binary files /dev/null and b/.DS_Store differ diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7076c91 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +consumer_complaints.csv \ No newline at end of file diff --git a/.ipynb_checkpoints/consumer_analysis-checkpoint.ipynb b/.ipynb_checkpoints/consumer_analysis-checkpoint.ipynb new file mode 100644 index 0000000..7f1f005 --- /dev/null +++ b/.ipynb_checkpoints/consumer_analysis-checkpoint.ipynb @@ -0,0 +1,1742 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "2532726f", + "metadata": {}, + "source": [ + "# Analyzing Consumer Complaints from Financial Corporations/Institutions" + ] + }, + { + "cell_type": "markdown", + "id": "5f270b27", + "metadata": {}, + "source": [ + "## Introduction:\n", + "In this notebook I will be analyzing the counts and percentage of **disputed** customer complaints with regard to financial institutions, states and financial products along with a quick and dirty machine learning model to give a rough prediction of when the disputes will occur" + ] + }, + { + "cell_type": "markdown", + "id": "dd4e748c", + "metadata": {}, + "source": [ + "## Getting Required Packages:" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "id": "0060a11a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: xgboost in /Users/aryanvakharia/opt/anaconda3/lib/python3.9/site-packages (1.6.2)\n", + "Requirement already satisfied: numpy in /Users/aryanvakharia/opt/anaconda3/lib/python3.9/site-packages (from xgboost) (1.21.5)\n", + "Requirement already satisfied: scipy in /Users/aryanvakharia/opt/anaconda3/lib/python3.9/site-packages (from xgboost) (1.7.3)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: 
\u001b[0m\u001b[31;49m22.1.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m22.2.2\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "%pip install xgboost\n", + "\n", + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "from sklearn.preprocessing import MinMaxScaler\n", + "from sklearn.impute import SimpleImputer\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.tree import DecisionTreeClassifier\n", + "import xgboost as xgb\n", + "from sklearn.model_selection import train_test_split, StratifiedShuffleSplit\n", + "from sklearn.metrics import accuracy_score, plot_confusion_matrix, classification_report\n", + "\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "id": "53fc5391", + "metadata": {}, + "source": [ + "## Importing and Cleaning Data" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "f92ee33a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
date_receivedproductsub_productissuesub_issueconsumer_complaint_narrativecompany_public_responsecompanystatezipcodetagsconsumer_consent_providedsubmitted_viadate_sent_to_companycompany_response_to_consumertimely_responseconsumer_disputed?complaint_id
02013-08-30MortgageOther mortgageLoan modification,collection,foreclosureNaNNaNNaNU.S. BancorpCA95993NaNNaNReferral2013-09-03Closed with explanationYesYes511074
12013-08-30MortgageOther mortgageLoan servicing, payments, escrow accountNaNNaNNaNWells Fargo & CompanyCA91104NaNNaNReferral2013-09-03Closed with explanationYesYes511080
22013-08-30Credit reportingNaNIncorrect information on credit reportAccount statusNaNNaNWells Fargo & CompanyNY11764NaNNaNPostal mail2013-09-18Closed with explanationYesNo510473
32013-08-30Student loanNon-federal student loanRepaying your loanRepaying your loanNaNNaNNavient Solutions, Inc.MD21402NaNNaNEmail2013-08-30Closed with explanationYesYes510326
42013-08-30Debt collectionCredit cardFalse statements or representationAttempted to collect wrong amountNaNNaNResurgent Capital Services L.P.GA30106NaNNaNWeb2013-08-30Closed with explanationYesYes511067
\n", + "
" + ], + "text/plain": [ + " date_received product sub_product \\\n", + "0 2013-08-30 Mortgage Other mortgage \n", + "1 2013-08-30 Mortgage Other mortgage \n", + "2 2013-08-30 Credit reporting NaN \n", + "3 2013-08-30 Student loan Non-federal student loan \n", + "4 2013-08-30 Debt collection Credit card \n", + "\n", + " issue \\\n", + "0 Loan modification,collection,foreclosure \n", + "1 Loan servicing, payments, escrow account \n", + "2 Incorrect information on credit report \n", + "3 Repaying your loan \n", + "4 False statements or representation \n", + "\n", + " sub_issue consumer_complaint_narrative \\\n", + "0 NaN NaN \n", + "1 NaN NaN \n", + "2 Account status NaN \n", + "3 Repaying your loan NaN \n", + "4 Attempted to collect wrong amount NaN \n", + "\n", + " company_public_response company state zipcode tags \\\n", + "0 NaN U.S. Bancorp CA 95993 NaN \n", + "1 NaN Wells Fargo & Company CA 91104 NaN \n", + "2 NaN Wells Fargo & Company NY 11764 NaN \n", + "3 NaN Navient Solutions, Inc. MD 21402 NaN \n", + "4 NaN Resurgent Capital Services L.P. GA 30106 NaN \n", + "\n", + " consumer_consent_provided submitted_via date_sent_to_company \\\n", + "0 NaN Referral 2013-09-03 \n", + "1 NaN Referral 2013-09-03 \n", + "2 NaN Postal mail 2013-09-18 \n", + "3 NaN Email 2013-08-30 \n", + "4 NaN Web 2013-08-30 \n", + "\n", + " company_response_to_consumer timely_response consumer_disputed? 
\\\n", + "0 Closed with explanation Yes Yes \n", + "1 Closed with explanation Yes Yes \n", + "2 Closed with explanation Yes No \n", + "3 Closed with explanation Yes Yes \n", + "4 Closed with explanation Yes Yes \n", + "\n", + " complaint_id \n", + "0 511074 \n", + "1 511080 \n", + "2 510473 \n", + "3 510326 \n", + "4 511067 " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv('consumer_complaints.csv', parse_dates=['date_received', 'date_sent_to_company'], low_memory=False)\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "id": "88e39fd7", + "metadata": {}, + "source": [ + "Having a clean dataset allows for higher accuracy in visualizing, analyzing and in this case predicting on the dataset. In order to make the most efficient use of the above data, I am checking for the number of empty/NaN rows:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "73bf52d6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "date_received 0\n", + "product 0\n", + "sub_product 158322\n", + "issue 0\n", + "sub_issue 343335\n", + "consumer_complaint_narrative 489151\n", + "company_public_response 470833\n", + "company 0\n", + "state 4887\n", + "zipcode 4505\n", + "tags 477998\n", + "consumer_consent_provided 432499\n", + "submitted_via 0\n", + "date_sent_to_company 0\n", + "company_response_to_consumer 0\n", + "timely_response 0\n", + "consumer_disputed? 0\n", + "complaint_id 0\n", + "dtype: int64" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Check NaNs\n", + "df.isna().sum()" + ] + }, + { + "cell_type": "markdown", + "id": "e8db998a", + "metadata": {}, + "source": [ + "From the above output it is evident that the dataset is populated with many NaNs. 
However, removing all NaN values can cause overfitting in the prediction model and inaccuracies in the data analysis.\n", + "\n", + "So, using a threshold of *column contains >= 10% NaNs*, we will clean each column" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "45eebff3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "sub_product 0.284774\n", + "sub_issue 0.617557\n", + "consumer_complaint_narrative 0.879836\n", + "company_public_response 0.846887\n", + "tags 0.859775\n", + "consumer_consent_provided 0.777936\n", + "dtype: float64" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "thresh = 0.10\n", + "col_nans = df.isna().mean(axis=0)\n", + "col_w_thresh = col_nans[col_nans >= thresh]\n", + "col_w_thresh" + ] + }, + { + "cell_type": "markdown", + "id": "dee366ce", + "metadata": {}, + "source": [ + "The above columns will now be dropped as they have too many NaNs to have any use in our dataset.\n", + "\n", + "*NOTE:* Along with these, the *complaint_id* column will also be dropped for not being relevant in our analysis" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "252d522a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
date_receivedproductissuecompanystatezipcodesubmitted_viadate_sent_to_companycompany_response_to_consumertimely_responseconsumer_disputed?
02013-08-30MortgageLoan modification,collection,foreclosureU.S. BancorpCA95993Referral2013-09-03111
12013-08-30MortgageLoan servicing, payments, escrow accountWells Fargo & CompanyCA91104Referral2013-09-03111
22013-08-30Credit reportingIncorrect information on credit reportWells Fargo & CompanyNY11764Postal mail2013-09-18110
32013-08-30Student loanRepaying your loanNavient Solutions, Inc.MD21402Email2013-08-30111
42013-08-30Debt collectionFalse statements or representationResurgent Capital Services L.P.GA30106Web2013-08-30111
\n", + "
" + ], + "text/plain": [ + " date_received product issue \\\n", + "0 2013-08-30 Mortgage Loan modification,collection,foreclosure \n", + "1 2013-08-30 Mortgage Loan servicing, payments, escrow account \n", + "2 2013-08-30 Credit reporting Incorrect information on credit report \n", + "3 2013-08-30 Student loan Repaying your loan \n", + "4 2013-08-30 Debt collection False statements or representation \n", + "\n", + " company state zipcode submitted_via \\\n", + "0 U.S. Bancorp CA 95993 Referral \n", + "1 Wells Fargo & Company CA 91104 Referral \n", + "2 Wells Fargo & Company NY 11764 Postal mail \n", + "3 Navient Solutions, Inc. MD 21402 Email \n", + "4 Resurgent Capital Services L.P. GA 30106 Web \n", + "\n", + " date_sent_to_company company_response_to_consumer timely_response \\\n", + "0 2013-09-03 1 1 \n", + "1 2013-09-03 1 1 \n", + "2 2013-09-18 1 1 \n", + "3 2013-08-30 1 1 \n", + "4 2013-08-30 1 1 \n", + "\n", + " consumer_disputed? \n", + "0 1 \n", + "1 1 \n", + "2 0 \n", + "3 1 \n", + "4 1 " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "drops_l = list(col_w_thresh.index) + ['complaint_id']\n", + "df = df.drop(drops_l, axis=1)\n", + "\n", + "#Tokenizing Yes and No to 1 and 0 respectively\n", + "df = df.replace(['Yes', 'No'], [1, 0])\n", + "\n", + "#Also tokenizing company response for easier NLP classification\n", + "df.loc[df['company_response_to_consumer'].str.contains(\"Closed\"), 'company_response_to_consumer'] = 1\n", + "df.loc[df['company_response_to_consumer'] != 1, 'company_response_to_consumer'] = 0\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "id": "0e9dee79", + "metadata": {}, + "source": [ + "## Data Analysis and Visualization:\n", + "Since I will be mainly checking the count AND rate of disputed issues, the following functions will be used to get these metrics as a DataFrame:\n", + "\n", + "(*NOTE: Since the data being analysed is categorical I will mostly be using 
bar graphs*)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "08b7d908", + "metadata": {}, + "outputs": [], + "source": [ + "def is_disputed(row, cat):\n", + " return df[(df['consumer_disputed?'] == 1) & (df[cat] == row[cat])]['consumer_disputed?'].sum()\n", + "\n", + "def get_count(cat, data=df):\n", + " df_by_cat = df[[cat, 'issue', 'consumer_disputed?']].groupby([cat]).count().reset_index()\n", + " \n", + " df_by_cat['consumer_disputed?'] = df_by_cat.apply(lambda r: is_disputed(r, cat), axis=1)\n", + " \n", + " df_by_cat['dispute_rate'] = df_by_cat.apply(lambda r: (r['consumer_disputed?'] / r['issue']) * 100, axis=1)\n", + " \n", + " return df_by_cat" + ] + }, + { + "cell_type": "markdown", + "id": "6693cced", + "metadata": {}, + "source": [ + "### Disputes according to State:" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "d6ad5d11", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "State is CA with max disputes: 17615\n" + ] + } + ], + "source": [ + "#State with most disputes using above function:\n", + "disp_st = get_count('state')\n", + "st_c = disp_st['consumer_disputed?']\n", + "m_st = disp_st['state'][pd.Series.argmax(st_c)]\n", + "m_c = disp_st['consumer_disputed?'][pd.Series.argmax(st_c)]\n", + "print(\"State is\", m_st, \"with max disputes:\", m_c)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "6f0729b6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Dispute counts per state')" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "fig1, ax1 = plt.subplots(figsize=(20, 9))\n", + "ax1.bar(disp_st['state'], disp_st['consumer_disputed?'])\n", + "ax1.set_xlabel(\"State\")\n", + "ax1.set_ylabel(\"Disputed issue counts\")\n", + "ax1.set_title(\"Dispute counts per state\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "6dfa6225", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Dispute rates per state')" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "fig2, ax2 = plt.subplots(figsize=(20, 9))\n", + "ax2.bar(disp_st['state'], disp_st['dispute_rate'])\n", + "ax2.set_xlabel(\"State\")\n", + "ax2.set_ylabel(\"Disputed issue rate(disputed count / issue count)\")\n", + "ax2.set_title(\"Dispute rates per state\")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "17ec819c", + "metadata": {}, + "outputs": [], + "source": [ + "#Getting count of issues per state with products\n", + "state_issue_count = df[['state', 'product', 'issue']].groupby(['state', 'product', 'issue']).size().reset_index()\n", + "state_issue_count.columns = ['state', 'product', 'issue', 'count']" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "15c92542", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
stateproductissuecount
501CAMortgageLoan modification,collection,foreclosure19433
942FLMortgageLoan modification,collection,foreclosure11337
502CAMortgageLoan servicing, payments, escrow account8957
4028TXCredit reportingIncorrect information on credit report8712
485CACredit reportingIncorrect information on credit report8585
\n", + "
" + ], + "text/plain": [ + " state product issue count\n", + "501 CA Mortgage Loan modification,collection,foreclosure 19433\n", + "942 FL Mortgage Loan modification,collection,foreclosure 11337\n", + "502 CA Mortgage Loan servicing, payments, escrow account 8957\n", + "4028 TX Credit reporting Incorrect information on credit report 8712\n", + "485 CA Credit reporting Incorrect information on credit report 8585" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#States with 5 highest dispute counts, issues and product\n", + "state_issue_count.sort_values(by = 'count', ascending = False).head()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "63fa2a3a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
stateproductissuecount
3083NVPrepaid cardOverdraft, savings or rewards features1
347AZConsumer LoanPayment to acct not credited1
344AZConsumer LoanLender repossessed or sold the vehicle1
1AACredit cardIdentity theft / Fraud / Embezzlement1
0AABank account or serviceAccount opening, closing, or management1
\n", + "
" + ], + "text/plain": [ + " state product issue \\\n", + "3083 NV Prepaid card Overdraft, savings or rewards features \n", + "347 AZ Consumer Loan Payment to acct not credited \n", + "344 AZ Consumer Loan Lender repossessed or sold the vehicle \n", + "1 AA Credit card Identity theft / Fraud / Embezzlement \n", + "0 AA Bank account or service Account opening, closing, or management \n", + "\n", + " count \n", + "3083 1 \n", + "347 1 \n", + "344 1 \n", + "1 1 \n", + "0 1 " + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#States with 5 lowest dispute counts with related issues and products\n", + "state_issue_count.sort_values(by = 'count', ascending = False).tail()" + ] + }, + { + "cell_type": "markdown", + "id": "eb748a29", + "metadata": {}, + "source": [ + "### **Analysis**: \n", + "As the above console output and bar graphs show, states with higher populations such as CA, NY, TX etc tend to have more disputes over financial issues. Moreover, West Coast states have a higher tendency to dispute complaints. Finally, Mortgage modifications and Credit reporting issues have caused the most disputes among the states \n", + "\n", + "Laws and operations regarding financial products and the companies headquartered in these states are areas which can be further analysed for why they yield such high issue counts for a certain range of products." + ] + }, + { + "cell_type": "markdown", + "id": "f45775c1", + "metadata": {}, + "source": [ + "### Disputes according to Product:" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "1b73be97", + "metadata": {}, + "outputs": [], + "source": [ + "disp_prod = get_count('product')" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "95b32810", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
productissuecount
75MortgageLoan modification,collection,foreclosure97191
59Credit reportingIncorrect information on credit report66718
76MortgageLoan servicing, payments, escrow account60375
62Debt collectionCont'd attempts collect debt not owed42285
0Bank account or serviceAccount opening, closing, or management26661
\n", + "
" + ], + "text/plain": [ + " product issue count\n", + "75 Mortgage Loan modification,collection,foreclosure 97191\n", + "59 Credit reporting Incorrect information on credit report 66718\n", + "76 Mortgage Loan servicing, payments, escrow account 60375\n", + "62 Debt collection Cont'd attempts collect debt not owed 42285\n", + "0 Bank account or service Account opening, closing, or management 26661" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Highest issues per product\n", + "product_issue_count = df.groupby(['product', 'issue']).size().reset_index()\n", + "product_issue_count.columns = ['product', 'issue', 'count']\n", + "product_issue_count.sort_values(by = 'count', ascending=False).head()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "cd258ff1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0, 0.5, 'Product')" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "fig3, ax3 = plt.subplots(figsize=(16, 9))\n", + "ax3.barh(disp_prod['product'], disp_prod['consumer_disputed?']) \n", + "\n", + "ax3.set_title(\"Disputed issue counts per product\")\n", + "ax3.set_xlabel('Disputed issue counts')\n", + "ax3.set_ylabel('Product')" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "40b666b1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 0, 'Disputed issue rate')" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "fig4, ax4 = plt.subplots(figsize=(16, 9))\n", + "ax4.barh(disp_prod['product'], disp_prod['dispute_rate']) \n", + "\n", + "ax4.set_title(\"Disputed issue rates per product\")\n", + "ax4.set_ylabel('Product')\n", + "ax4.set_xlabel('Disputed issue rate')" + ] + }, + { + "cell_type": "markdown", + "id": "61b80521", + "metadata": {}, + "source": [ + "### Analysis:\n", + "Using the same visualization it can be concluded that loan and credit related issues like *Mortgages, Credit Reporting, Student loans* etc are more likely to be disputed, meaning our company should be wary and prepared to efficiently face complicated customer service with such products" + ] + }, + { + "cell_type": "markdown", + "id": "32296070", + "metadata": {}, + "source": [ + "### Disputes according to submission medium:" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "fd3459f9", + "metadata": {}, + "outputs": [], + "source": [ + "disp_med = get_count('submitted_via')" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "abdbe069", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 17.200000000000003, 'Disputed issue count')" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAA7IAAAIICAYAAABTptJTAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAmIUlEQVR4nO3df7Tn9V0f+OdLJhKSCIFkwqFDtsMKaoEaIiNLYu1qsQZNV9izcBxXDe2ypc2yatz2dElPW21P6YHuNnRpCz00sZA0BhDNhppGZaHRGBEy+SUhCc0oGEYojEKQasAOvvaP73tOvnNzmbkDA3feN4/HOd/z/Xxf3/f7M+/P3M+59z6/7/fnc6u7AwAAALP4uvUeAAAAABwMQRYAAICpCLIAAABMRZAFAABgKoIsAAAAUxFkAQAAmMqm9R7Ac/XqV7+6t27dut7DAAAA4AXw8Y9//Pe7e/Nq700bZLdu3ZodO3as9zAAAAB4AVTV7z7be5YWAwAAMBVBFgAAgKkIsgAAAExFkAUAAGAqgiwAAABTEWQBAACYiiALAADAVARZAAAApiLIAgAAMBVBFgAAgKkIsgAAAExFkAUAAGAqgiwAAABTEWQBAACYiiALAADAVNYUZKvqJ6vq3qr6TFW9r6peWlXHVdVtVfWF8XzsUvu3V9XOqrqvqt60VD+zqu4Z711dVTXqR1bVTaN+V1VtPeRHCgAAwIZwwCBbVVuS/HiSbd19epIjkmxPclmS27v7lCS3j9epqlPH+6clOTfJNVV1xNjdtUkuSXLKeJw76hcneby7T05yVZIrD8nRAQAAsOGsdWnxpiRHVdWmJC9L8lCS85LcMN6/Icn5Y/u8JDd299PdfX+SnUnOqqoTkhzd3Xd2dyd594o+e/d1S5Jz9s7WAgAAwLIDBtnu/r0k/3eSLyZ5OMkT3f0rSY7v7odHm4eTvGZ02ZLkwaVd7Bq1LWN7ZX2fPt29J8kTSV61cixVdUlV7aiqHbt3717rMQIAALCBrGVp8bFZzJielOTPJHl5Vf3I/rqsUuv91PfXZ99C93Xdva27t23evHn/AwcAAGBDWsvS4u9Jcn937+7u/5rkF5K8MckjY7lwxvOjo/2uJK9d6n9iFkuRd43tlfV9+ozly8ckeey5HBAAAAAb26Y1tPlikrOr6mVJvpzknCQ7kvxRkouSXDGePzDa35rkZ6vqHVnM4J6S5O7ufqaqnqyqs5PcleQtSf7FUp+LktyZ5IIkd4zraAEAgMPQ1ss+uN5D4Hl44Io3r/cQnpcDBtnuvquqbknyiSR7knwyyXVJXpHk5qq6OIuwe+Fof29V3Zzks6P9pd39zNjdW5Ncn+SoJB8ajyR5V5L3VNXOLGZitx+SowMAAGDDWcuMbLr7p5L81Iry01nMzq7W/vIkl69S35Hk9FXqT2UEYQAAANiftf75HQAAADgsCLIAAABMRZAFAABgKoIsAAAAUxFkAQAAmIogCwAAwFQEWQAAAKYiyAIAADAVQRYAAICpCLIAAABMRZAFAABgKoIsAAAAUxFkAQAAmIogCwAAwFQEWQAAAKYiyAIAADAVQRYAAICpCLIAAABMRZAFAABgKoIsAAAAUxFkAQAAmIogCwAAwFQEWQAAAKYiyAIAADAVQRYAAICpCLIAAABMRZAFAABgKoIsAAAAUxFkAQAAmIogCwAAwFQEWQAAAKYiyAIAADAVQRYAAICpCLIAAABMRZAFAABgKoIsAAAAUxFkAQAAmIogCwAAwFQEWQAAAKYiyAIAADAVQRYAAICpCLIAAABMRZAFAABgKoIsAAAAUxFkAQAAmIogCwAAwFQEWQAAAKYiyAIAADAVQRYAAICpHDDIVtU3V9Wnlh5/WFVvq6rjquq2qvrCeD52qc/bq2pnVd1XVW9aqp9ZVfeM966uqhr1I6vqplG/q6q2viBHCwAAwPQOGGS7+77uPqO7z0hyZpI/TvL+JJclub27T0ly+3idqjo1yfYkpyU
5N8k1VXXE2N21SS5Jcsp4nDvqFyd5vLtPTnJVkisPydEBAACw4Rzs0uJzkvx2d/9ukvOS3DDqNyQ5f2yfl+TG7n66u+9PsjPJWVV1QpKju/vO7u4k717RZ+++bklyzt7ZWgAAAFh2sEF2e5L3je3ju/vhJBnPrxn1LUkeXOqza9S2jO2V9X36dPeeJE8kedVBjg0AAICvAWsOslX19Ul+IMnPHajpKrXeT31/fVaO4ZKq2lFVO3bv3n2AYQAAALARHcyM7Pcl+UR3PzJePzKWC2c8Pzrqu5K8dqnfiUkeGvUTV6nv06eqNiU5JsljKwfQ3dd197bu3rZ58+aDGDoAAAAbxcEE2R/KV5YVJ8mtSS4a2xcl+cBSffu4E/FJWdzU6e6x/PjJqjp7XP/6lhV99u7rgiR3jOtoAQAAYB+b1tKoql6W5C8n+RtL5SuS3FxVFyf5YpILk6S7762qm5N8NsmeJJd29zOjz1uTXJ/kqCQfGo8keVeS91TVzixmYrc/j2MCAABgA1tTkO3uP86Kmy919x9kcRfj1dpfnuTyVeo7kpy+Sv2pjCAMAAAA+3Owdy0GAACAdSXIAgAAMBVBFgAAgKkIsgAAAExFkAUAAGAqgiwAAABTEWQBAACYiiALAADAVARZAAAApiLIAgAAMBVBFgAAgKkIsgAAAExFkAUAAGAqgiwAAABTEWQBAACYiiALAADAVARZAAAApiLIAgAAMBVBFgAAgKkIsgAAAExFkAUAAGAqgiwAAABTEWQBAACYiiALAADAVARZAAAApiLIAgAAMBVBFgAAgKkIsgAAAExFkAUAAGAqgiwAAABTEWQBAACYiiALAADAVARZAAAApiLIAgAAMBVBFgAAgKkIsgAAAExFkAUAAGAqgiwAAABTEWQBAACYiiALAADAVARZAAAApiLIAgAAMBVBFgAAgKkIsgAAAExFkAUAAGAqgiwAAABTEWQBAACYiiALAADAVARZAAAApiLIAgAAMJU1BdmqemVV3VJVn6+qz1XVG6rquKq6raq+MJ6PXWr/9qraWVX3VdWblupnVtU9472rq6pG/ciqumnU76qqrYf8SAEAANgQ1joj+/8k+aXu/pYkr0vyuSSXJbm9u09Jcvt4nao6Ncn2JKclOTfJNVV1xNjPtUkuSXLKeJw76hcneby7T05yVZIrn+dxAQAAsEEdMMhW1dFJ/mKSdyVJd/9Jd38pyXlJbhjNbkhy/tg+L8mN3f10d9+fZGeSs6rqhCRHd/ed3d1J3r2iz9593ZLknL2ztQAAALBsLTOy/22S3Un+bVV9sqreWVUvT3J8dz+cJOP5NaP9liQPLvXfNWpbxvbK+j59untPkieSvGrlQKrqkqraUVU7du/evcZDBAAAYCNZS5DdlOTbklzb3a9P8kcZy4ifxWozqb2f+v767Fvovq67t3X3ts2bN+9/1AAAAGxIawmyu5Ls6u67xutbsgi2j4zlwhnPjy61f+1S/xOTPDTqJ65S36dPVW1KckySxw72YAAAANj4Dhhku/s/J3mwqr55lM5J8tkktya5aNQuSvKBsX1rku3jTsQnZXFTp7vH8uMnq+rscf3rW1b02buvC5LcMa6jBQAAgH1sWmO7H0vy3qr6+iS/k+SvZRGCb66qi5N8McmFSdLd91bVzVmE3T1JLu3uZ8Z+3prk+iRHJfnQeCSLG0m9p6p2ZjETu/15HhcAAAAb1JqCbHd/Ksm2Vd4651naX57k8lXqO5Kcvkr9qYwgDAAAAPuz1r8jCwAAAIcFQRYAAICpCLIAAABMRZAFAABgKoIsAAAAUxFkAQAAmIogCwAAwFQEWQAAAKYiyAIAADAVQRYAAICpCLIAAABMRZAFAABgKoIsAAAAUxFkAQAAmIogCwAAwFQEWQAAAKYiyAIAADAVQRYAAICpCLIAAABMRZAFAABgKoIsAAAAUxFkAQAAmIogCwAAwFQEWQAAAKYiyAIAADAVQRYAAICpCLIAAAB
MRZAFAABgKoIsAAAAUxFkAQAAmIogCwAAwFQEWQAAAKYiyAIAADAVQRYAAICpCLIAAABMRZAFAABgKoIsAAAAUxFkAQAAmIogCwAAwFQEWQAAAKYiyAIAADAVQRYAAICpCLIAAABMRZAFAABgKoIsAAAAUxFkAQAAmIogCwAAwFQEWQAAAKYiyAIAADAVQRYAAICprCnIVtUDVXVPVX2qqnaM2nFVdVtVfWE8H7vU/u1VtbOq7quqNy3Vzxz72VlVV1dVjfqRVXXTqN9VVVsP8XECAACwQRzMjOx3d/cZ3b1tvL4sye3dfUqS28frVNWpSbYnOS3JuUmuqaojRp9rk1yS5JTxOHfUL07yeHefnOSqJFc+90MCAABgI3s+S4vPS3LD2L4hyflL9Ru7++nuvj/JziRnVdUJSY7u7ju7u5O8e0Wfvfu6Jck5e2drAQAAYNlag2wn+ZWq+nhVXTJqx3f3w0kynl8z6luSPLjUd9eobRnbK+v79OnuPUmeSPKqlYOoqkuqakdV7di9e/cahw4AAMBGsmmN7b6jux+qqtckua2qPr+ftqvNpPZ+6vvrs2+h+7ok1yXJtm3bvup9AAAANr41zch290Pj+dEk709yVpJHxnLhjOdHR/NdSV671P3EJA+N+omr1PfpU1WbkhyT5LGDPxwAAAA2ugMG2ap6eVV9w97tJN+b5DNJbk1y0Wh2UZIPjO1bk2wfdyI+KYubOt09lh8/WVVnj+tf37Kiz959XZDkjnEdLQAAAOxjLUuLj0/y/nHvpU1Jfra7f6mqPpbk5qq6OMkXk1yYJN19b1XdnOSzSfYkubS7nxn7emuS65McleRD45Ek70rynqramcVM7PZDcGwAAABsQAcMst39O0let0r9D5Kc8yx9Lk9y+Sr1HUlOX6X+VEYQBgAAgP15Pn9+BwAAAF50giwAAABTEWQBAACYiiALAADAVARZAAAApiLIAgAAMBVBFgAAgKkIsgAAAExFkAUAAGAqgiwAAABTEWQBAACYiiALAADAVARZAAAApiLIAgAAMBVBFgAAgKkIsgAAAExFkAUAAGAqgiwAAABTEWQBAACYiiALAADAVARZAAAApiLIAgAAMBVBFgAAgKkIsgAAAExFkAUAAGAqgiwAAABTEWQBAACYiiALAADAVARZAAAApiLIAgAAMBVBFgAAgKkIsgAAAExFkAUAAGAqgiwAAABTEWQBAACYiiALAADAVARZAAAApiLIAgAAMBVBFgAAgKkIsgAAAExFkAUAAGAqgiwAAABTEWQBAACYiiALAADAVARZAAAApiLIAgAAMBVBFgAAgKkIsgAAAExFkAUAAGAqaw6yVXVEVX2yqn5xvD6uqm6rqi+M52OX2r69qnZW1X1V9aal+plVdc947+qqqlE/sqpuGvW7qmrrITxGAAAANpCDmZH9iSSfW3p9WZLbu/uUJLeP16mqU5NsT3JaknOTXFNVR4w+1ya5JMkp43HuqF+c5PHuPjnJVUmufE5HAwAAwIa3piBbVScmeXOSdy6Vz0tyw9i+Icn5S/Ubu/vp7r4/yc4kZ1XVCUmO7u47u7uTvHtFn737uiXJOXtnawEAAGDZWmdk/3mSv5PkT5dqx3f3w0kynl8z6luSPLjUbteobRnbK+v79OnuPUmeSPKqlYOoqkuqakdV7di9e/cahw4AAMBGcsAgW1V/Jcmj3f3xNe5ztZnU3k99f332LXRf193bunvb5s2b1zgcAAAANpJNa2jzHUl+oKq+P8lLkxxdVf8uySNVdUJ3PzyWDT862u9K8tql/icmeWjUT1ylvtxnV1VtSnJMksee4zEBAACwgR1wRra7397dJ3b31ixu4nRHd/9IkluTXDSaXZTkA2P71iTbx52IT8ripk53j+XHT1bV2eP617es6LN3XxeMf+OrZmQBAABgLTOyz+aKJDdX1cVJvpjkwiTp7nur6uYkn02yJ8ml3f3M6PPWJNcnOSrJh8YjSd6V5D1
VtTOLmdjtz2NcAAAAbGAHFWS7+8NJPjy2/yDJOc/S7vIkl69S35Hk9FXqT2UEYQAAANifg/k7sgAAALDuBFkAAACmIsgCAAAwFUEWAACAqQiyAAAATEWQBQAAYCqCLAAAAFMRZAEAAJiKIAsAAMBUBFkAAACmIsgCAAAwFUEWAACAqQiyAAAATEWQBQAAYCqCLAAAAFMRZAEAAJiKIAsAAMBUBFkAAACmIsgCAAAwFUEWAACAqQiyAAAATEWQBQAAYCqCLAAAAFMRZAEAAJiKIAsAAMBUBFkAAACmIsgCAAAwFUEWAACAqQiyAAAATEWQBQAAYCqCLAAAAFMRZAEAAJiKIAsAAMBUBFkAAACmIsgCAAAwFUEWAACAqQiyAAAATEWQBQAAYCqCLAAAAFMRZAEAAJiKIAsAAMBUBFkAAACmIsgCAAAwFUEWAACAqQiyAAAATEWQBQAAYCqCLAAAAFMRZAEAAJiKIAsAAMBUBFkAAACmcsAgW1Uvraq7q+rTVXVvVf3DUT+uqm6rqi+M52OX+ry9qnZW1X1V9aal+plVdc947+qqqlE/sqpuGvW7qmrrC3CsAAAAbABrmZF9Oslf6u7XJTkjyblVdXaSy5Lc3t2nJLl9vE5VnZpke5LTkpyb5JqqOmLs69oklyQ5ZTzOHfWLkzze3ScnuSrJlc//0AAAANiIDhhke+G/jJcvGY9Ocl6SG0b9hiTnj+3zktzY3U939/1JdiY5q6pOSHJ0d9/Z3Z3k3Sv67N3XLUnO2TtbCwAAAMvWdI1sVR1RVZ9K8miS27r7riTHd/fDSTKeXzOab0ny4FL3XaO2ZWyvrO/Tp7v3JHkiyatWGcclVbWjqnbs3r17TQcIAADAxrKmINvdz3T3GUlOzGJ29fT9NF9tJrX3U99fn5XjuK67t3X3ts2bNx9g1AAAAGxEB3XX4u7+UpIPZ3Ft6yNjuXDG86Oj2a4kr13qdmKSh0b9xFXq+/Spqk1Jjkny2MGMDQAAgK8Na7lr8eaqeuXYPirJ9yT5fJJbk1w0ml2U5ANj+9Yk28ediE/K4qZOd4/lx09W1dnj+te3rOizd18XJLljXEcLAAAA+9i0hjYnJLlh3Hn465Lc3N2/WFV3Jrm5qi5O8sUkFyZJd99bVTcn+WySPUku7e5nxr7emuT6JEcl+dB4JMm7krynqnZmMRO7/VAcHAAAABvPAYNsd/9WktevUv+DJOc8S5/Lk1y+Sn1Hkq+6vra7n8oIwgAAALA/B3WNLAAAAKw3QRYAAICpCLIAAABMRZAFAABgKoIsAAAAUxFkAQAAmIogCwAAwFQEWQAAAKYiyAIAADAVQRYAAICpCLIAAABMRZAFAABgKoIsAAAAUxFkAQAAmIogCwAAwFQEWQAAAKYiyAIAADAVQRYAAICpCLIAAABMRZAFAABgKoIsAAAAUxFkAQAAmIogCwAAwFQEWQAAAKYiyAIAADAVQRYAAICpCLIAAABMRZAFAABgKoIsAAAAUxFkAQAAmIogCwAAwFQEWQAAAKYiyAIAADAVQRYAAICpCLIAAABMRZAFAABgKoIsAAAAUxFkAQAAmIogCwAAwFQEWQAAAKYiyAIAADAVQRYAAICpCLIAAABMRZAFAABgKoIsAAAAUxFkAQAAmIogCwAAwFQEWQAAAKYiyAIAADCVAwbZqnptVf3HqvpcVd1bVT8x6sdV1W1V9YXxfOxSn7dX1c6quq+q3rRUP7Oq7hnvXV1VNepHVtVNo35XVW19AY4VAACADWAtM7J7kvyt7v5zSc5OcmlVnZrksiS3d/cpSW4frzPe257ktCTnJrmmqo4Y+7o2ySVJThmPc0f94iSPd/fJSa5KcuUhODYAAAA2oAMG2e5+uLs/MbafTPK5JFuSnJfkhtHshiTnj+3zktzY3U939/1JdiY5q6pOSHJ0d9/Z3Z3k3Sv67N3XLUnO2TtbCwAAAMsO6hrZseT39UnuSnJ8dz+cLMJukteMZluSPLj
UbdeobRnbK+v79OnuPUmeSPKqgxkbAAAAXxvWHGSr6hVJfj7J27r7D/fXdJVa76e+vz4rx3BJVe2oqh27d+8+0JABAADYgNYUZKvqJVmE2Pd29y+M8iNjuXDG86OjvivJa5e6n5jkoVE/cZX6Pn2qalOSY5I8tnIc3X1dd2/r7m2bN29ey9ABAADYYNZy1+JK8q4kn+vudyy9dWuSi8b2RUk+sFTfPu5EfFIWN3W6eyw/frKqzh77fMuKPnv3dUGSO8Z1tAAAALCPTWto8x1JfjTJPVX1qVH7u0muSHJzVV2c5ItJLkyS7r63qm5O8tks7nh8aXc/M/q9Ncn1SY5K8qHxSBZB+T1VtTOLmdjtz++wAAAA2KgOGGS7+9ez+jWsSXLOs/S5PMnlq9R3JDl9lfpTGUEYAAAA9ueg7loMAAAA602QBQAAYCpruUYWAICvAVsv++B6D4Hn4YEr3rzeQ4AXjRlZAAAApiLIAgAAMBVBFgAAgKkIsgAAAExFkAUAAGAqgiwAAABTEWQBAACYiiALAADAVARZAAAApiLIAgAAMBVBFgAAgKkIsgAAAExFkAUAAGAqgiwAAABTEWQBAACYiiALAADAVARZAAAApiLIAgAAMBVBFgAAgKkIsgAAAExFkAUAAGAqgiwAAABTEWQBAACYiiALAADAVARZAAAApiLIAgAAMBVBFgAAgKkIsgAAAExFkAUAAGAqgiwAAABTEWQBAACYiiALAADAVARZAAAApiLIAgAAMBVBFgAAgKkIsgAAAExFkAUAAGAqgiwAAABTEWQBAACYiiALAADAVARZAAAApiLIAgAAMBVBFgAAgKkIsgAAAExFkAUAAGAqgiwAAABTEWQBAACYygGDbFX9TFU9WlWfWaodV1W3VdUXxvOxS++9vap2VtV9VfWmpfqZVXXPeO/qqqpRP7Kqbhr1u6pq6yE+RgAAADaQtczIXp/k3BW1y5Lc3t2nJLl9vE5VnZpke5LTRp9rquqI0efaJJckOWU89u7z4iSPd/fJSa5KcuVzPRgAAAA2vgMG2e7+tSSPrSifl+SGsX1DkvOX6jd299PdfX+SnUnOqqoTkhzd3Xd2dyd594o+e/d1S5Jz9s7WAgAAwErP9RrZ47v74SQZz68Z9S1JHlxqt2vUtoztlfV9+nT3niRPJHnVav9oVV1SVTuqasfu3buf49ABAACY2aG+2dNqM6m9n/r++nx1sfu67t7W3ds2b978HIcIAADAzJ5rkH1kLBfOeH501Hclee1SuxOTPDTqJ65S36dPVW1Kcky+eikzAAAAJHnuQfbWJBeN7YuSfGCpvn3cifikLG7qdPdYfvxkVZ09rn99y4o+e/d1QZI7xnW0AAAA8FU2HahBVb0vyXcleXVV7UryU0muSHJzVV2c5ItJLkyS7r63qm5O8tkke5Jc2t3PjF29NYs7IB+V5EPjkSTvSvKeqtqZxUzs9kNyZAAAAGxIBwyy3f1Dz/LWOc/S/vIkl69S35Hk9FXqT2UEYQAAADiQQ32zJwAAAHhBCbIAAABMRZAFAABgKoIsAAAAUxFkAQAAmIogCwAAwFQEWQAAAKYiyAIAADAVQRYAAICpCLIAAABMRZAFAABgKoIsAAAAUxFkAQAAmMqm9R4AAAdv62UfXO8h8Bw9cMWb13sIADA9M7IAAABMRZAFAABgKoIsAAAAUxFkAQAAmIogCwAAwFQEWQAAAKYiyAIAADAVf0cWADYwf3N4bv7uMMDqzMgCAAAwFUEWAACAqQiyAAAATEWQBQAAYCqCLAAAAFMRZAEAAJiKIAsAAMBUBFkAAACmIsgCAAAwFUEWAACAqQiyAAAATEWQBQAAYCqCLAAAAFMRZAEAAJiKIAsAAMBUBFkAAACmIsgCAAAwFUEWAACAqQiyAAAATEWQBQAAYCqCLAAAAFPZtN4DABa2XvbB9R4Cz8MDV7x5vYcAAPA1w4wsAAAAUxFkAQAAmIogCwAAwFQEWQAAAKY
iyAIAADCVwybIVtW5VXVfVe2sqsvWezwAAAAcng6LIFtVRyT5V0m+L8mpSX6oqk5d31EBAABwODosgmySs5Ls7O7f6e4/SXJjkvPWeUwAAAAchjat9wCGLUkeXHq9K8l/t05jOWS2XvbB9R4Cz8MDV7x5vYcAAACsorp7vceQqrowyZu6+38dr380yVnd/WMr2l2S5JLx8puT3PeiDpSVXp3k99d7EEzD+cLBcL6wVs4VDobzhYPhfFl/f7a7N6/2xuEyI7sryWuXXp+Y5KGVjbr7uiTXvViDYv+qakd3b1vvcTAH5wsHw/nCWjlXOBjOFw6G8+XwdrhcI/uxJKdU1UlV9fVJtie5dZ3HBAAAwGHosJiR7e49VfW/J/nlJEck+ZnuvnedhwUAAMBh6LAIsknS3f8hyX9Y73FwUCzz5mA4XzgYzhfWyrnCwXC+cDCcL4exw+JmTwAAALBWh8s1sgAAALAmgixJkqp6pqo+tfS47BDt9zfG89aq+syh2CeHp1XOoa3rPSbW19I58Zmq+rmqepnvBV87Vvv6H2T/rVX1P6+x3SE/p6rqB/b+LKyqn66qv32o/w3WbsX59O+r6pUHaL+5qu6qqk9W1Xe+AONxTkysqq6qqrctvf7lqnrn0ut/VlX/x7P0/XBVuZPxYUCQZa8vd/cZS48rDsVOu/uNh2I/TGHlOfTAeg+Idbf3nDg9yZ8k+ZvrPSBeVM/36781yQGD7Aulu289VD8LOSSWz6fHklx6gPbnJPl8d7++uz+yln+gqo7Y32s2lN9I8sYkqaqvy+LvxZ629P4bk3x0HcbFQRBk2a+qeqCq/klV3VlVO6rq28anVr9dVX9ztHlFVd1eVZ+oqnuq6ryl/v9l/UbPenq286Kqvr2qfquqXlpVL6+qe6vq9PUeLy+4jyQ5eWwfUVX/Znztf6WqjkqSqjqjqn5znB/vr6pjR/3DVXVlVd1dVf9p7+xKVR1RVf9XVX1s9Pkb63NorMFHkpxcVcdV1f87vl6/WVXfmiRV9d8vreb4ZFV9Q5IrknznqP3kmHn9yPie8omq2u8HpVX1XVX1q1V18zhvrqiqHx7n0T1V9Y2j3f+wNHP3/1XV8aP+V6vqX77A/y88N3cm2ZIkVfWNVfVLVfXxcX58S1WdkeSfJvn+cf4cVVXfO36X+cRYIfCK0f+BqvoHVfXrSS5c5fVfH99jPl1VP18HubKAw9ZHM4JsFgH2M0merKpjq+rIJH8uScb3kI+P331PWOr/I1X1G7VYIXDWizt09hJk2euo2ndZ6A8uvfdgd78hi19Erk9yQZKzk/yj8f5TSf7H7v62JN+d5J9VVb2IY+fwsHwOvT/Pcl5098ey+DvR/ziLXzT+XXdbarqBVdWmJN+X5J5ROiXJv+ru05J8Kcn/NOrvTvJ/dve3jrY/tbSbTd19VpK3LdUvTvJEd397km9P8ter6qQX8FB4DlZ8/f9hkk+Or/HfzeJrniR/O8ml3X1Gku9M8uUklyX5yJiFuyrJo0n+8vie8oNJrl7DP/+6JD+R5M8n+dEk3zTOo3cm+bHR5teTnN3dr09yY5K/8/yOmBdSLWZJz8ni50iyuKvsj3X3mVmcR9d096eS/IMkN41z6uVJ/l6S7xnnz44ky8tGn+ruv9DdN67y+he6+9u7+3VJPpfF9x0m190PJdlTVf9NFoH2ziR3JXlDkm1ZfK2vSnLBOLd+JsnlS7t4+Vh1+L+N91gHh82f32HdfXl8s1/N3h8W9yR5RXc/mcWnVk/V4hqVP0ryT6rqLyb50yw+JT0+yX9+YYfMYWafc6iqXpJnPy/+UZKPZRF2f/zFHyovkqOq6lNj+yNJ3pXkzyS5f/yimSQfT7K1qo5J8sru/tVRvyHJzy3t6xeW24/t703yrVV1wXh9TBYh+f5Dexg8R6t9/e/K+OCiu++oqleNr/1Hk7yjqt6bRXDYtcrnoS9J8i/HbNszSb5pDWP4WHc/nCRV9dtJfmXU78n
iA7YkOTHJTWO25evj/Dlc7T2ftmbxfeC2Mav6xiQ/t3S+HLlK37OTnJrko6Pd12cRXPa6aUX75denV9U/TvLKJK9I8svP5yA4rOydlX1jkndk8XvKG5M8keT3svgZc9s4Z45I8vBS3/clSXf/WlUdXVWv7O4vvXhDJxFkWZunx/OfLm3vfb0pyQ8n2ZzkzO7+r1X1QJKXvqgj5HC0v/PiuCx+IXjJqP3RuoyQF9pXfUA2fiFY/j7yTJKj1rCvvX2eyVd+dlUWMzF+sTw8rfb1X221Tnf3FVX1wSTfn+Q3q+p7Vmn3k0keyWKW9euy+CDsQFb+zFr+ebb3PPoXSd7R3bdW1Xcl+ek17JcX35e7+4zxwccvZnGN7PVJvrSfD+L3qiS3dfcPPcv7K38GLb++Psn53f3pqvqrSb7r4IbNYWzvdbJ/PoulxQ8m+VtJ/jDJHUm2jBWJq1n590v9PdN1YGkxh8IxSR4dYeW7k/zZ9R4Qh4X9nRfXJfn7Sd6b5Mr1GByHl+5+Isnj9ZW7i/5okl/dT5dkMTPy1jH7n6r6pqp6+Qs4TJ6/X8viQ66M0Pj73f2HVfWN3X1Pd1+ZxbLPb0nyZJJvWOp7TJKHu/tPszg/DtWNeI7JYvYlSS46RPvkBTK+V/x4FsuIv5zk/qq6MFl8UFJVr1ul228m+Y6qOnm0e1lVrWVGP1mcgw+P7zM//LwPgMPJR5P8lSSPdfcz3f1YFjPvb8hiVn5zVb0hWawyq6rlm0H94Kj/hSwucXniRR05SczI8hXLS8CS5Je6e61/gue9Sf59Ve1I8qkknz/EY2NOq54XVfWWJHu6+2fHtU6/UVV/qbvvWL+hcpi4KMm/HjdT+Z0kf+0A7d+ZxTLDT4yZvt1Jzn8hB8jz9tNJ/m1V/VaSP85XguPbxgdezyT5bJIPZTFruqeqPp3FrNg1SX5+hJb/mEO3kuOns1ia+ntZBB7XWR/muvuT47zYnkW4vLaq/l4Wq3xuTPLpFe13j9nU940b+SSLa2b/0xr+ub+fxZL4381iSfo37L85E7kni7sV/+yK2iu6+9Fx2crVYxXApiT/PMm9o93jtfgTk0cn+V9evCGzrLrNhAMAADAPS4sBAACYiiALAADAVARZAAAApiLIAgAAMBVBFgAAgKkIsgAAAExFkAUAAGAqgiwAAABT+f8BJeOZKz7AJjwAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "fig5, ax5 = plt.subplots(figsize=(16, 9))\n", + "ax5.bar(disp_med['submitted_via'], disp_med['consumer_disputed?'])\n", + "\n", + "ax5.set_title(\"Disputed issue counts per submission medium\")\n", + "ax5.set_xlabel('Medium')\n", + "ax5.set_ylabel('Disputed issue count')" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "5c033a69", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0, 0.5, 'Disputed issue rate')" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "fig6, ax6 = plt.subplots(figsize=(16, 9))\n", + "ax6.bar(disp_med['submitted_via'], disp_med['dispute_rate'])\n", + "\n", + "ax6.set_title(\"Disputed issue rates per submission medium\")\n", + "ax6.set_xlabel('Medium')\n", + "ax6.set_ylabel('Disputed issue rate')" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "52069de3", + "metadata": {}, + "outputs": [], + "source": [ + "df_by_med = df[['submitted_via', 'timely_response']].groupby(['submitted_via', 'timely_response']).size().reset_index()\n", + "df_by_med = df_by_med[df_by_med['timely_response'] == 1]\n", + "df_by_med.columns = ['submitted_via', 'was timely_response', 'count']" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "06157da5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0, 0.5, 'Timely response count')" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "fig7, ax7 = plt.subplots(figsize=(16, 9))\n", + "ax7.bar(df_by_med['submitted_via'], df_by_med['count'])\n", + "\n", + "ax7.set_title(\"Number of timely responses per submission medium\")\n", + "ax7.set_xlabel('Medium')\n", + "ax7.set_ylabel('Timely response count')" + ] + }, + { + "cell_type": "markdown", + "id": "86c3e653", + "metadata": {}, + "source": [ + "### Analysis:\n", + "Quicker, digital forms of communication such as *Email* and *Web* clearly have a higher dispute rate than *Postal mail* or *Referral*, however it should be noted that timeliness of the response does not hold much weight for digital forms. This could highlight the possible complacency/bureaucracy that these companies may have that causes them to give a clearly untimely response, and can also warrant further delving into the structure of customer service in such companies." + ] + }, + { + "cell_type": "markdown", + "id": "a6eae2cf", + "metadata": {}, + "source": [ + "## Sentiment Analysis/Prediction:" + ] + }, + { + "cell_type": "markdown", + "id": "05f5c8e9", + "metadata": {}, + "source": [ + "### Setting up data:\n", + "Since the key data is categorical in nature we can use *one-hot encoding* for it as there is no ordinal relationship in any of the data that is being used." + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "c380bd0b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
date_receivedcompanyzipcodedate_sent_to_companycompany_response_to_consumertimely_responseconsumer_disputed?product_Bank account or serviceproduct_Consumer Loanproduct_Credit card...state_WAstate_WIstate_WVstate_WYsubmitted_via_Emailsubmitted_via_Faxsubmitted_via_Phonesubmitted_via_Postal mailsubmitted_via_Referralsubmitted_via_Web
0735110U.S. Bancorp95993735114111000...0000000010
1735110Wells Fargo & Company91104735114111000...0000000010
2735110Wells Fargo & Company11764735129110000...0000000100
3735110Navient Solutions, Inc.21402735110111000...0000100000
4735110Resurgent Capital Services L.P.30106735110111000...0000000001
\n", + "

5 rows Ă— 181 columns

\n", + "
" + ], + "text/plain": [ + " date_received company zipcode \\\n", + "0 735110 U.S. Bancorp 95993 \n", + "1 735110 Wells Fargo & Company 91104 \n", + "2 735110 Wells Fargo & Company 11764 \n", + "3 735110 Navient Solutions, Inc. 21402 \n", + "4 735110 Resurgent Capital Services L.P. 30106 \n", + "\n", + " date_sent_to_company company_response_to_consumer timely_response \\\n", + "0 735114 1 1 \n", + "1 735114 1 1 \n", + "2 735129 1 1 \n", + "3 735110 1 1 \n", + "4 735110 1 1 \n", + "\n", + " consumer_disputed? product_Bank account or service product_Consumer Loan \\\n", + "0 1 0 0 \n", + "1 1 0 0 \n", + "2 0 0 0 \n", + "3 1 0 0 \n", + "4 1 0 0 \n", + "\n", + " product_Credit card ... state_WA state_WI state_WV state_WY \\\n", + "0 0 ... 0 0 0 0 \n", + "1 0 ... 0 0 0 0 \n", + "2 0 ... 0 0 0 0 \n", + "3 0 ... 0 0 0 0 \n", + "4 0 ... 0 0 0 0 \n", + "\n", + " submitted_via_Email submitted_via_Fax submitted_via_Phone \\\n", + "0 0 0 0 \n", + "1 0 0 0 \n", + "2 0 0 0 \n", + "3 1 0 0 \n", + "4 0 0 0 \n", + "\n", + " submitted_via_Postal mail submitted_via_Referral submitted_via_Web \n", + "0 0 1 0 \n", + "1 0 1 0 \n", + "2 1 0 0 \n", + "3 0 0 0 \n", + "4 0 0 1 \n", + "\n", + "[5 rows x 181 columns]" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Converting dates to numerical values instead of datetime\n", + "date_cols = ['date_received', 'date_sent_to_company']\n", + "for date_col in date_cols:\n", + " df[date_col] = df[date_col].apply(pd.Timestamp.toordinal)\n", + "\n", + "analysis_cols = ['product', 'issue', 'state', 'submitted_via']\n", + "encoded = df\n", + "for c in analysis_cols:\n", + " c_encoded = pd.get_dummies(encoded[c], prefix=c)\n", + " encoded = pd.concat((encoded, c_encoded), axis=1).drop(c, axis=1)\n", + "encoded.head() " + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "44062878", + "metadata": {}, + "outputs": [], + "source": [ + "X = encoded.drop(['company', 'zipcode', 
'consumer_disputed?'], axis=1)\n", + "y = encoded['consumer_disputed?']\n", + "X_tr, X_t, y_tr, y_t = train_test_split(X, y, test_size=0.2, random_state=0)" + ] + }, + { + "cell_type": "markdown", + "id": "b0de514c", + "metadata": {}, + "source": [ + "**First using Logistic Regression to classify one-hot-encoded features:**" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "25a11427", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Classifier accuracy is 0.7980160443197353\n" + ] + } + ], + "source": [ + "classifier1 = LogisticRegression()\n", + "classifier1.fit(X_tr, y_tr)\n", + "y_pred = classifier1.predict(X_t)\n", + "\n", + "print(\"Classifier accuracy is\", accuracy_score(y_t, y_pred))" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "a3ce0650", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/aryanvakharia/opt/anaconda3/lib/python3.9/site-packages/sklearn/utils/deprecation.py:87: FutureWarning: Function plot_confusion_matrix is deprecated; Function `plot_confusion_matrix` is deprecated in 1.0 and will be removed in 1.2. Use one of the class methods: ConfusionMatrixDisplay.from_predictions or ConfusionMatrixDisplay.from_estimator.\n", + " warnings.warn(msg, category=FutureWarning)\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plot_confusion_matrix(classifier1, X_t, y_t, cmap=plt.cm.Greens)" + ] + }, + { + "cell_type": "markdown", + "id": "141b8875", + "metadata": {}, + "source": [ + "### Analysis:\n", + "As we can see, the classifier needs a great deal of work to accurately predict a disputed complaint: the confusion matrix above shows that a truly disputed complaint is not recognized by the classifier." + ] + }, + { + "cell_type": "markdown", + "id": "730fb665", + "metadata": {}, + "source": [ + "**Using a Decision Tree Classifier to improve prediction:**" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "id": "fc0a33cc", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Classifier accuracy is 0.7119846751564861\n" + ] + } + ], + "source": [ + "classifier2 = DecisionTreeClassifier(random_state=42)\n", + "classifier2.fit(X_tr, y_tr)\n", + "y_pred = classifier2.predict(X_t)\n", + "\n", + "print(\"Classifier accuracy is\", accuracy_score(y_t, y_pred))" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "id": "0435bfbf", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/aryanvakharia/opt/anaconda3/lib/python3.9/site-packages/sklearn/utils/deprecation.py:87: FutureWarning: Function plot_confusion_matrix is deprecated; Function `plot_confusion_matrix` is deprecated in 1.0 and will be removed in 1.2. Use one of the class methods: ConfusionMatrixDisplay.from_predictions or ConfusionMatrixDisplay.from_estimator.\n", + " warnings.warn(msg, category=FutureWarning)\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plot_confusion_matrix(classifier2, X_t, y_t, cmap=plt.cm.Reds)" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "2862af72", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " 0 0.81 0.84 0.82 88733\n", + " 1 0.26 0.23 0.24 22459\n", + "\n", + " accuracy 0.71 111192\n", + " macro avg 0.53 0.53 0.53 111192\n", + "weighted avg 0.70 0.71 0.70 111192\n", + "\n" + ] + } + ], + "source": [ + "print(classification_report(y_t, y_pred))" + ] + }, + { + "cell_type": "markdown", + "id": "52f413a8", + "metadata": {}, + "source": [ + "### Analysis:\n", + "The decision tree model has a lower accuracy but because it deals with the categorical data better it manages to correctly predict some of the disputed issues." + ] + }, + { + "cell_type": "markdown", + "id": "c04cfc62", + "metadata": {}, + "source": [ + "## Conclusion:" + ] + }, + { + "cell_type": "markdown", + "id": "ce1b2eb9", + "metadata": {}, + "source": [ + "Since both models are very quick and dirty, detailed parsing of the data using the *nltk* library would be ideal in getting a higher recall and higher number of true positive recognition in the future.\n", + "\n", + "However, the visualizations and insights provided by the dataset do inform of key areas for our company to be wary and cautious of, and with constant improvement of data preparation and more robust training models there lies a higher scope of predicting disputed issues and even company responses." 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/README.md b/README.md index 4519ef1..3c3034a 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,3 @@ -# data-oa -Online Assessments for the Data Team - # Data Science: Exploratory Data Analysis on a Consumer Finance Dataset Exploratory data analysis plays a huge part in quantitative finance. Often, in a variety of scenarios, consumer interests, expectations and beliefs play a huge role in deciding major moves for organisations. @@ -32,30 +29,3 @@ Link to dataset: https://drive.google.com/file/d/1Vnny6HVGBgF_mdDtIo2ddOTtOiGF5j Log in with your GApps@Illinois account to access this dataset. -## **Deliverables** -1. A Jupyter Notebook with all the code and your thought process. (Remember the thought process is what we're looking for) -2. A generated, dynamic PDF File with the insights that you have gathered. -3. A list of future hypotheses that you may not have yet implemented / couldn't figure out how to, but feel that could be interesting. Ideally, this would be structured in LaTex. - -## **Grading** -We will be looking at your project and grading it under these five criteria: -1. Code - - If it works - - Modular - - Follows best practices (ie. OOP) -2. Documentation - - Concise and exact. - - Follows popular conventions. - - Visuality. (Lots of graphs and statistics) -3. Styling - - Human readable - - Can quickly glance to receive all relevant information - - Follows Google Style Guide (preferred if it exists) or most popular convention (ie. PEP8) -4. 
Robustness - - Customizable - - No technical debt (future proof) - - Handles bad inputs and errors -5. Git - - [Good commit messages](https://cbea.ms/git-commit/#seven-rules) - - Commits are properly sized - diff --git a/consumer_analysis.ipynb b/consumer_analysis.ipynb new file mode 100644 index 0000000..5e10f2a --- /dev/null +++ b/consumer_analysis.ipynb @@ -0,0 +1,1744 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "2532726f", + "metadata": {}, + "source": [ + "# Analyzing Consumer Complaints from Financial Corporations/Institutions" + ] + }, + { + "cell_type": "markdown", + "id": "5f270b27", + "metadata": {}, + "source": [ + "## Introduction:\n", + "In this notebook I will be analyzing the counts and percentage of **disputed** customer complaints with regard to financial institutions, states and financial products along with a quick and dirty machine learning model to give a rough prediction of when the disputes will occur" + ] + }, + { + "cell_type": "markdown", + "id": "dd4e748c", + "metadata": {}, + "source": [ + "## Getting Required Packages:" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "id": "0060a11a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: xgboost in /Users/aryanvakharia/opt/anaconda3/lib/python3.9/site-packages (1.6.2)\n", + "Requirement already satisfied: numpy in /Users/aryanvakharia/opt/anaconda3/lib/python3.9/site-packages (from xgboost) (1.21.5)\n", + "Requirement already satisfied: scipy in /Users/aryanvakharia/opt/anaconda3/lib/python3.9/site-packages (from xgboost) (1.7.3)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.1.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m22.2.2\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip 
install --upgrade pip\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "%pip install xgboost\n", + "\n", + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "from sklearn.preprocessing import MinMaxScaler\n", + "from sklearn.impute import SimpleImputer\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.tree import DecisionTreeClassifier\n", + "import xgboost as xgb\n", + "from sklearn.model_selection import train_test_split, StratifiedShuffleSplit\n", + "from sklearn.metrics import accuracy_score, plot_confusion_matrix, classification_report\n", + "\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "id": "53fc5391", + "metadata": {}, + "source": [ + "## Importing and Cleaning Data" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "f92ee33a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
date_receivedproductsub_productissuesub_issueconsumer_complaint_narrativecompany_public_responsecompanystatezipcodetagsconsumer_consent_providedsubmitted_viadate_sent_to_companycompany_response_to_consumertimely_responseconsumer_disputed?complaint_id
02013-08-30MortgageOther mortgageLoan modification,collection,foreclosureNaNNaNNaNU.S. BancorpCA95993NaNNaNReferral2013-09-03Closed with explanationYesYes511074
12013-08-30MortgageOther mortgageLoan servicing, payments, escrow accountNaNNaNNaNWells Fargo & CompanyCA91104NaNNaNReferral2013-09-03Closed with explanationYesYes511080
22013-08-30Credit reportingNaNIncorrect information on credit reportAccount statusNaNNaNWells Fargo & CompanyNY11764NaNNaNPostal mail2013-09-18Closed with explanationYesNo510473
32013-08-30Student loanNon-federal student loanRepaying your loanRepaying your loanNaNNaNNavient Solutions, Inc.MD21402NaNNaNEmail2013-08-30Closed with explanationYesYes510326
42013-08-30Debt collectionCredit cardFalse statements or representationAttempted to collect wrong amountNaNNaNResurgent Capital Services L.P.GA30106NaNNaNWeb2013-08-30Closed with explanationYesYes511067
\n", + "
" + ], + "text/plain": [ + " date_received product sub_product \\\n", + "0 2013-08-30 Mortgage Other mortgage \n", + "1 2013-08-30 Mortgage Other mortgage \n", + "2 2013-08-30 Credit reporting NaN \n", + "3 2013-08-30 Student loan Non-federal student loan \n", + "4 2013-08-30 Debt collection Credit card \n", + "\n", + " issue \\\n", + "0 Loan modification,collection,foreclosure \n", + "1 Loan servicing, payments, escrow account \n", + "2 Incorrect information on credit report \n", + "3 Repaying your loan \n", + "4 False statements or representation \n", + "\n", + " sub_issue consumer_complaint_narrative \\\n", + "0 NaN NaN \n", + "1 NaN NaN \n", + "2 Account status NaN \n", + "3 Repaying your loan NaN \n", + "4 Attempted to collect wrong amount NaN \n", + "\n", + " company_public_response company state zipcode tags \\\n", + "0 NaN U.S. Bancorp CA 95993 NaN \n", + "1 NaN Wells Fargo & Company CA 91104 NaN \n", + "2 NaN Wells Fargo & Company NY 11764 NaN \n", + "3 NaN Navient Solutions, Inc. MD 21402 NaN \n", + "4 NaN Resurgent Capital Services L.P. GA 30106 NaN \n", + "\n", + " consumer_consent_provided submitted_via date_sent_to_company \\\n", + "0 NaN Referral 2013-09-03 \n", + "1 NaN Referral 2013-09-03 \n", + "2 NaN Postal mail 2013-09-18 \n", + "3 NaN Email 2013-08-30 \n", + "4 NaN Web 2013-08-30 \n", + "\n", + " company_response_to_consumer timely_response consumer_disputed? 
\\\n", + "0 Closed with explanation Yes Yes \n", + "1 Closed with explanation Yes Yes \n", + "2 Closed with explanation Yes No \n", + "3 Closed with explanation Yes Yes \n", + "4 Closed with explanation Yes Yes \n", + "\n", + " complaint_id \n", + "0 511074 \n", + "1 511080 \n", + "2 510473 \n", + "3 510326 \n", + "4 511067 " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv('consumer_complaints.csv', parse_dates=['date_received', 'date_sent_to_company'], low_memory=False)\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "id": "88e39fd7", + "metadata": {}, + "source": [ + "Having a clean dataset allows for higher accuracy in visualizing, analyzing and in this case predicting on the dataset. In order to make the most efficient use of the above data, I am checking the number of empty/NaN values in each column:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "73bf52d6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "date_received 0\n", + "product 0\n", + "sub_product 158322\n", + "issue 0\n", + "sub_issue 343335\n", + "consumer_complaint_narrative 489151\n", + "company_public_response 470833\n", + "company 0\n", + "state 4887\n", + "zipcode 4505\n", + "tags 477998\n", + "consumer_consent_provided 432499\n", + "submitted_via 0\n", + "date_sent_to_company 0\n", + "company_response_to_consumer 0\n", + "timely_response 0\n", + "consumer_disputed? 0\n", + "complaint_id 0\n", + "dtype: int64" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Check NaNs\n", + "df.isna().sum()" + ] + }, + { + "cell_type": "markdown", + "id": "e8db998a", + "metadata": {}, + "source": [ + "From the above output it is evident that the dataset is populated with many NaNs. 
However, removing all NaN values can cause overfitting in the prediction model and inaccuracies in the data analysis.\n", + "\n", + "So, using a threshold of *column contains >= 10% NaNs*, we will clean each column" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "45eebff3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "sub_product 0.284774\n", + "sub_issue 0.617557\n", + "consumer_complaint_narrative 0.879836\n", + "company_public_response 0.846887\n", + "tags 0.859775\n", + "consumer_consent_provided 0.777936\n", + "dtype: float64" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "thresh = 0.10\n", + "col_nans = df.isna().mean(axis=0)\n", + "col_w_thresh = col_nans[col_nans >= thresh]\n", + "col_w_thresh" + ] + }, + { + "cell_type": "markdown", + "id": "dee366ce", + "metadata": {}, + "source": [ + "The above columns will now be dropped as they have too many NaNs to have any use in our dataset.\n", + "\n", + "*NOTE:* Along with these, the *complaint_id* column will also be dropped for not being relevant to our analysis" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "252d522a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
date_receivedproductissuecompanystatezipcodesubmitted_viadate_sent_to_companycompany_response_to_consumertimely_responseconsumer_disputed?
02013-08-30MortgageLoan modification,collection,foreclosureU.S. BancorpCA95993Referral2013-09-03111
12013-08-30MortgageLoan servicing, payments, escrow accountWells Fargo & CompanyCA91104Referral2013-09-03111
22013-08-30Credit reportingIncorrect information on credit reportWells Fargo & CompanyNY11764Postal mail2013-09-18110
32013-08-30Student loanRepaying your loanNavient Solutions, Inc.MD21402Email2013-08-30111
42013-08-30Debt collectionFalse statements or representationResurgent Capital Services L.P.GA30106Web2013-08-30111
\n", + "
" + ], + "text/plain": [ + " date_received product issue \\\n", + "0 2013-08-30 Mortgage Loan modification,collection,foreclosure \n", + "1 2013-08-30 Mortgage Loan servicing, payments, escrow account \n", + "2 2013-08-30 Credit reporting Incorrect information on credit report \n", + "3 2013-08-30 Student loan Repaying your loan \n", + "4 2013-08-30 Debt collection False statements or representation \n", + "\n", + " company state zipcode submitted_via \\\n", + "0 U.S. Bancorp CA 95993 Referral \n", + "1 Wells Fargo & Company CA 91104 Referral \n", + "2 Wells Fargo & Company NY 11764 Postal mail \n", + "3 Navient Solutions, Inc. MD 21402 Email \n", + "4 Resurgent Capital Services L.P. GA 30106 Web \n", + "\n", + " date_sent_to_company company_response_to_consumer timely_response \\\n", + "0 2013-09-03 1 1 \n", + "1 2013-09-03 1 1 \n", + "2 2013-09-18 1 1 \n", + "3 2013-08-30 1 1 \n", + "4 2013-08-30 1 1 \n", + "\n", + " consumer_disputed? \n", + "0 1 \n", + "1 1 \n", + "2 0 \n", + "3 1 \n", + "4 1 " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "drops_l = list(col_w_thresh.index) + ['complaint_id']\n", + "df = df.drop(drops_l, axis=1)\n", + "\n", + "#Tokenizing Yes and No to 1 and 0 respectively\n", + "df = df.replace(['Yes', 'No'], [1, 0])\n", + "\n", + "#Also tokenizing company response for easier NLP classification\n", + "df.loc[df['company_response_to_consumer'].str.contains(\"Closed\"), 'company_response_to_consumer'] = 1\n", + "df.loc[df['company_response_to_consumer'] != 1, 'company_response_to_consumer'] = 0\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "id": "0e9dee79", + "metadata": {}, + "source": [ + "## Data Analysis and Visualization:\n", + "Since I will be mainly checking the count AND rate of disputed issues, the following functions will be used to get these metrics as a DataFrame:\n", + "\n", + "(*NOTE: Since the data being analysed is categorical I will mostly be using 
bar graphs*)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "08b7d908", + "metadata": {}, + "outputs": [], + "source": [ + "def is_disputed(row, cat):\n", + " return df[(df['consumer_disputed?'] == 1) & (df[cat] == row[cat])]['consumer_disputed?'].sum()\n", + "\n", + "def get_count(cat, data=df):\n", + " df_by_cat = df[[cat, 'issue', 'consumer_disputed?']].groupby([cat]).count().reset_index()\n", + " \n", + " df_by_cat['consumer_disputed?'] = df_by_cat.apply(lambda r: is_disputed(r, cat), axis=1)\n", + " \n", + " df_by_cat['dispute_rate'] = df_by_cat.apply(lambda r: (r['consumer_disputed?'] / r['issue']) * 100, axis=1)\n", + " \n", + " return df_by_cat" + ] + }, + { + "cell_type": "markdown", + "id": "6693cced", + "metadata": {}, + "source": [ + "### Disputes according to State:" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "d6ad5d11", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "State is CA with max disputes: 17615\n" + ] + } + ], + "source": [ + "#State with most disputes using above function:\n", + "disp_st = get_count('state')\n", + "st_c = disp_st['consumer_disputed?']\n", + "m_st = disp_st['state'][pd.Series.argmax(st_c)]\n", + "m_c = disp_st['consumer_disputed?'][pd.Series.argmax(st_c)]\n", + "print(\"State is\", m_st, \"with max disputes:\", m_c)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "6f0729b6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Dispute counts per state')" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "fig1, ax1 = plt.subplots(figsize=(20, 9))\n", + "ax1.bar(disp_st['state'], disp_st['consumer_disputed?'])\n", + "ax1.set_xlabel(\"State\")\n", + "ax1.set_ylabel(\"Disputed issue counts\")\n", + "ax1.set_title(\"Dispute counts per state\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "6dfa6225", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Dispute rates per state')" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "fig2, ax2 = plt.subplots(figsize=(20, 9))\n", + "ax2.bar(disp_st['state'], disp_st['dispute_rate'])\n", + "ax2.set_xlabel(\"State\")\n", + "ax2.set_ylabel(\"Disputed issue rate(disputed count / issue count)\")\n", + "ax2.set_title(\"Dispute rates per state\")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "17ec819c", + "metadata": {}, + "outputs": [], + "source": [ + "#Getting count of issues per state with products\n", + "state_issue_count = df[['state', 'product', 'issue']].groupby(['state', 'product', 'issue']).size().reset_index()\n", + "state_issue_count.columns = ['state', 'product', 'issue', 'count']" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "15c92542", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
stateproductissuecount
501CAMortgageLoan modification,collection,foreclosure19433
942FLMortgageLoan modification,collection,foreclosure11337
502CAMortgageLoan servicing, payments, escrow account8957
4028TXCredit reportingIncorrect information on credit report8712
485CACredit reportingIncorrect information on credit report8585
\n", + "
" + ], + "text/plain": [ + " state product issue count\n", + "501 CA Mortgage Loan modification,collection,foreclosure 19433\n", + "942 FL Mortgage Loan modification,collection,foreclosure 11337\n", + "502 CA Mortgage Loan servicing, payments, escrow account 8957\n", + "4028 TX Credit reporting Incorrect information on credit report 8712\n", + "485 CA Credit reporting Incorrect information on credit report 8585" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#States with 5 highest dispute counts, issues and product\n", + "state_issue_count.sort_values(by = 'count', ascending = False).head()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "63fa2a3a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
stateproductissuecount
3083NVPrepaid cardOverdraft, savings or rewards features1
347AZConsumer LoanPayment to acct not credited1
344AZConsumer LoanLender repossessed or sold the vehicle1
1AACredit cardIdentity theft / Fraud / Embezzlement1
0AABank account or serviceAccount opening, closing, or management1
\n", + "
" + ], + "text/plain": [ + " state product issue \\\n", + "3083 NV Prepaid card Overdraft, savings or rewards features \n", + "347 AZ Consumer Loan Payment to acct not credited \n", + "344 AZ Consumer Loan Lender repossessed or sold the vehicle \n", + "1 AA Credit card Identity theft / Fraud / Embezzlement \n", + "0 AA Bank account or service Account opening, closing, or management \n", + "\n", + " count \n", + "3083 1 \n", + "347 1 \n", + "344 1 \n", + "1 1 \n", + "0 1 " + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#States with 5 lowest dispute counts with related issues and products\n", + "state_issue_count.sort_values(by = 'count', ascending = False).tail()" + ] + }, + { + "cell_type": "markdown", + "id": "eb748a29", + "metadata": {}, + "source": [ + "### **Analysis**: \n", + "As the above console output and bar graphs show, states with higher populations such as CA, NY, TX, etc. tend to have more disputes over financial issues. Moreover, West Coast states have a higher tendency to dispute complaints. Finally, Mortgage modifications and Credit reporting issues have caused the most disputes among the states.\n", + "\n", + "Laws and operations regarding financial products and the companies headquartered in these states are areas which can be further analysed for why they yield such high issue counts for a certain range of products." + ] + }, + { + "cell_type": "markdown", + "id": "f45775c1", + "metadata": {}, + "source": [ + "### Disputes according to Product:" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "1b73be97", + "metadata": {}, + "outputs": [], + "source": [ + "disp_prod = get_count('product')" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "95b32810", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
productissuecount
75MortgageLoan modification,collection,foreclosure97191
59Credit reportingIncorrect information on credit report66718
76MortgageLoan servicing, payments, escrow account60375
62Debt collectionCont'd attempts collect debt not owed42285
0Bank account or serviceAccount opening, closing, or management26661
\n", + "
" + ], + "text/plain": [ + " product issue count\n", + "75 Mortgage Loan modification,collection,foreclosure 97191\n", + "59 Credit reporting Incorrect information on credit report 66718\n", + "76 Mortgage Loan servicing, payments, escrow account 60375\n", + "62 Debt collection Cont'd attempts collect debt not owed 42285\n", + "0 Bank account or service Account opening, closing, or management 26661" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Highest issues per product\n", + "product_issue_count = df.groupby(['product', 'issue']).size().reset_index()\n", + "product_issue_count.columns = ['product', 'issue', 'count']\n", + "product_issue_count.sort_values(by = 'count', ascending=False).head()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "cd258ff1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0, 0.5, 'Product')" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "fig3, ax3 = plt.subplots(figsize=(16, 9))\n", + "ax3.barh(disp_prod['product'], disp_prod['consumer_disputed?']) \n", + "\n", + "ax3.set_title(\"Disputed issue counts per product\")\n", + "ax3.set_xlabel('Disputed issue counts')\n", + "ax3.set_ylabel('Product')" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "id": "40b666b1", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 0, 'Disputed issue rate')" + ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "fig4, ax4 = plt.subplots(figsize=(16, 9))\n", + "ax4.barh(disp_prod['product'], disp_prod['dispute_rate']) \n", + "\n", + "ax4.set_title(\"Disputed issue rates per product\")\n", + "ax4.set_ylabel('Product')\n", + "ax4.set_xlabel('Disputed issue rate')" + ] + }, + { + "cell_type": "markdown", + "id": "61b80521", + "metadata": {}, + "source": [ + "### Analysis:\n", + "Using the same visualization it can be concluded that loan and credit related issues like *Mortgages, Credit Reporting, Student loans* etc are more likely to be disputed, meaning our company should be wary and prepared to efficiently face complicated customer service with such products" + ] + }, + { + "cell_type": "markdown", + "id": "32296070", + "metadata": {}, + "source": [ + "### Disputes according to submission medium:" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "fd3459f9", + "metadata": {}, + "outputs": [], + "source": [ + "disp_med = get_count('submitted_via')" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "id": "abdbe069", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 0, 'Disputed issue count')" + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "fig5, ax5 = plt.subplots(figsize=(16, 9))\n", + "ax5.bar(disp_med['submitted_via'], disp_med['consumer_disputed?'])\n", + "\n", + "ax5.set_title(\"Disputed issue counts per submission medium\")\n", + "ax5.set_xlabel('Medium')\n", + "ax5.set_xlabel('Disputed issue count')" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "5c033a69", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0, 0.5, 'Disputed issue rate')" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAA6wAAAImCAYAAABXZwdOAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAqiklEQVR4nO3debytdV0v8M9XIEEGRUFzPjmWkmKBOZZe0xzTSlPDIjPRe70qXbuFpoamXRrUzDFMc8jUzDQcUssSh5zAATQtS46JooCKoKICfu8f6zmx2OxzzjrD2vt32O/367Vfez3z91nrt4fP+v2eZ1V3BwAAAEZzpfUuAAAAAFYjsAIAADAkgRUAAIAhCawAAAAMSWAFAABgSAIrAAAAQxJYAQZWVS+uqqesdx3bUlV3qaozt7H8m1V1o7WsiZ2zvddyJ/b3qaq6yy5s//dVdfTuqmctrHwOd/U5ANjo9l7vAgA2qqranORaSS5OckmSf03yyiQndvf3k6S7H70Gdbw8yZnd/eRl7L+7D1jGftdCVR2f5Cbd/bD1rmVP1N233MXt77W7alkvu/ocAGx0elgB1tf9uvvAJDdMckKS307y0vUtaWOoqivEm7ZXlPMAgNUIrAAD6O5vdPdJSR6c5OiqOiyZ9X5W1TOmx4dU1Vuq6ryq+lpVvbeqrjQt21xVT6yqf62qr1fVX1TVvtOyX62q980fr6q6qm5SVcckOSrJb01Dd988Lb9OVb2hqs6pqjOq6nFz2+431fX1qvrXJEdu69y2HGt6fO+pxguq6otV9ZsLnNt/b7/yOZmm71tVH5+2/ZequtV2anlMVX02yWenec+tqi9U1flVdWpV3Xmaf88kT0ry4Om5+cQ0/6pV9dKqOms6h2dU1V7TsptU1clV9Y2qOreqXreVOjZNtRxTVV+a9vWEueVXqqrjquo/q+qrVfXXVXX1Fds+oqr+K8k/rbL/nX4+p3lPmurfXFVHrVj3hdNQ3W9W1fur6ger6k+m9vCZqrrN3Pqbq+qnp8e3rapTpuf5K1X17Gn+vlX1l9N5nldVH6mqa03L3l1Vvz73nDy5qj5fVWdX1Sur6qornpOjq+q/ptp/ZxvtYEfPY6d/HlY8Byvb7srhw5ur6v9W1WlV9a2pnV1rqvOCqvrHqjp4a+cFcEUksAIMpLs/nOTMJHdeZfETpmWHZjaU+ElJem75UUl+JsmNk9wsyXaH+Hb3iUleneQPu/uA7r7fFGzenOQTSa6b5G5Jjq2qn5k2+93pGDeejrcj1xi+NMmjpl7lw3Jp2Nre
ua2qqn4sycuSPCrJNZL8WZKTqurK29jsAUl+IsktpumPJDk8ydWT/FWS11fVvt399iS/n+R103Nz62n9V2Q2jPsmSW6T5B5Jfn1a9ntJ3pnk4CTXS/K87ZzCXZPcdNrHcVuCTZLHTXX+VJLrJPl6khes2PankvxIZq/BSjv1fE5+MMkhmb32Ryc5sapuPrf8FzNrW4ck+W6SDyT56DT9N0mevZX9PjfJc7v7oMzazl9P849OctUk18/sNXx0kgtX2f5Xp6+7JrlRkgOSPH/FOndKcvPM2uxTq+pHtnGeC53Hkn8eVvMLSe6e2c/w/ZL8fWav3yGZ/d/2uK1vCnDFI7ACjOdLmYWnlS5Kcu0kN+zui7r7vd09H0Ke391f6O6vJXlmkofu5PGPTHJodz+9u7/X3Z9L8pIkD5mW/2KSZ3b317r7C0n+dAf2fVGSW1TVQd399e7+6ILntjWPTPJn3f2h7r6ku1+RWfi43Ta2+X9T7RcmSXf/ZXd/tbsv7u5nJblyZqHncqaev3slOba7v9XdZyd5Ti59bi7KbHj3dbr7O939vtX2M+dp035OT/IXufQ1e1SS3+nuM7v7u0mOT/LAuuzw3+OnbVcLdzv7fG7xlO7+bnefnOStmb3mW7yxu0/t7u8keWOS73T3K7v7kiSvyyzEr+aiJDepqkO6+5vd/cG5+dfI7FrhS6Z9n7/K9kcleXZ3f667v5nkiUkesuI5eVp3X9jdn8gsYN56lf3s6Hks8+dhNc/r7q909xeTvDfJh7r7Y1M7eGO2/vwCXCEJrADjuW6Sr60y/4+S/EeSd1bV56rquBXLvzD3+POZ9cztjBsmuc40PPO8qjovsx6ea03Lr7PKsRb1C0nuneTzNRs6e/tp/vbObVu1PmFFrdfPts99vvZU1ROq6tM1G8Z7Xma9fYds43j7JDlr7nh/luSa0/LfSlJJPlyzu8P+2nbq39prdsMkb5w7xqczuzHXtbay7Uo7+3wmyde7+1tbqStJvjL3+MJVprd2k61HZNZr+Jlp2O99p/mvSvKOJK+t2fDoP6yqfVbZ/jq5bFv7fGY3j5x/Tr489/jb26hlR85jmT8Pu1IXwIYgsAIMpKqOzCywXq5nrrsv6O4ndPeNMhsq+H+q6m5zq1x/7vENMuupTZJvJbnK3DF+cOWuV0x/IckZ3X21ua8Du/ve0/KzVjnWQrr7I919/8wC3psyDQvdzrl9e77+zIasztf6zBW1XqW7X7OtMrY8qNn1qr+dWS/Zwd19tSTfyCx0XmbdueN9N8khc8c7aMudYLv7y939yO6+Tma9pC+suetFV7G11+wLSe614rz2nXrdLncelzvBnX8+k+Tgqtp/K3XttO7+bHc/NLPX/g+S/E1V7T/1AD+tu2+R5A5J7pvkV1bZxZcyC4/zdV2cywa6ZdidPw+X+VnM5Z97AFYQWAEGUFUHTT1Or03yl9MQ0ZXr3LdmN/WpJOdn1uN2ydwqj6mq69Xs5jxPymxYYzIbGnnLqjq8ZjdiOn7Frr+S2TWBW3w4yflV9dvTDWX2qqrDpjCdzELmE6vq4Kq6XpLHLniOP1BVR1XVVbv7orlz2N65fTzJL0113DOzaze3eEmSR1fVT9TM/lV1n6o6cJGakhyYWeg5J8neVfXUJAeteG42TdcxprvPyuwa1WdNr9mVqurGVfVT03k8aHpOktl1p53LvkYrPaWqrlJVt0zy8Fz6mr04yTOr6obTfg+tqvsveE678nxu8bTp9bpzZgHy9Yseexs1PayqDu3ZRzadN82+pKruWlU/WrMbV52f2RDh1Z6z1yT5jar6oao6IJdeX3zxrta2Hbvz5+HjSe5dVVef3jg6dqmVA1wBCKwA6+vNVXVBZr04v5PZjV4evpV1b5rkH5N8M7MbxLywu989t/yvMgtTn5u+npEk3f3vSZ4+bfvZXL739qWZXVd6XlW9abqG736Z3YjojCTnJvnzzIbKJsnTMhv2eMZ0vFftwPn+
cpLNVXV+ZjfX2fL5pts6t8dP9ZyX2XWMb9qys+4+JbPrWJ+fWUD8j8xuzLOod2R2U5t/n87pO7ns8M4tQe2rVbXlettfSfIDmX1u7tczu0HPtadlRyb5UFV9M8lJSR7f3Wds4/gnTzW/K8kfd/c7p/nPnbZ/59Q+PpjZjaIWtVPP5+TL03l9KbMbcj26uz+zA8femnsm+dT03Dw3yUOm60d/MLPn8PzMhj6fnOQvV9n+ZZm1tfdk1va+kwXfLNkVu/nn4VWZvYG0eVp31btIA3Cp2rF7MAAwoqranOTXu/sf17sWtq+qNmUWcPZZgx5CANhj6WEFAABgSAIrAAAAQzIkGAAAgCHpYQUAAGBIAisAAABD2nu9C1jEIYcc0ps2bVrvMgAAAFiCU0899dzuPnTl/D0isG7atCmnnHLKepcBAADAElTV51ebb0gwAAAAQxJYAQAAGJLACgAAwJAEVgAAAIYksAIAADAkgRUAAIAhCawAAAAMSWAFAABgSAIrAAAAQxJYAQAAGJLACgAAwJAEVgAAAIYksAIAADAkgRUAAIAhCawAAAAMSWAFAABgSAIrAAAAQxJYAQAAGJLACgAAwJAEVgAAAIYksAIAADCkvde7AAAAYGybjnvrepfALth8wn3Wu4SdpocVAACAIQmsAAAADElgBQAAYEgCKwAAAEMSWAEAABiSwAoAAMCQBFYAAACGJLACAAAwJIEVAACAIe293gXARrPpuLeudwnsgs0n3Ge9SwAA2DD0sAIAADAkgRUAAIAhCawAAAAMSWAFAABgSAIrAAAAQxJYAQAAGJLACgAAwJAEVgAAAIYksAIAADAkgRUAAIAhCawAAAAMSWAFAABgSAIrAAAAQxJYAQAAGJLACgAAwJAEVgAAAIYksAIAADAkgRUAAIAh7b3eBVxRbDruretdAjtp8wn3We8SAACAVehhBQAAYEgCKwAAAEMSWAEAABiSwAoAAMCQBFYAAACGJLACAAAwJIEVAACAIQmsAAAADElgBQAAYEhLC6xVdf2q+ueq+nRVfaqqHj/Nv3pV/UNVfXb6fvCyagAAAGDPtcwe1ouTPKG7fyTJ7ZI8pqpukeS4JO/q7psmedc0DQAAAJextMDa3Wd190enxxck+XSS6ya5f5JXTKu9IskDllUDAAAAe641uYa1qjYluU2SDyW5VneflcxCbZJrbmWbY6rqlKo65ZxzzlmLMgEAABjI0gNrVR2Q5A1Jju3u8xfdrrtP7O4juvuIQw89dHkFAgAAMKSlBtaq2iezsPrq7v7bafZXqura0/JrJzl7mTUAAACwZ1rmXYIryUuTfLq7nz236KQkR0+Pj07yd8uqAQAAgD3X3kvc9x2T/HKS06vq49O8JyU5IclfV9UjkvxXkgctsQYAAAD2UEsLrN39viS1lcV3W9ZxAQAAuGJYk7sEAwAAwI4SWAEAABiSwAoAAMCQBFYAAACGJLACAAAwJIEVAACAIQmsAAAADElgBQAAYEgCKwAAAEMSWAEAABiSwAoAAMCQBFYAAACGJLACAAAwJIEVAACAIQmsAAAADElgBQAAYEgCKwAAAEMSWAEAABiSwAoAAMCQBFYAAACGJLACAAAwJIEVAACAIQmsAAAADElgBQAAYEgCKwAAAEMSWAEAABiSwAoAAMCQBFYAAACGJLACAAAwJIEVAACAIQmsAAAADElgBQAAYEgCKwAAAEMSWAEAABiSwAoAAMCQBFYAAACGJLACAAAwJIEVAACAIQmsAAAADElgBQAAYEgCKwAAAEMSWAEAABiSwAoAAMCQBFYAAACGJLACAAAwJIEVAACAIQmsAAAADElgBQAAYEgCKwAAAEMSWAEAABiSwAoAAMCQBFYAAACGJLACAAAwJIEVAACAIQmsAAAADElgBQAAYEh7r3cBAACsvU3HvXW9S2AXbD7hPutdAqwJPawAAAAMSWAFAABgSAIrAAAAQxJYAQAAGJKbLgEMzE1R9mxuigIA
u0YPKwAAAEMSWAEAABiSwAoAAMCQBFYAAACGJLACAAAwJIEVAACAIQmsAAAADElgBQAAYEgCKwAAAEMSWAEAABiSwAoAAMCQBFYAAACGJLACAAAwJIEVAACAIQmsAAAADElgBQAAYEgCKwAAAEMSWAEAABiSwAoAAMCQBFYAAACGJLACAAAwJIEVAACAIQmsAAAADElgBQAAYEgCKwAAAEMSWAEAABiSwAoAAMCQBFYAAACGJLACAAAwJIEVAACAIQmsAAAADElgBQAAYEgCKwAAAEMSWAEAABiSwAoAAMCQlhZYq+plVXV2VX1ybt7xVfXFqvr49HXvZR0fAACAPdsye1hfnuSeq8x/TncfPn29bYnHBwAAYA+2tMDa3e9J8rVl7R8AAIArtvW4hvV/V9Vp05Dhg7e2UlUdU1WnVNUp55xzzlrWBwAAwADWOrC+KMmNkxye5Kwkz9rait19Yncf0d1HHHrooWtUHgAAAKNY08Da3V/p7ku6+/tJXpLktmt5fAAAAPYcaxpYq+rac5M/l+STW1sXAACAjW3vZe24ql6T5C5JDqmqM5P8bpK7VNXhSTrJ5iSPWtbxAQAA2LMtLbB290NXmf3SZR0PAACAK5b1uEswAAAAbJfACgAAwJAEVgAAAIa0tGtYAYC1tem4t653CeyCzSfcZ71LABiOHlYAAACGJLACAAAwJIEVAACAIQmsAAAADElgBQAAYEgCKwAAAEMSWAEAABiSwAoAAMCQBFYAAACGJLACAAAwJIEVAACAIQmsAAAADElgBQAAYEgCKwAAAEMSWAEAABiSwAoAAMCQthtYq+oqVfWUqnrJNH3Tqrrv8ksDAABgI1ukh/Uvknw3ye2n6TOTPGNpFQEAAEAWC6w37u4/THJRknT3hUlqqVUBAACw4S0SWL9XVfsl6SSpqhtn1uMKAAAAS7P3Auscn+TtSa5fVa9OcsckD19mUQAAALDdwNrd76yqU5PcLrOhwI/v7nOXXhkAAAAb2iJ3CX5Xd3+1u9/a3W/p7nOr6l1rURwAAAAb11Z7WKtq3yRXSXJIVR2cS2+0dFCS66xBbQAAAGxg2xoS/Kgkx2YWTk/NpYH1/CQvWG5ZAAAAbHRbDazd/dwkz62qx3b389awJgAAAFjopkvPq6rDktwiyb5z81+5zMIAAADY2LYbWKvqd5PcJbPA+rYk90ryviQCKwAAAEuz3bsEJ3lgkrsl+XJ3PzzJrZNcealVAQAAsOEtElgv7O7vJ7m4qg5KcnaSGy23LAAAADa67Q4JTnJKVV0tyUsyu1vwN5N8eJlFAQAAwDYDa1VVkv/X3ecleXFVvT3JQd192loUBwAAwMa1zSHB3d1J3jQ3vVlYBQAAYC0scg3rB6vqyKVXAgAAAHMWuYb1rkkeVVWfT/KtJJVZ5+utlloZAAAAG9oigfVeS68CAAAAVthuYO3uz69FIQAAADBvkWtYAQAAYM0JrAAAAAxpocBaVTesqp+eHu9XVQcutywAAAA2uu0G1qp6ZJK/SfJn06zrZe6zWQEAAGAZFulhfUySOyY5P0m6+7NJrrnMogAAAGCRwPrd7v7elomq2jtJL68kAAAAWCywnlxVT0qyX1XdPcnrk7x5uWUBAACw0S0SWI9Lck6S05M8Ksnbkjx5mUUBAADA3ttbobu/n+Ql0xcAAACsie0G1qo6I6tcs9rdN1pKRQAAAJAFAmuSI+Ye75vkQUmuvpxyAAAAYGa717B291fnvr7Y3X+S5H8svzQAAAA2skWGBP/Y3OSVMutxPXBpFQEAAEAWGxL8rLnHFyfZnOQXl1INAAAATBa5S/Bd16IQAAAAmLfda1ir6vFVdVDN/HlVfbSq7rEWxQEAALBxbTewJvm17j4/yT2SXDPJw5OcsNSqAAAA2PAWCaw1fb93kr/o7k/MzQMAAIClWCSwnlpV78wssL6jqg5M8v3llgUAAMBGt8hdgh+R5PAkn+vub1fV1TMbFgwAAABLs0gP
6+2T/Ft3n1dVD0vy5CTfWG5ZAAAAbHSLBNYXJfl2Vd06yW8l+XySVy61KgAAADa8RQLrxd3dSe6f5Lnd/dwkBy63LAAAADa6Ra5hvaCqnpjkYUl+sqr2SrLPcssCAABgo1ukh/XBSb6b5BHd/eUk103yR0utCgAAgA1vuz2sU0h99tz0f8U1rAAAACzZVgNrVb2vu+9UVRck6flFSbq7D1p6dQAAAGxYWw2s3X2n6bsbLAEAALDmFrmGFQAAANacwAoAAMCQBFYAAACGJLACAAAwpG3dJXjl3YEvw12CAQAAWKZt3SX4wCSpqqcn+XKSV2X2kTZHJXHnYAAAAJZqkSHBP9PdL+zuC7r7/O5+UZJfWHZhAAAAbGyLBNZLquqoqtqrqq5UVUcluWTZhQEAALCxLRJYfynJLyb5yvT1oGkeAAAALM1Wr2Hdors3J7n/8ksBAACAS223h7WqblZV76qqT07Tt6qqJy+/NAAAADayRYYEvyTJE5NclCTdfVqShyyzKAAAAFgksF6luz+8Yt7FyygGAAAAtlgksJ5bVTdO0klSVQ9MctZSqwIAAGDD2+5Nl5I8JsmJSX64qr6Y5IwkRy21KgAAADa8RQJrd/dPV9X+Sa7U3RdU1Q8tuzAAAAA2tkWGBL8hSbr7W919wTTvb5ZXEgAAAGyjh7WqfjjJLZNctap+fm7RQUn2XXZhAAAAbGzbGhJ88yT3TXK1JPebm39BkkcusSYAAADYemDt7r9L8ndVdfvu/sAa1gQAAAAL3XTpmKq6XI9qd//aEuoBAACAJIsF1rfMPd43yc8l+dJyygEAAICZ7QbW7n7D/HRVvSbJPy6tIgAAAMhiH2uz0k2T3GB3FwIAAADzttvDWlUXJOkkNX3/cpLfXnJdAAAAbHCLDAk+cC0KAQAAgHmL3HQpVfXzSe6UWQ/re7v7TcssCgAAALZ7DWtVvTDJo5OcnuSTSR5dVS9YdmEAAABsbIv0sP5UksO6u5Okql6RWXjdpqp6WZL7Jjm7uw+b5l09yeuSbEqyOckvdvfXd6pyAAAArtAWuUvwv+WydwW+fpLTFtju5UnuuWLecUne1d03TfKuaRoAAAAuZ5HAeo0kn66qd1fVu5P8a5JDq+qkqjppaxt193uSfG3F7PsnecX0+BVJHrDDFQMAALAhLDIk+Km78XjX6u6zkqS7z6qqa25txao6JskxSXKDG/jYVwAAgI1mkcB6SpILu/v7VXWzJD+c5O+7+6JlFtbdJyY5MUmOOOKIXuaxAAAAGM8iQ4Lfk2TfqrpuZtedPjyz61N3xleq6tpJMn0/eyf3AwAAwBXcIoG1uvvbSX4+yfO6++eS3HInj3dSkqOnx0cn+bud3A8AAABXcAsF1qq6fZKjkrx1mrfXAhu9JskHkty8qs6sqkckOSHJ3avqs0nuPk0DAADA5SxyDeuxSZ6Y5I3d/amqulGSf97eRt390K0sutvi5QEAALBRbTewdvfJSU6em/5cksctsygAAADYamCtqj/p7mOr6s1JLneX3u7+2aVWBgAAwIa2rR7WV03f/3gtCgEAAIB5Ww2s3X3q9P3kqjp0enzOWhUGAADAxrbVuwTXzPFVdW6SzyT596o6p6qeunblAQAAsFFt62Ntjk1yxyRHdvc1uvvgJD+R5I5V9RtrURwAAAAb17YC668keWh3n7FlxnSH4IdNywAAAGBpthVY9+nuc1fOnK5j3Wd5JQEAAMC2A+v3dnIZAAAA7LJtfazNravq/FXmV5J9l1QPAAAAJNn2x9rstZaFAAAAwLxtDQkGAACAdSOwAgAAMCSBFQAAgCEJrAAAAAxJYAUAAGBIAisAAABDElgBAAAYksAKAADAkARWAAAAhiSwAgAAMCSBFQAAgCEJrAAAAAxJYAUAAGBIAisAAABDElgBAAAYksAKAADAkARWAAAAhiSwAgAAMCSBFQAAgCEJrAAAAAxJYAUAAGBIAisAAABDElgBAAAYksAKAADA
kARWAAAAhiSwAgAAMCSBFQAAgCEJrAAAAAxJYAUAAGBIAisAAABDElgBAAAYksAKAADAkARWAAAAhiSwAgAAMCSBFQAAgCEJrAAAAAxJYAUAAGBIAisAAABDElgBAAAYksAKAADAkARWAAAAhiSwAgAAMCSBFQAAgCEJrAAAAAxJYAUAAGBIAisAAABDElgBAAAYksAKAADAkARWAAAAhiSwAgAAMCSBFQAAgCEJrAAAAAxJYAUAAGBIAisAAABDElgBAAAYksAKAADAkARWAAAAhiSwAgAAMCSBFQAAgCEJrAAAAAxJYAUAAGBIAisAAABDElgBAAAYksAKAADAkARWAAAAhiSwAgAAMCSBFQAAgCEJrAAAAAxJYAUAAGBIAisAAABDElgBAAAYksAKAADAkARWAAAAhiSwAgAAMCSBFQAAgCEJrAAAAAxJYAUAAGBIAisAAABDElgBAAAYksAKAADAkARWAAAAhiSwAgAAMCSBFQAAgCEJrAAAAAxJYAUAAGBIAisAAABD2ns9DlpVm5NckOSSJBd39xHrUQcAAADjWpfAOrlrd5+7jscHAABgYIYEAwAAMKT1Cqyd5J1VdWpVHbPaClV1TFWdUlWnnHPOOWtcHgAAAOttvQLrHbv7x5LcK8ljquonV67Q3Sd29xHdfcShhx669hUCAACwrtYlsHb3l6bvZyd5Y5LbrkcdAAAAjGvNA2tV7V9VB255nOQeST651nUAAAAwtvW4S/C1kryxqrYc/6+6++3rUAcAAAADW/PA2t2fS3LrtT4uAAAAexYfawMAAMCQBFYAAACGJLACAAAwJIEVAACAIQmsAAAADElgBQAAYEgCKwAAAEMSWAEAABiSwAoAAMCQBFYAAACGJLACAAAwJIEVAACAIQmsAAAADElgBQAAYEgCKwAAAEMSWAEAABiSwAoAAMCQBFYAAACGJLACAAAwJIEVAACAIQmsAAAADElgBQAAYEgCKwAAAEMSWAEAABiSwAoAAMCQBFYAAACGJLACAAAwJIEVAACAIQmsAAAADElgBQAAYEgCKwAAAEMSWAEAABiSwAoAAMCQBFYAAACGJLACAAAwJIEVAACAIQmsAAAADElgBQAAYEgCKwAAAEMSWAEAABiSwAoAAMCQBFYAAACGJLACAAAwJIEVAACAIQmsAAAADElgBQAAYEgCKwAAAEMSWAEAABiSwAoAAMCQBFYAAACGJLACAAAwJIEVAACAIQmsAAAADElgBQAAYEgCKwAAAEMSWAEAABiSwAoAAMCQBFYAAACGJLACAAAwJIEVAACAIQmsAAAADElgBQAAYEgCKwAAAEMSWAEAABiSwAoAAMCQBFYAAACGJLACAAAwJIEVAACAIQmsAAAADElgBQAAYEgCKwAAAEMSWAEAABiSwAoAAMCQBFYAAACGJLACAAAwJIEVAACAIQmsAAAADElgBQAAYEgCKwAAAEMSWAEAABiSwAoAAMCQBFYAAACGJLACAAAwJIEVAACAIQmsAAAADElgBQAAYEgCKwAAAEMSWAEAABiSwAoAAMCQBFYAAACGJLACAAAwJIEVAACAIQmsAAAADElgBQAAYEgCKwAAAEMSWAEAABjSugTWqrpnVf1bVf1HVR23HjUAAAAwtjUPrFW1V5IXJLlXklskeWhV3WKt6wAAAGBs69HDetsk/9Hdn+vu7yV5bZL7r0MdAAAADGw9Aut1k3xhbvrMaR4AAAD8t+rutT1g1YOS/Ex3//o0/ctJbtvdj12x3jFJjpkmb57k39a0UFY6JMm5610EewRthR2hvbAjtBd2hPbCjtBe1t8Nu/vQlTP3XodCzkxy/bnp6yX50sqVuvvEJCeuVVFsW1Wd0t1HrHcdjE9bYUdoL+wI7YUdob2wI7SXca3HkOCPJLlpVf1QVf1AkockOWkd6gAAAGBga97D2t0XV9X/TvKOJHsleVl3f2qt6wAAAGBs6zEkON39tiRvW49js9MMz2ZR2go7
QnthR2gv7AjthR2hvQxqzW+6BAAAAItYj2tYAQAAYLsE1g2oqi6pqo/PfR23m/b7L9P3TVX1yd2xT8a0ShvatN41sb7m2sQnq+r1VXUVvws2jtVe/x3cflNV/dKC6+32NlVVP7vlb2FVHV9Vv7m7j8HiVrSnN1fV1baz/qFV9aGq+lhV3XkJ9WgTe7Cqek5VHTs3/Y6q+vO56WdV1f/Zyrbvrip3Dl5nAuvGdGF3Hz73dcLu2Gl332F37Ic9wso2tHm9C2LdbWkThyX5XpJHr3dBrKldff03JdluYF2W7j5pd/0tZLeYb09fS/KY7ax/tySf6e7bdPd7FzlAVe21rWmuUP4lyR2SpKqulNnnrd5ybvkdkrx/HepiQQIr/62qNlfV71fVB6rqlKr6seldqP+sqkdP6xxQVe+qqo9W1elVdf+57b+5ftWznrbWLqrqyKo6rar2rar9q+pTVXXYetfL0r03yU2mx3tV1Uum1/6dVbVfklTV4VX1wal9vLGqDp7mv7uq/qCqPlxV/76lt6Sq9qqqP6qqj0zbPGp9To0FvDfJTarq6lX1pun1+mBV3SpJquqn5kZnfKyqDkxyQpI7T/N+Y+pJfe/0O+WjVbXNN0Sr6i5VdXJV/fXUbk6oqqOmdnR6Vd14Wu9+cz1x/1hV15rm/2pVPX/Jzws75wNJrpskVXXjqnp7VZ06tY8frqrDk/xhkntP7We/qrrH9L/MR6ce/wOm7TdX1VOr6n1JHrTK9COn3zGfqKo31A6OFGBY788UWDMLqp9MckFVHVxVV07yI0ky/Q45dfrf99pz2z+sqv6lZj3+t13b0kkE1o1qv7rscM4Hzy37QnffPrN/OF6e5IFJbpfk6dPy7yT5ue7+sSR3TfKsqqo1rJ0xzLehN2Yr7aK7P5LZ5yw/I7N/KP6yuw0RvQKrqr2T3CvJ6dOsmyZ5QXffMsl5SX5hmv/KJL/d3bea1v3dud3s3d23TXLs3PxHJPlGdx+Z5Mgkj6yqH1riqbATVrz+T0vysek1flJmr3mS/GaSx3T34UnunOTCJMclee/Uq/acJGcnufv0O+XBSf50gcPfOsnjk/xokl9OcrOpHf15ksdO67wvye26+zZJXpvkt3btjFmmmvV63i2zvyPJ7C6uj+3uH8+sHb2wuz+e5KlJXje1qf2TPDnJT0/t55Qk88M9v9Pdd+ru164y/bfdfWR33zrJpzP7vcMerru/lOTiqrpBZsH1A0k+lOT2SY7I7LV+TpIHTm3rZUmeObeL/adRhP9rWsYaW5ePtWHdXTj9Ul/Nlj8Kpyc5oLsvyOxdqO/U7BqSbyX5/ar6ySTfz+xdz2sl+fJyS2Ywl2lDVbVPtt4unp7kI5mF2setfamskf2q6uPT4/cmeWmS6yQ5Y/qHMklOTbKpqq6a5GrdffI0/xVJXj+3r7+dX396fI8kt6qqB07TV80sDJ+xe0+DnbTa6/+hTG9QdPc/VdU1ptf+/UmeXVWvziwgnLnK+577JHn+1Ht2SZKbLVDDR7r7rCSpqv9M8s5p/umZvZGWJNdL8rqp9+QHov2Makt72pTZ74F/mHpJ75Dk9XPt5cqrbHu7JLdI8v5pvR/ILKBs8boV689PH1ZVz0hytSQHJHnHrpwEQ9nSy3qHJM/O7P+UOyT5RpIvZvY35h+mNrNXkrPmtn1NknT3e6rqoKq6Wneft3alI7Cy0nen79+fe7xleu8kRyU5NMmPd/dFVbU5yb5rWiEj2la7uHpmf/j3meZ9a10qZNku90bY9Id//vfIJUn2W2BfW7a5JJf+narMelb8Azmm1V7/1UbfdHefUFVvTXLvJB+sqp9eZb3fSPKVzHpNr5TZG17bs/Jv1vzfsy3t6HlJnt3dJ1XVXZIcv8B+WXsXdvfh0xscb8nsGtaXJzlvG2+4b1FJ/qG7H7qV5Sv/Bs1PvzzJA7r7E1X1q0nusmNlM7At17H+aGZDgr+Q5AlJzk/yT0muO40wXM3KzwD1
maBrzJBgdtRVk5w9hZK7JrnhehfEELbVLk5M8pQkr07yB+tRHGPp7m8k+XpdejfPX05y8jY2SWY9Hf9z6s1PVd2sqvZfYpnsuvdk9mZWpnB4bnefX1U37u7Tu/sPMhuu+cNJLkhy4Ny2V01yVnd/P7P2sbtuiHPVzHpTkuTo3bRPlmT6XfG4zIb/XpjkjKp6UDJ7Q6Sqbr3KZh9Mcsequsm03lWqapEe+mTWBs+afs8ctcsnwEjen+S+Sb7W3Zd099cy60m/fWa97IdW1e2T2aixqpq/KdODp/l3yuzSlG+saeXoYd2g5oduJcnbu3vRj7Z5dZI3V9UpST6e5DO7uTb2TKu2i6r6lSQXd/dfTdci/UtV/Y/u/qf1K5VBHJ3kxdNNTT6X5OHbWf/PMxse+NGp5+6cJA9YZoHssuOT/EVVnZbk27k0IB47vbF1SZJ/TfL3mfWCXlxVn8isl+uFSd4whZN/zu4bmXF8ZkNKv5hZsHEd9OC6+2NTu3hIZiHyRVX15MxG7bw2ySdWrH/O1Dv6mumGOsnsmtZ/X+BwT8lsKPvnMxtKfuC2V2cPcnpmdwf+qxXzDujus6fLTf506tXfO8mfJPnUtN7Xa/bRjQcl+bW1K5ktqluvNgAAAOMxJBgAAIAhCawAAAAMSWAFAABgSAIrAAAAQxJYAQAAGJLACgC7QVV1Vb1qbnrvqjqnqt6yg/t5d1UdMT1+W1VdbTeXCgB7DJ/DCgC7x7eSHFZV+3X3hUnunuSLu7LD7r73bqkMAPZQelgBYPf5+yT3mR4/NMlrtiyoqv2r6mVV9ZGq+lhV3X+av19VvbaqTquq1yXZb26bzVV1SFVtqqpPzs3/zao6fnr87qp6TlW9p6o+XVVHVtXfVtVnq+oZa3DOALA0AisA7D6vTfKQqto3ya2SfGhu2e8k+afuPjLJXZP8UVXtn+R/Jvl2d98qyTOT/PhOHPd73f2TSV6c5O+SPCbJYUl+taqusdNnAwDrzJBgANhNuvu0qtqUWe/q21YsvkeSn62q35ym901ygyQ/meRP57Y/bScOfdL0/fQkn+rus5Kkqj6X5PpJvroT+wSAdSewAsDudVKSP05ylyTzvZuV5Be6+9/mV66qJOnt7PPiXHZU1L4rln93+v79ucdbpv2tB2CPZUgwAOxeL0vy9O4+fcX8dyR5bE0JtapuM81/T5KjpnmHZTaUeKWvJLlmVV2jqq6c5L5LqRwABiOwAsBu1N1ndvdzV1n0e0n2SXLadAOl35vmvyjJAdNQ4N9K8uFV9nlRkqdndk3sW5J8Zhm1A8Boqnt7o5AAAABg7elhBQAAYEgCKwAAAEMSWAEAABiSwAoAAMCQBFYAAACGJLACAAAwJIEVAACAIQmsAAAADOn/AzjDS5+ZZsZkAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "fig6, ax6 = plt.subplots(figsize=(16, 9))\n", + "ax6.bar(disp_med['submitted_via'], disp_med['dispute_rate'])\n", + "\n", + "ax6.set_title(\"Disputed issue rates per submission medium\")\n", + "ax6.set_xlabel('Medium')\n", + "ax6.set_ylabel('Disputed issue rate')" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "52069de3", + "metadata": {}, + "outputs": [], + "source": [ + "df_by_med = df[['submitted_via', 'timely_response']].groupby(['submitted_via', 'timely_response']).size().reset_index()\n", + "df_by_med = df_by_med[df_by_med['timely_response'] == 1]\n", + "df_by_med.columns = ['submitted_via', 'was timely_response', 'count']" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "06157da5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0, 0.5, 'Timely response count')" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAA8YAAAImCAYAAACRopP3AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAA56UlEQVR4nO3de7xtZV0v/s83QCEV5KZHgcS85EGOUSJesrIoMK2wDh4xL1gW5sHSU57CbhhKYSfll8ejHRMUSQXykqQSkuQlL8hWUcRLkOIBQUE3Iqmg4Pf3xxgr516svfbasOdae+/xfr9e87XGfMZ4xnzGWGNdPvN5xjOruwMAAABT9X1r3QAAAABYS4IxAAAAkyYYAwAAMGmCMQAAAJMmGAMAADBpgjEAAACTJhgDTExVvaaqXrhGr11V9eqquq6qPrzCOn9dVX88h7bsX1VdVTtu6X2zdqrq8qr6mS20rz+oqlfdjvpPqqp3bom2rKbZc3h7zwHAtsI/AwBrrKouT7JLkh/s7m+MZb+e5Mnd/ag1bNo8PDLJzybZd+FYZ1XV05L8enc/cqGsu39z9ZoH39Pdf3Y7678uyeu2UHPWxO09BwDbCj3GAFuHHZM8e60bsbmqaofNrHKvJJcvFYq3VWMvuL+nt5HzB8DWwB8igK3D/0ry3Kq66+IVSw35rap3j73KqaqnVdX7q+rkqvpaVX2uqh4xll9RVddU1dGLdrtXVZ1XVTdU1Xuq6l4z+37AuG59VX22qv7bzLrXVNUrquodVfWNJD+1RHvvWVVnj/Uvq6rfGMufnuRVSR5eVf9eVX+6qN5/TvLXM+u/NvOaLxyXH1VVV1bV743HdXVVPa6qHlNV/zq+5h/M7PP7quq4qvq3qvpqVZ1VVXss0ebHV9VHFpX9blX9/eJtZ87/iVX1/iTfTPKDmzhvj6mqT43n+4tV9dxFx/MHVfWVcQjrk2bq7VZVr62qa6vqC1X1Rwshcvz+/ktV/eU4NP3zVfVzM3WfNl4LN4zrZvf7a1X16bHeuQvf/zGknjye2+ur6hNVdeAy5+DPq+rD47ZvnT23VfWwqvrAeE1+vKoetdz5W2L/vz+eqxvG83noWL7BrQAL53BR9YeM5/u6Gobu77zofK/0+nl+Vf3tuLxzVf3teB19raourKq7L3euF75HM/t7xFjv+vHrIxadkxfU8LN8Q1W9s6r22si536I/B1X1lPH6+mpV/eGi15o9B7c617XhsOvnV9Xfjefphqq6uKruX1XPG9t5RVUdttQxAaw1wRhg67AuybuTPPc21n9okk8k2TPJ65OckeQhSe6b5MlJXlZVd57Z/klJXpBkryQXZRzuWVV3SnLeuI+7JXlikpdX1QNn6v5KkhOT3CXJv+TW3pDkyiT3THJkkj+rqkO7+5Qkv5nkg9195+4+frZSd3960fq7buRY/1OSnZPsk+RPkvzNeIwPTvLjSf6kqhaC1m8neVySnxzbc12S/7PEPs9Ocu8awvmCJyc5fSNtSJKnJDkmw3m4Nsuft1OSPKO775LkwCTnLzqevcbjOTrJK6vqh8Z1/zvJbhmC408meWqSX52p+9Aknx3r/0WSU8Zwe6ckL03yc+NrPiLD9zlV9bgkf5Dkl5PsneR9Gb5nSXJYkp9Icv8kd03yhCRfXeYcPDXJr2U4tzePr5mq2ifJ25O8MMkeGa7rN1XV3hs5f1+Y3el4/M9K8pCx/YcnuXyZdiz2pLHOfcZj+aOZdZtz/cw6OsP3Yr8MP2e/meRby53rRce0R4Zz8tKx/kuSvL2q9pzZ7FcyfH/vluQOWf73wRb5OaiqA5K8IsP3455j2/Zd5nU35Rcy/NzsnuRjSc7N8P/mPklOSPJ/b8e+AeZGMAbYevxJkt9aFB5W6vPd/eruviXJmRn+eT+hu2/q7ncm+XaGkLzg7d393u6+KckfZuil3S/Jz2cY6vzq7r65uz+a5E0ZAu6Ct3b3+7v7u91942wjxn08Msnvd/e
N3X1Rhl7ip9yGY9qY7yQ5sbu/k+ENgL2S/FV339DdlyS5JMmDxm2fkeQPu/vK8Vifn+TIWjTh1rjuzAzBImOg3T/J25Zpx2u6+5LuvjnJo7P8eftOkgOqatfuvm5cP+uPx+/VezKEp/9WwzD1JyR53nhslyd5cTY8l1/o7r8Zv++nJblHkruP676b5MCq2qW7rx7PzcI5+fPu/vTY9j9LclANvcbfyRBUH5Ckxm2uXuYcnN7dnxyHxv/xTLufnOQd3f2O8To5L8ObP49Z6vyN38tZtyS543jOduruy7v735Zpx2Iv6+4runt9hjdxnjizbnOunyyqt2eS+3b3Ld39ke7++rhuY+d61mOTXNrdp4/H/IYkn8kQJBe8urv/tbu/leSsJActc4xb6ufgyCRvm/l98Mfj8dxW7+vuc8dr6+8yvPly0kw7968lRsYArDXBGGAr0d2fzBDEjrsN1b88s/ytcX+Ly2Z7jK+Yed1/T7I+Q2/RvZI8dBwq+rUahjM/KUPv1K3qLuGeSdZ39w0zZV/I0Fu0pXx1DILJeKy59fEvHOu9krxl5lg+nSF03T23dlqSX6mqyhA+zxqDwsbMnodNnbf/miEUfqGGoesPn6l73aJ7rr+Q4TzulaHX8AuL1s2eyy8tLHT3N8fFO4/7e0KGXs2rq+rtVfWAmbb+1Uw71yepJPt09/lJXpahN/HLVfXKqtp1hefgC0l2Gtt9rySPX3Q+HpkhuC9VdwPdfVmS52QIcNdU1RlVdc9l2rGpds3W3ZzrZ9bpGXo/z6iqq6rqL8bQvty5nnXPLOoZzzLfzwxDzJdqx205juV+Du6ZDX8ffCPLjxLYlMVt+MoS7VzuuADWhGAMsHU5PslvZMN/lhdC0/fPlM0G1dtiv4WFcYj1HkmuyvAP8nu6+64zjzt39zNn6vYy+70qyR5VdZeZsh9I8sUVtmu5fd8WV2QY4jp7PDt3963a090fytCz/uMZhrQuN4x6cVuXPW/dfWF3H5FhiOzfZ+gNXLD7OBx3wQ9kOI9fydAreK9F61Z0Lsdeu5/NEEY/k2Go7UJbn7Gorbt09wfGei/t7gcneWCGYcj/c5mX2W9m+QfG9n5lfI3TF73Gnbr7pNkmbqL9r+9hdvJ7jdu+aFz1jWz6Z2Fxu65a7rVWoru/091/2t0HZBgu/fMZhpIvd65nXZUNv5cLbVvpz8btsdzPwdXZ8PfB92foGV/KBud+HB1wW0a4AGx1BGOArcjYU3ZmhnsCF8quzfDP85Oraoeq+rUM907eHo+pqkdW1R0y3Gt8QXdfkaHH+v7jZDw7jY+HLLr3drn2X5HkA0n+vIbJih6U5OlZ+UfWfDnJvmO7toS/TnJifW9yqb2r6ohltn9thh7Tm7t7qfunN2aj562q7lDD59nuNg4n/XqG3rpZfzpu9+MZAtffjb1sZ43tv8t4DL+T5G831ZiquntV/eIYuG9K8u8zr/nXSZ63cP9zDRN8PX5cfkhVPbSqdsoQgm5coq2znlxVB4xh6oQkbxzb/bdJfqGqDh+v2Z1rmLhpRfeuVtUPVdVPV9UdxzZ8a6YdF2W4fveoqv+UoWd5sWOrat/xvt4/yPAzdbtU1U9V1X8Zw+DXM7wJcMsmzvWsd2S4Rn6lqnasqickOSDLD9ffUpb7OXhjkp+f+X1wQjb+/+G/Jtm5qh47XiN/lGHIO8A2TzAG2PqckOROi8p+I0PP3Vcz9OR94Ha+xusz9E6vzzBZz5OSZBwCfViSozL0cH0pQ0/d5vzz+8QM9+deleQtSY4f7zFdifMz3Bv5par6yma85sb8VYaJtd5ZVTck+VCGCas25vQMk2Ntqrd4Ays4b09JcnlVfT3DkNsnz1T/UobJkK7K8AbCb3b3Z8Z1v5UhoH4uw0Rnr09y6gqa9H1Jfnfc5/oMky7997GtbxnbdsbYnk8mWZjNetcMvZ3XZRjm+9Ukf7nM65ye5DXjMeyc8Q2d8Q2SIzKE0msz9Fj+z6z
8/447JjkpQ+/zlzL0tC/Msnx6ko9nmIzrnVk69L5+XPe58fHCJbbZXP8pQ4j8eoahyO/J8AbARs/1rO7+aoY3PX43w3n9vSQ/391b4jrflI3+HIz3Ix+b4ZxdneF7v3iW74zbXp/h2F6V4c26b2xsW4BtTXVv6VFrALBtqqpdklyT5Ee7+9JVeL1HJfnb7r49swCviap6d4a2v2qt2wIAt5ceYwD4nmcmuXA1QjEAsPXYcdObAMD2r6ouzzA78+PWtiUAwGozlBoAAIBJM5QaAACASROMAQAAmDT3GI/22muv3n///de6GQAAAMzBRz7yka90995LrROMR/vvv3/WrVu31s0AAABgDqrqCxtbZyg1AAAAkyYYAwAAMGmCMQAAAJMmGAMAADBpgjEAAACTJhgDAAAwaYIxAAAAkyYYAwAAMGmCMQAAAJMmGAMAADBpgjEAAACTJhgDAAAwaYIxAAAAkyYYAwAAMGmCMQAAAJMmGAMAADBpgjEAAACTJhgDAAAwaYIxAAAAkza3YFxVO1fVh6vq41V1SVX96Vj+/Kr6YlVdND4eM1PneVV1WVV9tqoOnyl/cFVdPK57aVXVWH7HqjpzLL+gqvafqXN0VV06Po6e13ECAACwbdtxjvu+KclPd/e/V9VOSf6lqs4Z153c3X85u3FVHZDkqCQPTHLPJP9UVffv7luSvCLJMUk+lOQdSR6d5JwkT09yXXfft6qOSvKiJE+oqj2SHJ/k4CSd5CNVdXZ3XzfH4wUAAGAbNLdg3N2d5N/HpzuNj16myhFJzujum5J8vqouS3JIVV2eZNfu/mCSVNVrkzwuQzA+Isnzx/pvTPKysTf58CTndff6sc55GcL0G7bU8QEAAFvW/se9fa2bwO1w+UmPXesm3GZzvce4qnaoqouSXJMhqF4wrnpWVX2iqk6tqt3Hsn2SXDFT/cqxbJ9xeXH5BnW6++Yk1yfZc5l9AQAAwAbmGoy7+5buPijJvhl6fw/MMCz6PkkOSnJ1khePm9dSu1im/LbW+Q9VdUxVrauqdddee+0yRwIAAMD2alVmpe7uryV5d5JHd/eXx8D83SR/k+SQcbMrk+w3U23fJFeN5fsuUb5BnaraMcluSdYvs6/F7Xpldx/c3Qfvvffet+cQAQAA2EbNc1bqvavqruPyLkl+JslnquoeM5v9UpJPjstnJzlqnGn63knul+TD3X11khuq6mHj/cNPTfLWmToLM04fmeT88d7mc5McVlW7j0O1DxvLAAAAYAPznJX6HklOq6odMgTws7r7bVV1elUdlGFo8+VJnpEk3X1JVZ2V5FNJbk5y7DgjdZI8M8lrkuySYdKthdmtT0ly+jhR1/oMs1qnu9dX1QuSXDhud8LCRFwAAAAwa56zUn8iyY8sUf6UZeqcmOTEJcrXJTlwifIbkzx+I/s6Ncmpm9FkAAAAJmhV7jEGAACArZVgDAAAwKQJxgAAAEyaYAwAAMCkCcYAAABMmmAMAADApAnGAAAATJpgDAAAwKQJxgAAAEyaYAwAAMCkCcYAAABMmmAMAADApAnGAAAATJpgDAAAwKQJxgAAAEyaYAwAAMCkCcYAAABMmmAMAADApAnGAAAATJpgDAAAwKQJxgAAAEyaYAwAAMCkCcYAAABMmmAMAADApAnGAAAATJpgDAAAwKQJxgAAAEyaYAwAAMCkCcYAAABMmmAMAADApAnGAAAATJpgDAAAwKQJxgAAAEyaYAwAAMCkCcYAAABMmmAMAADApAnGAAAATJpgDAAAwKQJxgAAAEyaYAwAAMCkCcYAAABMmmAMAADApAnGAAAATJpgDAAAwKQJxgAAAEyaYAwAAMCkCcYAAABMmmAMAADApAnGAAAATJpgDAAAwKQJxgAAAEyaYAwAAMCkCcYAAABMmmAMAADApAnGAAAATJpgDAAAwKQJxgAAAEyaYAwAAMCkzS0YV9XOVfXhqvp4VV1SVX86lu9RVedV1aXj191n6jyvqi6
rqs9W1eEz5Q+uqovHdS+tqhrL71hVZ47lF1TV/jN1jh5f49KqOnpexwkAAMC2bZ49xjcl+enu/uEkByV5dFU9LMlxSd7V3fdL8q7xearqgCRHJXlgkkcneXlV7TDu6xVJjklyv/Hx6LH86Umu6+77Jjk5yYvGfe2R5PgkD01ySJLjZwM4AAAALJhbMO7Bv49PdxofneSIJKeN5acledy4fESSM7r7pu7+fJLLkhxSVfdIsmt3f7C7O8lrF9VZ2Ncbkxw69iYfnuS87l7f3dclOS/fC9MAAADwH+Z6j3FV7VBVFyW5JkNQvSDJ3bv76iQZv95t3HyfJFfMVL9yLNtnXF5cvkGd7r45yfVJ9lxmX4vbd0xVrauqdddee+3tOFIAAAC2VXMNxt19S3cflGTfDL2/By6zeS21i2XKb2ud2fa9srsP7u6D995772WaBgAAwPZqVWal7u6vJXl3huHMXx6HR2f8es242ZVJ9puptm+Sq8byfZco36BOVe2YZLck65fZFwAAAGxgnrNS711Vdx2Xd0nyM0k+k+TsJAuzRB+d5K3j8tlJjhpnmr53hkm2PjwOt76hqh423j/81EV1FvZ1ZJLzx/uQz01yWFXtPk66ddhYBgAAABvYcY77vkeS08aZpb8vyVnd/baq+mCSs6rq6Un+X5LHJ0l3X1JVZyX5VJKbkxzb3beM+3pmktck2SXJOeMjSU5JcnpVXZahp/iocV/rq+oFSS4ctzuhu9fP8VgBAADYRs0tGHf3J5L8yBLlX01y6EbqnJjkxCXK1yW51f3J3X1jxmC9xLpTk5y6ea0GAABgalblHmMAAADYWgnGAAAATJpgDAAAwKQJxgAAAEyaYAwAAMCkCcYAAABMmmAMAADApAnGAAAATJpgDAAAwKQJxgAAAEyaYAwAAMCkCcYAAABMmmAMAADApAnGAAAATJpgDAAAwKQJxgAAAEyaYAwAAMCkCcYAAABMmmAMAADApAnGAAAATJpgDAAAwKQJxgAAAEyaYAwAAMCkCcYAAABMmmAMAADApAnGAAAATJpgDAAAwKQJxgAAAEyaYAwAAMCkCcYAAABMmmAMAADApAnGAAAATJpgDAAAwKQJxgAAAEyaYAwAAMCkCcYAAABMmmAMAADApAnGAAAATJpgDAAAwKQJxgAAAEyaYAwAAMCkCcYAAABMmmAMAADApAnGAAAATJpgDAAAwKQJxgAAAEyaYAwAAMCkCcYAAABMmmAMAADApAnGAAAATJpgDAAAwKQJxgAAAEyaYAwAAMCkCcYAAABMmmAMAADApAnGAAAATJpgDAAAwKTNLRhX1X5V9c9V9emquqSqnj2WP7+qvlhVF42Px8zUeV5VXVZVn62qw2fKH1xVF4/rXlpVNZbfsarOHMsvqKr9Z+ocXVWXjo+j53WcAAAAbNt2nOO+b07yu9390aq6S5KPVNV547qTu/svZzeuqgOSHJXkgUnumeSfqur+3X1LklckOSbJh5K8I8mjk5yT5OlJruvu+1bVUUlelOQJVbVHkuOTHJykx9c+u7uvm+PxAgAAsA2aW49xd1/d3R8dl29I8ukk+yxT5YgkZ3T3Td39+SSXJTmkqu6RZNfu/mB3d5LXJnncTJ3TxuU3Jjl07E0+PMl53b1+DMPnZQjTAAAAsIFVucd4HOL8I0kuGIueVVWfqKpTq2r3sWyfJFfMVLtyLNtnXF5cvkGd7r45yfVJ9lxmX4vbdUxVrauqdddee+1tP0AAAAC2WXMPxlV15yRvSvKc7v56hmHR90lyUJKrk7x4YdMlqvcy5be1zvcKul/Z3Qd398F77733cocBAADAdmquwbiqdsoQil/X3W9Oku7+cnff0t3fTfI3SQ4ZN78yyX4z1fdNctVYvu8S5RvUqaodk+yWZP0y+wIAAIANzHNW6kpySpJPd/dLZsrvMbPZLyX55Lh8dpKjxpmm753kfkk+3N1XJ7mhqh427vOpSd46U2dhxukjk5w/3od
8bpLDqmr3caj2YWMZAAAAbGCes1L/WJKnJLm4qi4ay/4gyROr6qAMQ5svT/KMJOnuS6rqrCSfyjCj9bHjjNRJ8swkr0myS4bZqM8Zy09JcnpVXZahp/iocV/rq+oFSS4ctzuhu9fP5SgBAADYps0tGHf3v2Tpe33fsUydE5OcuET5uiQHLlF+Y5LHb2RfpyY5daXtBQAAYJpWZVZqAAAA2FoJxgAAAEyaYAwAAMCkCcYAAABMmmAMAADApAnGAAAATJpgDAAAwKQJxgAAAEyaYAwAAMCkCcYAAABMmmAMAADApAnGAAAATJpgDAAAwKQJxgAAAEyaYAwAAMCkCcYAAABMmmAMAADApAnGAAAATJpgDAAAwKQJxgAAAEyaYAwAAMCkCcYAAABMmmAMAADApAnGAAAATJpgDAAAwKQJxgAAAEzaJoNxVT17JWUAAACwLVpJj/HRS5Q9bQu3AwAAANbEjhtbUVVPTPIrSe5dVWfPrLpLkq/Ou2EAAACwGjYajJN8IMnVSfZK8uKZ8huSfGKejQIAAIDVstFg3N1fSPKFJA9fveYAAADA6lrJ5Fu/XFWXVtX1VfX1qrqhqr6+Go0DAACAeVtuKPWCv0jyC9396Xk3BgAAAFbbSmal/rJQDAAAwPZqJT3G66rqzCR/n+SmhcLufvO8GgUAAACrZSXBeNck30xy2ExZJxGMAQAA2OZtMhh396+uRkMAAABgLWwyGFfVqzP0EG+gu39tLi0CAACAVbSSodRvm1neOckvJblqPs0BAACA1bWSodRvmn1eVW9I8k9zaxEAAACsopV8XNNi90vyA1u6IQAAALAWVnKP8Q0Z7jGu8euXkvz+nNsFAAAAq2IlQ6nvshoNAQAAgLWwksm3UlW/mOQnxqfv7u63Lbc9AAAAbCs2eY9xVZ2U5NlJPjU+nl1Vfz7vhgEAAMBqWEmP8WOSHNTd302SqjotyceSPG+eDQMAAIDVsNJZqe86s7zbHNoBAAAAa2IlPcZ/nuRjVfXPGWam/onoLQYAAGA7sZJZqd9QVe9O8pAMwfj3u/tL824YAAAArIaVTL71S0m+2d1nd/dbk9xYVY+be8sAAABgFazkHuPju/v6hSfd/bUkx8+tRQAAALCKVhKMl9pmRZ9/DAAAAFu7lQTjdVX1kqq6T1X9YFWdnOQj824YAAAArIaVBOPfSvLtJGcmOSvJt5IcO89GAQAAwGpZyazU30hy3Cq0BQAAAFbdSnqMAQAAYLslGAMAADBpcwvGVbVfVf1zVX26qi6pqmeP5XtU1XlVden4dfeZOs+rqsuq6rNVdfhM+YOr6uJx3UurqsbyO1bVmWP5BVW1/0ydo8fXuLSqjp7XcQIAALBt22Qwrqr7V9W7quqT4/MHVdUfrWDfNyf53e7+z0keluTYqjogw/3K7+ru+yV51/g847qjkjwwyaOTvLyqdhj39YokxyS53/h49Fj+9CTXdfd9k5yc5EXjvvbI8FnLD01ySJLjZwM4AAAALFhJj/HfJHleku8kSXd/IkOAXVZ3X93dHx2Xb0jy6ST7JDkiyWnjZqcledy4fESSM7r7pu7+fJLLkhxSVfdIsmt3f7C7O8lrF9VZ2Ncbkxw69iYfnuS87l7f3dclOS/fC9MAAADwH1YSjL+/uz+8qOzmzXmRcYjzjyS5IMndu/vqZAjPSe42brZPkitmql05lu0zLi8u36BOd9+c5Pokey6zLwAAANjASoLxV6rqPkk6SarqyCRXr/QFqurOSd6U5Dnd/fXlNl2irJcpv611Ztt2TFWtq6p111577TJNAwAAYHu1kmB8bJL/m+QBVfXFJM9J8syV7LyqdsoQil/X3W8ei788Do/O+PWasfzKJPvNVN83yVVj+b5LlG9Qp6p2TLJbkvXL7GsD3f3K7j64uw/ee++9V3JIAAAAbGc2GYy7+3Pd/TNJ9k7ygO5+ZHdfvql6472+pyT5dHe/ZGbV2UkWZok+OslbZ8qPGmeavne
GSbY+PA63vqGqHjbu86mL6izs68gk54/3IZ+b5LCq2n2cdOuwsQwAAAA2sJJZqZ9dVbsm+WaSk6vqo1V12Ar2/WNJnpLkp6vqovHxmCQnJfnZqro0yc+Oz9PdlyQ5K8mnkvxjkmO7+5ZxX89M8qoME3L9W5JzxvJTkuxZVZcl+Z2MM1x39/okL0hy4fg4YSwDAACADey4gm1+rbv/avxc4bsl+dUkr07yzuUqdfe/ZOl7fZPk0I3UOTHJiUuUr0ty4BLlNyZ5/Eb2dWqSU5drIwAAAKzkHuOFcPuYJK/u7o9n44EXAAAAtikrCcYfqap3ZgjG51bVXZJ8d77NAgAAgNWxkqHUT09yUJLPdfc3q2rPDMOpAQAAYJu3yWDc3d+tqi8nOWD8SCQAAADYbmwy6FbVi5I8IcNs0QuzRHeS986xXQAAALAqVtID/LgkP9TdN825LQAAALDqVjL51ueS7DTvhgAAAMBaWEmP8TeTXFRV70ryH73G3f3bc2sVAAAArJKVBOOzxwcAAABsd1YyK/VpVXWHJPcfiz7b3d+Zb7MAAABgdaxkVupHJTktyeVJKsl+VXV0d5uVGgAAgG3eSoZSvzjJYd392SSpqvsneUOSB8+zYQAAALAaVjIr9U4LoThJuvtfY5ZqAAAAthMr6TFeV1WnJDl9fP6kJB+ZX5MAAABg9awkGD8zybFJfjvDPcbvTfLyeTYKAAAAVstKZqW+qapeluRdSb6bYVbqb8+9ZQAAALAKVjIr9WOT/HWSf8vQY3zvqnpGd58z78YBAADAvK10Vuqf6u7LkqSq7pPk7UkEYwAAALZ5K5mV+pqFUDz6XJJr5tQeAAAAWFUr6TG+pKrekeSsJJ3k8UkurKpfTpLufvMc2wcAAABztZJgvHOSLyf5yfH5tUn2SPILGYKyYAwAAMA2ayWzUv/qajQEAAAA1sIm7zGuqr+oql2raqeqeldVfaWqnrwajQMAAIB5W8nkW4d199eT/HySK5PcP8n/nGurAAAAYJWsJBjvNH59TJI3dPf6ObYHAAAAVtVKJt/6h6r6TJJvJfnvVbV3khvn2ywAAABYHZvsMe7u45I8PMnB3f2dJN9McsS8GwYAAACrYSWTb31/kmOTvGIsumeSg+fZKAAAAFgtK7nH+NVJvp3kEePzK5O8cG4tAgAAgFW0kmB8n+7+iyTfSZLu/laSmmurAAAAYJWsJBh/u6p2SdJJUlX3SXLTXFsFAAAAq2Qls1Ifn+Qfk+xXVa9L8mNJnjbPRgEAAMBqWTYYV9X3Jdk9yS8neViGIdTP7u6vrELbAAAAYO6WDcbd/d2qelZ3n5Xk7avUJgAAAFg1K7nH+Lyqem5V7VdVeyw85t4yAAAAWAUrucf418avx86UdZIf3PLNAQAAgNW1yWDc3fdejYYAAADAWljJUGoAAADYbgnGAAAATJpgDAAAwKRtMhhX1Zuq6rHjZxoDAADAdmUlYfcVSX4lyaVVdVJVPWDObQIAAIBVs8lg3N3/1N1PSvKjSS7P8LnGH6iqX62qnebdQAAAAJinFQ2Prqo9kzwtya8n+ViSv8oQlM+bW8sAAABgFWzyc4yr6s1JHpDk9CS/0N1Xj6vOrKp182wcAAAAzNsmg3GSl3X3+Uut6O6Dt3B7AAAAYFVtNBhX1S8vtbygu988r0YBAADAalmux/gXllnXSQRjAAAAtnkbDcbd/aur2RAAAABYC5uclbqq7l5Vp1TVOePzA6rq6fNvGgAAAMzfSj6u6TVJzk1yz/H5vyZ5zpzaAwAAAKtqJcF4r+4+K8l3k6S7b05yy1xbBQAAAKtkJcH4G1W1Z4YJt1JVD0ty/VxbBQAAAKtkJZ9j/DtJzk5yn6p6f5K9kxw511YBAADAKtlkMO7uj1bVTyb5oSSV5LPd/Z25twwAAABWwSaDcVXtkOQxSfYftz+sqtLdL5lz2wAAAGDuVjKU+h+S3Jjk4owTcAEAAMD2YiWTb+3b3b/c3cd3958
uPDZVqapOraprquqTM2XPr6ovVtVF4+MxM+ueV1WXVdVnq+rwmfIHV9XF47qXVlWN5XesqjPH8guqav+ZOkdX1aXj4+iVngwAAACmZyXB+JyqOuw27Ps1SR69RPnJ3X3Q+HhHklTVAUmOSvLAsc7LxyHcSfKKJMckud/4WNjn05Nc1933TXJykheN+9ojyfFJHprkkCTHV9Xut6H9AAAATMBKgvGHkrylqr5VVV+vqhuq6uubqtTd702yfoXtOCLJGd19U3d/PsllSQ6pqnsk2bW7P9jdneS1SR43U+e0cfmNSQ4de5MPT3Jed6/v7uuSnJelAzoAAACsKBi/OMnDk3x/d+/a3Xfp7l1vx2s+q6o+MQ61XujJ3SfJFTPbXDmW7TMuLy7foE5335zhs5X3XGZfAAAAcCsrCcaXJvnk2GN7e70iyX2SHJTk6gyhOxk+BmqxXqb8ttbZQFUdU1Xrqmrdtddeu0yzAQAA2F6tZFbqq5O8u6rOSXLTQuFt+bim7v7ywnJV/U2St41Pr0yy38ym+ya5aizfd4ny2TpXVtWOSXbLMHT7yiSPWlTn3RtpzyuTvDJJDj744C0R/AEAANjGrKTH+PNJ3pXkDknuMvPYbOM9wwt+KcnCjNVnJzlqnGn63hkm2fpwd1+d5Iaqeth4//BTk7x1ps7CjNNHJjl/7NU+N8NnLe8+DtU+bCwDAACAW9lkj/FKPpppKVX1hgw9t3tV1ZUZZop+VFUdlGFo8+VJnjG+xiVVdVaSTyW5Ocmx3X3LuKtnZpjhepck54yPJDklyelVdVmGnuKjxn2tr6oXJLlw3O6E7l7pJGAAAABMzEaDcVW9rLufVVX/kCXu0e3uX1xux939xCWKT1lm+xOTnLhE+bokBy5RfmOSx29kX6cmOXW59gEAAECyfI/xU5M8K8lfrlJbAAAAYNUtF4z/LUm6+z2r1BYAAABYdcsF472r6nc2tvK2zEoNAAAAW5vlgvEOSe6cpT8XGAAAALYLywXjq7v7hFVrCQAAAKyB5T7HWE8xAAAA273lgvGhq9YKAAAAWCMbDcbdvX41GwIAAABrYbkeYwAAANjuCcYAAABMmmAMAADApAnGAAAATJpgDAAAwKQJxgAAAEyaYAwAAMCkCcYAAABMmmAMAADApAnGAAAATJpgDAAAwKQJxgAAAEyaYAwAAMCkCcYAAABMmmAMAADApAnGAAAATJpgDAAAwKQJxgAAAEyaYAwAAMCkCcYAAABMmmAMAADApAnGAAAATJpgDAAAwKQJxgAAAEyaYAwAAMCkCcYAAABMmmAMAADApAnGAAAATJpgDAAAwKQJxgAAAEyaYAwAAMCkCcYAAABMmmAMAADApAnGAAAATJpgDAAAwKQJxgAAAEyaYAwAAMCkCcYAAABMmmAMAADApAnGAAAATJpgDAAAwKQJxgAAAEyaYAwAAMCkCcYAAABM2o5r3QAAALZf+x/39rVuArfD5Sc9dq2bAKtCjzEAAACTJhgDAAAwaYIxAAAAkyYYAwAAMGmCMQAAAJM2t2BcVadW1TVV9cmZsj2q6ryqunT8uvvMuudV1WVV9dmqOnym/MFVdfG47qVVVWP5HavqzLH8gqraf6bO0eNrXFpVR8/rGAEAANj2zbPH+DVJHr2o7Lgk7+ru+yV51/g8VXVAkqOSPHCs8/Kq2mGs84okxyS53/hY2OfTk1zX3fdNcnKSF4372iPJ8UkemuSQJMfPBnAAAACYNbdg3N3vTbJ+UfERSU4bl09L8riZ8jO6+6bu/nySy5IcUlX3SLJrd3+wuzvJaxfVWdjXG5McOvYmH57kvO5e393XJTkvtw7oAAAAkGT17zG+e3dfnSTj17uN5fskuWJmuyvHsn3G5cXlG9Tp7puTXJ9kz2X2dStVdUxVrauqdddee+3tOCwAAAC2VVvL5Fu1RFkvU35b62xY2P3K7j64uw/ee++9V9RQAAAAti+rHYy/PA6Pzvj1mrH8yiT7zWy
3b5KrxvJ9lyjfoE5V7ZhktwxDtze2LwAAALiV1Q7GZydZmCX66CRvnSk/apxp+t4ZJtn68Djc+oaqeth4//BTF9VZ2NeRSc4f70M+N8lhVbX7OOnWYWMZAAAA3MqO89pxVb0hyaOS7FVVV2aYKfqkJGdV1dOT/L8kj0+S7r6kqs5K8qkkNyc5trtvGXf1zAwzXO+S5JzxkSSnJDm9qi7L0FN81Liv9VX1giQXjtud0N2LJwEDAACAJHMMxt39xI2sOnQj25+Y5MQlytclOXCJ8hszBusl1p2a5NQVNxYAAIDJ2lom3wIAAIA1IRgDAAAwaYIxAAAAkyYYAwAAMGmCMQAAAJMmGAMAADBpgjEAAACTJhgDAAAwaYIxAAAAkyYYAwAAMGmCMQAAAJMmGAMAADBpgjEAAACTJhgDAAAwaYIxAAAAkyYYAwAAMGmCMQAAAJMmGAMAADBpgjEAAACTJhgDAAAwaYIxAAAAkyYYAwAAMGmCMQAAAJMmGAMAADBpgjEAAACTJhgDAAAwaYIxAAAAkyYYAwAAMGmCMQAAAJMmGAMAADBpgjEAAACTJhgDAAAwaYIxAAAAkyYYAwAAMGmCMQAAAJMmGAMAADBpgjEAAACTJhgDAAAwaYIxAAAAkyYYAwAAMGmCMQAAAJMmGAMAADBpgjEAAACTJhgDAAAwaYIxAAAAkyYYAwAAMGmCMQAAAJMmGAMAADBpgjEAAACTJhgDAAAwaYIxAAAAkyYYAwAAMGmCMQAAAJMmGAMAADBpgjEAAACTJhgDAAAwaYIxAAAAk7YmwbiqLq+qi6vqoqpaN5btUVXnVdWl49fdZ7Z/XlVdVlWfrarDZ8ofPO7nsqp6aVXVWH7HqjpzLL+gqvZf9YMEAABgm7CWPcY/1d0HdffB4/Pjkryru++X5F3j81TVAUmOSvLAJI9O8vKq2mGs84okxyS53/h49Fj+9CTXdfd9k5yc5EWrcDwAAABsg7amodRHJDltXD4tyeNmys/o7pu6+/NJLktySFXdI8mu3f3B7u4kr11UZ2Ffb0xy6EJvMgAAAMxaq2DcSd5ZVR+pqmPGsrt399VJMn6921i+T5IrZupeOZbtMy4vLt+gTnffnOT6JHsubkRVHVNV66pq3bXXXrtFDgwAAIBty45r9Lo/1t1XVdXdkpxXVZ9ZZtulenp7mfLl6mxY0P3KJK9MkoMPPvhW6wEAANj+rUmPcXdfNX69JslbkhyS5Mvj8OiMX68ZN78yyX4z1fdNctVYvu8S5RvUqaodk+yWZP08jgUAAIBt26oH46q6U1XdZWE5yWFJPpnk7CRHj5sdneSt4/LZSY4aZ5q+d4ZJtj48Dre+oaoeNt4//NRFdRb2dWSS88f7kAEAAGADazGU+u5J3jLOhbVjktd39z9W1YVJzqqqpyf5f0kenyTdfUlVnZXkU0luTnJsd98y7uuZSV6TZJck54yPJDklyelVdVmGnuKjVuPAAAAA2PasejDu7s8l+eElyr+a5NCN1DkxyYlLlK9LcuAS5TdmDNYAAACwnK3p45oAAABg1QnGAAAATJpgDAAAwKQJxgAAAEyaYAwAAMCkCcYAAABMmmAMAADApAnGAAAATJpgDAAAwKTtuNYNAGDt7X/c29e6CdwOl5/02LVuAgBs0/QYAwAAMGmCMQAAAJMmGAMAADBp7jEGADaLe9K3be5JB7g1PcYAAABMmmAMAADApAnGAAAATJpgDAAAwKQJxgAAAEyaYAwAAMCkCcYAAABMmmAMAADApAnGAAAATJpgDAAAwKQJxgAAAEyaYAwAAMCkCcYAAABMmmAMAADApAnGAAAATJpgDAAAwKQJxgAAAEyaYAwAAMCkCcYAAABMmmAMAADApAnGAAAATJpgDAAAwKQJxgAAAEyaYAwAAMCkCcYAAABMmmAMAADApAnGAAAATJpgDAAAwKQJxgAAAEyaYAwAAMCkCcYAAABMmmAMAADApAnGAAAATJpgDAAAwKQ
JxgAAAEyaYAwAAMCk7bjWDQDmY//j3r7WTeB2uPykx651EwAAJkOPMQAAAJMmGAMAADBpgjEAAACTJhgDAAAwaYIxAAAAkyYYAwAAMGnbdTCuqkdX1Wer6rKqOm6t2wMAAMDWZ7sNxlW1Q5L/k+TnkhyQ5IlVdcDatgoAAICtzXYbjJMckuSy7v5cd387yRlJjljjNgEAALCV2XGtGzBH+yS5Yub5lUkeukZt2SL2P+7ta90EbofLT3rsWjcBAABYQnX3WrdhLqrq8UkO7+5fH58/Jckh3f1bM9sck+SY8ekPJfnsqjeUWXsl+cpaN4JthuuFzeF6YXO4Xtgcrhc2h+tlbd2ru/deasX23GN8ZZL9Zp7vm+Sq2Q26+5VJXrmajWLjqmpddx+81u1g2+B6YXO4Xtgcrhc2h+uFzeF62Xptz/cYX5jkflV176q6Q5Kjkpy9xm0CAABgK7Pd9hh3981V9awk5ybZIcmp3X3JGjcLAACArcx2G4yTpLvfkeQda90OVsywdjaH64XN4Xphc7he2ByuFzaH62Urtd1OvgUAAAArsT3fYwwAAACbJBgzF1V1S1VdNPM4bgvt9wPj1/2r6pNbYp9svZa4jvZf6zaxdmauh09W1d9V1ff7XTAtS10Dm1l//6r6lRVut8Wvq6r6xYW/h1X1/Kp67pZ+DVZm0bX0D1V1101sv3dVXVBVH6uqH59De1wP27CqOrmqnjPz/NyqetXM8xdX1e9spO67q8os1VsBwZh5+VZ3HzTzOGlL7LS7H7El9sM2Y/F1dPlaN4g1tXA9HJjk20l+c60bxKq7vdfA/kk2GYznpbvP3lJ/D7ndZq+l9UmO3cT2hyb5THf/SHe/byUvUFU7LPec7coHkjwiSarq+zJ8VvEDZ9Y/Isn716BdbAbBmFVVVZdX1Z9V1Qeral1V/ej4rtq/VdVvjtvcuareVVUfraqLq+qImfr/vnatZ61t7NqoqodU1SeqauequlNVXVJVB651e5mr9yW577i8Q1X9zfh9f2dV7ZIkVXVQVX1ovDbeUlW7j+XvrqoXVdWHq+pfF3p/qmqHqvpfVXXhWOcZa3NorND7kty3qvaoqr8fv2cfqqoHJUlV/eTMaJOPVdVdkpyU5MfHsv8x9gy/b/yd8tGqWvbN16p6VFW9p6rOGq+dk6rqSeO1dHFV3Wfc7hdmehf/qaruPpY/rapeNufzwub7YJJ9kqSq7lNV/1hVHxmvjQdU1UFJ/iLJY8ZrZ5eqOmz8X+aj4+iFO4/1L6+qP6mqf0ny+CWe/8b4O+bjVfWm2sxRD2y13p8xGGcIxJ9MckNV7V5Vd0zyn5Nk/P3xkfF/33vM1H9yVX2ghhEMh6xu01kgGDMvu9SGQ2CfMLPuiu5+eIZ/al6T5MgkD0tywrj+xiS/1N0/muSnkry4qmoV287WY/Y6eks2cm1094UZPqf8hRn+efnb7ja8djtVVTsm+bkkF49F90vyf7r7gUm+luS/juWvTfL73f2gcdvjZ3azY3cfkuQ5M+VPT3J9dz8kyUOS/EZV3XuOh8JttOga+NMkHxu/z3+Q4fueJM9Ncmx3H5Tkx5N8K8lxSd439hSenOSaJD87/k55QpKXruDlfzjJs5P8lyRPSXL/8Vp6VZLfGrf5lyQP6+4fSXJGkt+7fUfMvNTQi3tohr8hyTBj8G9194MzXEMv7+6LkvxJkjPH6+lOSf4oyc+M1866JLPDZG/s7kd29xlLPH9zdz+ku384yacz/N5hG9fdVyW5uap+IENA/mCSC5I8PMnBGb7XJyc5cry2Tk1y4swu7jSOivzv4zrWwHb9cU2sqW+NfzyWsvDH5+Ikd+7uGzK8q3ZjDff4fCPJn1XVTyT5boZ3ce+e5EvzbTJboQ2uo6raKRu/Nk5IcmGG8Pzbq99UVsEuVXXRuPy+JKckuWeSz4//uCbJR5LsX1W7Jblrd79nLD8tyd/N7OvNs9uPy4cleVBVHTk+3y1D6P78lj0
MboelroELMr4Z0t3nV9We4/f//UleUlWvyxBGrlziPdadkrxs7BG8Jcn9V9CGC7v76iSpqn9L8s6x/OIMb9glyb5Jzhx7hO4Q19DWaOFa2j/D74Hzxl7fRyT5u5lr5Y5L1H1YkgOSvH/c7g4ZgtCCMxdtP/v8wKp6YZK7JrlzknNvz0GwVVnoNX5Ekpdk+B/lEUmuT/LFDH9jzhuvmR2SXD1T9w1J0t3vrapdq+qu3f211Ws6iWDM2rhp/PrdmeWF5zsmeVKSvZM8uLu/U1WXJ9l5VVvI1mq5a2OPDP9k7DSWfWNNWsg83eoNt/EfjNnfI7ck2WUF+1qoc0u+97ewMvQU+Ud167XUNbDUiKLu7pOq6u1JHpPkQ1X1M0ts9z+SfDlDL/D3ZXhjbVMW/92a/Zu2cC397yQv6e6zq+pRSZ6/gv2yur7V3QeNb6K8LcM9xq9J8rVl3thfUEnO6+4nbmT94r8/s89fk+Rx3f3xqnpakkdtXrPZii3cZ/xfMgylviLJ7yb5epLzk+wzjphcyuLPz/V5umvAUGq2RrsluWYMPj+V5F5r3SC2GstdG69M8sdJXpfkRWvROLYe3X19kuvqe7PHPiXJe5apkgw9N88cRyakqu5fVXeaYzPZMt6b4U2zjCH0K9399aq6T3df3N0vyjDU9QFJbkhyl5m6uyW5uru/m+Ea2VKTI+2WoYcoSY7eQvtkDsbfFb+dYdj0t5J8vqoenwxvulTVDy9R7UNJfqyq7jtu9/1VtZLRBslw/V09/p550u0+ALYm70/y80nWd/ct3b0+w8iAh2cYNbB3VT08GUbAVdXs5FxPGMsfmeGWnutXteUk0WPM/MwOd0uSf+zulX5k0+uS/ENVrUtyUZLPbOG2se1a8tqoqqcmubm7Xz/eL/aBqvrp7j5/7ZrKVuDoJH89Tm7zuSS/uontX5VhWOVHx17Ia5M8bp4NZIt4fpJXV9Unknwz3wuizxnfQLslyaeSnJOhV/fmqvp4hp67lyd50xiE/jlbbqTJ8zMMx/1ihhDlXvWtWHd/bLwmjsoQVl9RVX+UYQTSGUk+vmj7a8fe3jeMEyslwz3H/7qCl/vjDMP/v5Bh+P1dlt+cbcjFGWajfv2isjt39zXjbTovHUcp7Jjk/0tyybjddTV8JOmuSX5t9ZrMrOrWUw8AAMB0GUoNAADApAnGAAAATJpgDAAAwKQJxgAAAEyaYAwAAMCkCcYAsA2pqq6q02ee71hV11bV2zZzP++uqoPH5XdU1V23cFMBYJvhc4wBYNvyjSQHVtUu3f2tJD+b5Iu3Z4fd/Zgt0jIA2EbpMQaAbc85SR47Lj8xyRsWVlTVnarq1Kq6sKo+VlVHjOW7VNUZVfWJqjozyS4zdS6vqr2qav+q+uRM+XOr6vnj8rur6uSqem9VfbqqHlJVb66qS6vqhatwzAAwN4IxAGx7zkhyVFXtnORBSS6YWfeHSc7v7ock+akk/6uq7pTkmUm+2d0PSnJikgffhtf9dnf/RJK/TvLWJMcmOTDJ06pqz9t8NACwxgylBoBtTHd/oqr2z9Bb/I5Fqw9L8otV9dzx+c5JfiDJTyR56Uz9T9yGlz57/Hpxkku6++okqarPJdkvyVdvwz4BYM0JxgCwbTo7yV8meVSS2d7aSvJfu/uzsxtXVZL0JvZ5czYcTbbzovU3jV+/O7O88Nz/FABsswylBoBt06lJTujuixeVn5vkt2pMwlX1I2P5e5M8aSw7MMMQ7MW+nORuVbVnVd0xyc/PpeUAsJURjAFgG9TdV3b3Xy2x6gVJdkryiXEirReM5a9IcudxCPXvJfnwEvv8TpITMtyz/LYkn5lH2wFga1PdmxpVBQAAANsvPcYAAABMmmAMAADApAnGAAAATJpgDAAAwKQJxgAAAEyaYAwAAMCkCcYAAABMmmAMAADApP3/V/7Lgfz5Cj0AAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "fig7, ax7 = plt.subplots(figsize=(16, 9))\n", + "ax7.bar(df_by_med['submitted_via'], df_by_med['count'])\n", + "\n", + "ax7.set_title(\"Number of timely responses per submission medium\")\n", + "ax7.set_xlabel('Medium')\n", + "ax7.set_ylabel('Timely response count')" + ] + }, + { + "cell_type": "markdown", + "id": "86c3e653", + "metadata": {}, + "source": [ + "### Analysis:\n", + "Quicker, digital forms of communication such as *Email* and *Web* clearly have a higher dispute rate than *Postal mail* or *Referral*; however, it should be noted that timeliness of the response does not hold much weight for digital forms. This could highlight the possible complacency/bureaucracy that these companies may have that causes them to give a clearly untimely response, and it warrants a closer look at the structure of customer service in such companies." + ] + }, + { + "cell_type": "markdown", + "id": "a6eae2cf", + "metadata": {}, + "source": [ + "## Sentiment Analysis/Prediction:" + ] + }, + { + "cell_type": "markdown", + "id": "05f5c8e9", + "metadata": {}, + "source": [ + "### Setting up data:\n", + "Since the key data is categorical in nature, we can use *one-hot encoding* for it, as there is no ordinal relationship in any of the data that is being used." + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "c380bd0b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
date_receivedcompanyzipcodedate_sent_to_companycompany_response_to_consumertimely_responseconsumer_disputed?product_Bank account or serviceproduct_Consumer Loanproduct_Credit card...state_WAstate_WIstate_WVstate_WYsubmitted_via_Emailsubmitted_via_Faxsubmitted_via_Phonesubmitted_via_Postal mailsubmitted_via_Referralsubmitted_via_Web
0735110U.S. Bancorp95993735114111000...0000000010
1735110Wells Fargo & Company91104735114111000...0000000010
2735110Wells Fargo & Company11764735129110000...0000000100
3735110Navient Solutions, Inc.21402735110111000...0000100000
4735110Resurgent Capital Services L.P.30106735110111000...0000000001
\n", + "

5 rows Ă— 181 columns

\n", + "
" + ], + "text/plain": [ + " date_received company zipcode \\\n", + "0 735110 U.S. Bancorp 95993 \n", + "1 735110 Wells Fargo & Company 91104 \n", + "2 735110 Wells Fargo & Company 11764 \n", + "3 735110 Navient Solutions, Inc. 21402 \n", + "4 735110 Resurgent Capital Services L.P. 30106 \n", + "\n", + " date_sent_to_company company_response_to_consumer timely_response \\\n", + "0 735114 1 1 \n", + "1 735114 1 1 \n", + "2 735129 1 1 \n", + "3 735110 1 1 \n", + "4 735110 1 1 \n", + "\n", + " consumer_disputed? product_Bank account or service product_Consumer Loan \\\n", + "0 1 0 0 \n", + "1 1 0 0 \n", + "2 0 0 0 \n", + "3 1 0 0 \n", + "4 1 0 0 \n", + "\n", + " product_Credit card ... state_WA state_WI state_WV state_WY \\\n", + "0 0 ... 0 0 0 0 \n", + "1 0 ... 0 0 0 0 \n", + "2 0 ... 0 0 0 0 \n", + "3 0 ... 0 0 0 0 \n", + "4 0 ... 0 0 0 0 \n", + "\n", + " submitted_via_Email submitted_via_Fax submitted_via_Phone \\\n", + "0 0 0 0 \n", + "1 0 0 0 \n", + "2 0 0 0 \n", + "3 1 0 0 \n", + "4 0 0 0 \n", + "\n", + " submitted_via_Postal mail submitted_via_Referral submitted_via_Web \n", + "0 0 1 0 \n", + "1 0 1 0 \n", + "2 1 0 0 \n", + "3 0 0 0 \n", + "4 0 0 1 \n", + "\n", + "[5 rows x 181 columns]" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Converting dates to numerical values instead of datetime\n", + "date_cols = ['date_received', 'date_sent_to_company']\n", + "for date_col in date_cols:\n", + " df[date_col] = df[date_col].apply(pd.Timestamp.toordinal)\n", + "\n", + "analysis_cols = ['product', 'issue', 'state', 'submitted_via']\n", + "encoded = df\n", + "for c in analysis_cols:\n", + " c_encoded = pd.get_dummies(encoded[c], prefix=c)\n", + " encoded = pd.concat((encoded, c_encoded), axis=1).drop(c, axis=1)\n", + "encoded.head() " + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "44062878", + "metadata": {}, + "outputs": [], + "source": [ + "X = encoded.drop(['company', 'zipcode', 
'consumer_disputed?'], axis=1)\n", + "y = encoded['consumer_disputed?']\n", + "X_tr, X_t, y_tr, y_t = train_test_split(X, y, test_size=0.2, random_state=0)" + ] + }, + { + "cell_type": "markdown", + "id": "b0de514c", + "metadata": {}, + "source": [ + "**First using Logistic Regression to classify one-hot-encoded features:**" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "25a11427", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Classifier accuracy is 0.7980160443197353\n" + ] + } + ], + "source": [ + "classifier1 = LogisticRegression()\n", + "classifier1.fit(X_tr, y_tr)\n", + "y_pred = classifier1.predict(X_t)\n", + "\n", + "print(\"Classifier accuracy is\", accuracy_score(y_t, y_pred))" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "a3ce0650", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/aryanvakharia/opt/anaconda3/lib/python3.9/site-packages/sklearn/utils/deprecation.py:87: FutureWarning: Function plot_confusion_matrix is deprecated; Function `plot_confusion_matrix` is deprecated in 1.0 and will be removed in 1.2. Use one of the class methods: ConfusionMatrixDisplay.from_predictions or ConfusionMatrixDisplay.from_estimator.\n", + " warnings.warn(msg, category=FutureWarning)\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plot_confusion_matrix(classifier1, X_t, y_t, cmap=plt.cm.Greens)" + ] + }, + { + "cell_type": "markdown", + "id": "141b8875", + "metadata": {}, + "source": [ + "### Analysis:\n", + "As we can see, the classifier needs a great deal of work to accurately predict a disputed complaint: the confusion matrix above shows that truly disputed complaints are not recognized by the classifier." + ] + }, + { + "cell_type": "markdown", + "id": "730fb665", + "metadata": {}, + "source": [ + "**Using a Decision Tree Classifier to improve prediction:**" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "id": "fc0a33cc", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Classifier accuracy is 0.7119846751564861\n" + ] + } + ], + "source": [ + "classifier2 = DecisionTreeClassifier(random_state=42)\n", + "classifier2.fit(X_tr, y_tr)\n", + "y_pred = classifier2.predict(X_t)\n", + "\n", + "print(\"Classifier accuracy is\", accuracy_score(y_t, y_pred))" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "id": "0435bfbf", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/aryanvakharia/opt/anaconda3/lib/python3.9/site-packages/sklearn/utils/deprecation.py:87: FutureWarning: Function plot_confusion_matrix is deprecated; Function `plot_confusion_matrix` is deprecated in 1.0 and will be removed in 1.2. Use one of the class methods: ConfusionMatrixDisplay.from_predictions or ConfusionMatrixDisplay.from_estimator.\n", + " warnings.warn(msg, category=FutureWarning)\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plot_confusion_matrix(classifier2, X_t, y_t, cmap=plt.cm.Reds)" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "2862af72", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " 0 0.81 0.84 0.82 88733\n", + " 1 0.26 0.23 0.24 22459\n", + "\n", + " accuracy 0.71 111192\n", + " macro avg 0.53 0.53 0.53 111192\n", + "weighted avg 0.70 0.71 0.70 111192\n", + "\n" + ] + } + ], + "source": [ + "print(classification_report(y_t, y_pred))" + ] + }, + { + "cell_type": "markdown", + "id": "52f413a8", + "metadata": {}, + "source": [ + "### Analysis:\n", + "The decision tree model has a lower accuracy, but, possibly because it deals with the categorical data better, it manages to correctly predict some of the disputed issues." + ] + }, + { + "cell_type": "markdown", + "id": "c04cfc62", + "metadata": {}, + "source": [ + "## Conclusion:" + ] + }, + { + "cell_type": "markdown", + "id": "ce1b2eb9", + "metadata": {}, + "source": [ + "Since both models are very quick and dirty, more detailed parsing of the data using the *nltk* library would be ideal for achieving higher recall and a higher number of true positives in the future.\n", + "\n", + "However, the visualizations and insights provided by the dataset do highlight key areas for our company to be wary and cautious of, and with continued improvement of data preparation and more robust training models there is greater scope for predicting disputed issues and even company responses." 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/main.tex b/main.tex new file mode 100644 index 0000000..8144cc6 --- /dev/null +++ b/main.tex @@ -0,0 +1,34 @@ +\documentclass{report} +\usepackage[utf8]{inputenc} + +\title{Future Hypotheses} +\author{Aryan Vakharia} +\date{September 30th, 2022} + +\setcounter{secnumdepth}{3} + +\begin{document} + +\maketitle + +\chapter{Current Project} + +\section{Optimizing the ML Model} +A Logistic Regression model and a Decision Tree were used, with both identifying true positives (actual disputes) with low accuracy. +\subsection{If a package such as XGBoost is optimized for the data set, then the model bias will be less than that of the Logistic Regression model.} +\subsection{If deeper analysis is performed on the data set, then the ML model will have greater accuracy with respect to consumer disputes.} + +\section{Data Insights} +For the following features---product, state, submission method, and issue: +\subsection{The more commonly used the financial product is within the company, the more likely it is to be disputed.} +\subsection{If a consumer is located on the West Coast of the United States, then they are more likely to dispute.} +\subsection{If a consumer uses a digital form of communication, then they are more likely to dispute their resolution.} + + +\chapter{Additional Hypotheses} + +\section{Data Exploration} +\subsection{Interactive map with highlights of different zip codes.} +\subsection{Robust, accurate sentiment analysis on product, sub-product, issue, sub-issue, and even narratives.} + +\end{document} \ No newline at end of 
file diff 
--git a/report.pdf b/report.pdf new file mode 100644 index 0000000..40ea274 Binary files /dev/null and b/report.pdf differ