-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsubmission.py
More file actions
87 lines (75 loc) · 2.36 KB
/
submission.py
File metadata and controls
87 lines (75 loc) · 2.36 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Jun 27 12:53:36 2020
@author: farshad.toosi and Mohammed Hasanuzzaman
"""
# Repeat Exam 2020: Programming for Data Analytics
# Please write your name and student ID:
#John Fitzgerald
#
#import libraries
import pandas as pd
# Load libraries
from pandas import read_csv
from pandas.plotting import scatter_matrix
from matplotlib import pyplot
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
def cleanFile(path="bank-full.csv"):
    """Load a CSV file and return only the rows that have no missing values.

    Args:
        path: Location of the CSV file to read. Defaults to "bank-full.csv"
            so existing zero-argument calls keep working.

    Returns:
        pandas.DataFrame: The parsed data with whitespace-stripped column
        names and every row containing an empty field removed. The original
        row index is preserved (it is not renumbered after filtering).
    """
    # keep_default_na=False disables pandas' built-in missing-value markers,
    # so only a truly empty field is treated as missing; text such as "NA"
    # stays in the data as an ordinary string.
    dataFile = pd.read_csv(
        path,
        skipinitialspace=True,
        na_values=[""],
        keep_default_na=False,
    )
    # Header names can carry stray surrounding whitespace.
    dataFile.columns = dataFile.columns.str.strip()
    # dropna() returns a new frame rather than modifying in place, so its
    # result must be used; it removes rows (not columns) with any missing value.
    return dataFile.dropna()
def Task1():
    """Count male and female records for each work class.

    Reads the cleaned data set via cleanFile() and tallies how many rows
    of each sex fall into every distinct 'workclass' value.

    NOTE(review): assumes the cleaned file has 'workclass' and 'sex'
    columns and that 'sex' holds the values 'Male' / 'Female' - confirm
    against the data file.

    Returns:
        tuple: (labels, men, women) where labels is the array of distinct
        work classes in order of first appearance, and men / women are
        count arrays aligned position-by-position with labels (0 where a
        work class has no rows for that sex).
    """
    # Call the shared loader so every task works on the same cleaned data.
    dataFile = cleanFile()
    # One label per distinct work class, in order of first appearance.
    labels = dataFile['workclass'].unique()

    # value_counts() orders by frequency and omits classes with no rows, so
    # reindex on labels to keep both count arrays aligned with the labels.
    men = (
        dataFile.loc[dataFile['sex'] == 'Male', 'workclass']
        .value_counts()
        .reindex(labels, fill_value=0)
        .values
    )
    women = (
        dataFile.loc[dataFile['sex'] == 'Female', 'workclass']
        .value_counts()
        .reindex(labels, fill_value=0)
        .values
    )
    return labels, men, women
def Task2():
    """
    Placeholder for Task 2: nothing is implemented yet, so calling it
    has no effect and yields None.
    """
    return None
def Task3():
    """
    Placeholder for Task 3: nothing is implemented yet, so calling it
    has no effect and yields None.
    """
    return None
def Task4():
    """
    Placeholder for Task 4: nothing is implemented yet, so calling it
    has no effect and yields None.
    """
    return None
def Task5():
    """
    Placeholder for Task 5: nothing is implemented yet, so calling it
    has no effect and yields None.
    """
    return None
def Task6():
    """
    Placeholder for Task 6: nothing is implemented yet, so calling it
    has no effect and yields None.
    """
    return None
def Task7():
    """
    Placeholder for Task 7: nothing is implemented yet, so calling it
    has no effect and yields None.
    """
    return None