-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathexam2.Rmd
More file actions
142 lines (103 loc) · 3.1 KB
/
exam2.Rmd
File metadata and controls
142 lines (103 loc) · 3.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
---
title: "R Notebook"
output: html_notebook
---
```{r, message=FALSE, warning=FALSE}
# Libraries
# Package start-up messages and warnings are hidden for this chunk only (via
# the chunk options above). Warnings stay enabled for the rest of the notebook
# so that problems raised while modelling or clustering remain visible.
library(naivebayes)
library(tidyverse)
library(psych)
library(e1071)
library(caret)
library(class)
```
```{r}
# Import the dataset.
# Read the raw CSV, then keep only the Toronto rows and the three columns used
# in the analysis (row filter and column selection done in a single subset()).
raw_data <- read.csv("DatasetforFINAL.csv")
df <- subset(
  raw_data,
  subset = City == "Toronto",
  select = c(FamilyIncome, FamilySize, boughtelectronics)
)
# Recode the response: "YES" becomes 1, anything else becomes 0, stored as a
# factor so the classifiers treat it as a class label.
bought_flag <- as.numeric(df$boughtelectronics == "YES")
df$boughtelectronics <- factor(bought_flag)
```
```{r}
#Question 1
# Use 70% of the dataset as the training set and the other 30% as the test set.
# Fix the RNG seed so the split, and every result derived from it, is
# reproducible between runs.
set.seed(123)
n_rows <- nrow(df)
# Draw exactly floor(0.7 * n) distinct row positions for training.
train_rows <- sample(seq_len(n_rows), size = floor(0.7 * n_rows))
# A logical mask keeps the original row order in both subsets and still works
# when one side is empty (negative indexing with integer(0) would not).
in_train <- seq_len(n_rows) %in% train_rows
train <- df[in_train, ]
test <- df[!in_train, ]
```
```{r}
#Question 2a
# Fit the Naive Bayes classifier on the training split only, so the rows in
# `test` stay unseen until evaluation.
# x: the two predictors; y: the class label (factor with levels "0"/"1").
x <- subset(train, select = c(FamilySize, FamilyIncome))
y <- train$boughtelectronics
# `caret::` is spelled out because `train` is also the name of the training
# data frame in this notebook. Tuning uses 10-fold cross-validation.
model <- caret::train(
  x, y,
  method = "nb",
  trControl = trainControl(method = "cv", number = 10)
)
```
```{r}
# Predicted class labels of the Naive Bayes model for the held-out test rows.
preds_naive <- predict(model, newdata = test)
```
```{r}
#Question 3a
## Fit the k-nearest-neighbours classifier on the training split.
# kNN is distance-based, so both predictors are centred and scaled first;
# otherwise FamilyIncome (large values) would dominate FamilySize.
# tuneLength = 10 lets caret try 10 candidate values of k.
model_knn <- caret::train(
  boughtelectronics ~ FamilyIncome + FamilySize,
  data = train,
  method = "knn",
  preProcess = c("center", "scale"),
  tuneLength = 10
)
#Question 3b
# Predicted class labels for the test rows; caret applies the stored
# centring/scaling to `newdata` before predicting.
preds_knn <- predict(model_knn, newdata = test)
```
```{r}
#Question 4a
# Confusion matrices on the test set. `positive = "1"` makes "bought
# electronics" the positive class, so sensitivity, specificity, precision etc.
# refer to buyers (the default would take the first level, "0").
# Balanced accuracy is the mean of sensitivity and specificity, so it is the
# same whichever class is labelled positive.

# Confusion matrix for the Naive Bayes classifier
conMat_naives <- confusionMatrix(
  data = preds_naive,
  reference = test$boughtelectronics,
  positive = "1"
)
conMat_naives$byClass['Balanced Accuracy']
# Confusion matrix for the kNN classifier
conMat_knn <- confusionMatrix(
  data = preds_knn,
  reference = test$boughtelectronics,
  positive = "1"
)
conMat_knn$byClass['Balanced Accuracy']
```
Naive Bayes has the higher balanced accuracy.
```{r}
#Question 4a
# Naive Bayes ROC chart
library(ROCR)
# ROC, lift and gain curves need a continuous score per observation; hard
# class labels would give only a single operating point. Use the predicted
# probability of class "1" (bought electronics) as the score.
prob_naive <- predict(model, newdata = test, type = "prob")[, "1"]
# The labels are the factor codes (1 for level "0", 2 for level "1"); ROCR
# treats the larger value as the positive class by default.
pred_naive <- prediction(prob_naive, as.numeric(test$boughtelectronics))
perf <- performance(pred_naive, "tpr", "fpr")
plot(perf, main = "ROC curve Naive Bayes", colorize = TRUE)
# And then a lift chart (lift against rate of positive predictions)
perf2 <- performance(pred_naive, "lift", "rpp")
plot(perf2, main = "lift curve Naive Bayes", colorize = TRUE)
# And then a gain chart (true positive rate against rate of positive predictions)
perf3 <- performance(pred_naive, "tpr", "rpp")
plot(perf3, main = "Gain curve Naive Bayes", colorize = TRUE)
```
```{r}
#Question 4b
# kNN ROC, lift and gain charts (uses ROCR, attached in the previous chunk).
# The curves need a continuous score per observation; hard class labels would
# give only a single operating point. Use the predicted probability of
# class "1" (bought electronics) as the score.
prob_knn <- predict(model_knn, newdata = test, type = "prob")[, "1"]
# The labels are the factor codes (1 for level "0", 2 for level "1"); ROCR
# treats the larger value as the positive class by default.
pred_knn <- prediction(prob_knn, as.numeric(test$boughtelectronics))
perf4 <- performance(pred_knn, "tpr", "fpr")
plot(perf4, main = "ROC curve for knn classifier", colorize = TRUE)
# And then a lift chart (lift against rate of positive predictions)
perf5 <- performance(pred_knn, "lift", "rpp")
plot(perf5, main = "lift curve for knn classifier", colorize = TRUE)
# And then a gain chart (true positive rate against rate of positive predictions)
perf6 <- performance(pred_knn, "tpr", "rpp")
plot(perf6, main = "Gain curve for knn classifier", colorize = TRUE)
```
```{r}
#Question 5
library(dplyr)
library(factoextra)
library(stats)
# Cluster on the two numeric features only: the class label
# (boughtelectronics) is a factor and must not enter a distance computation.
# The features are standardised so FamilyIncome does not dominate FamilySize.
kmeans_features <- scale(df[, c("FamilyIncome", "FamilySize")])
# k-means starts from random centres; fix the seed for a reproducible plot.
set.seed(123)
# Average silhouette width for each candidate number of clusters. The result
# gets its own name so that it does not mask stats::kmeans.
kmeans_silhouette <- fviz_nbclust(kmeans_features, kmeans, method = "silhouette")
kmeans_silhouette
```
```{r}
#Question 6
# Cluster on the two numeric features only: the class label
# (boughtelectronics) is a factor and cannot be used in dist() or be
# standardised by fviz_cluster(). The features are standardised so
# FamilyIncome does not dominate FamilySize. Row names of `df` are kept, so
# point labels in the plot still identify the original rows.
hc_features <- scale(df[, c("FamilyIncome", "FamilySize")])
# Average silhouette width for each candidate number of clusters (hcut).
Hierarchical_clustering <- fviz_nbclust(hc_features, hcut, method = "silhouette")
Hierarchical_clustering
# Centroid linkage, with the method name written in full. The distances are
# squared, following the centroid example in the hclust() documentation.
hc <- hclust(dist(hc_features)^2, method = "centroid")
# Cut tree into 2 groups:
sub_grp <- cutree(hc, k = 2)
# Create plot of clusters:
fviz_cluster(
  list(data = hc_features, cluster = paste0("Group", sub_grp)),
  alpha = 1,
  palette = "jco",
  labelsize = 9,
  ellipse.type = "norm"
)
```