-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathCode_Practice2.R
More file actions
36 lines (27 loc) · 1.04 KB
/
Code_Practice2.R
File metadata and controls
36 lines (27 loc) · 1.04 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# Set working directory
# NOTE(review): absolute, machine-specific path; the relative file names used
# for reading and writing later in this script resolve against this directory.
setwd("F:/Cosmodea")
library(data.table) # provides fread(), used to import the CSV
# NOTE(review): no RSiteCatalyst function appears to be called in this script;
# confirm before removing.
library("RSiteCatalyst")
library("RTextTools") #Loads many packages useful for text mining
# Import data
df <- fread("Practice2.csv")

# Keep only the first column, which holds the keyword text.
df1 <- df[, 1]

# Convert the keyword column into a one-column matrix.
# At most `max_keywords` rows are used. head() is used instead of a fixed
# 1:227 index so that a file with fewer rows does not get padded with NA rows
# (which would otherwise be treated as documents further down).
max_keywords <- 227L
df_vec <- as.matrix(head(df1, max_keywords))
# Build the document-term matrix from the keyword text: words are stemmed and
# punctuation is removed, stop words are kept, and the minimum word length is 1.
dtm <- create_matrix(
  df_vec,
  minWordLength = 1,
  removePunctuation = TRUE,
  removeStopwords = FALSE,
  stemWords = TRUE
)
# Show the names of the resulting object (auto-printed at top level).
names(dtm)
# Build model
# k-means starts from randomly chosen centres, so fix the RNG seed to make the
# cluster assignment (and therefore the exported file) reproducible.
n_clusters <- 5L
set.seed(1234)
kmeans5 <- kmeans(as.matrix(dtm), centers = n_clusters)

# Every keyword row must have received exactly one cluster label.
stopifnot(length(kmeans5$cluster) == nrow(df_vec))

# Pair each keyword with its cluster id. Building the data frame column by
# column keeps the cluster id an integer; cbind()-ing it onto the character
# keyword matrix would silently coerce it to text.
kw_with_cluster <- data.frame(
  keyword = as.vector(df_vec[, 1]),
  kmeans5 = as.integer(kmeans5$cluster),
  stringsAsFactors = FALSE
)

# Group the rows by cluster (1, 2, ..., n_clusters). order() leaves ties in
# their original order, so keywords keep their input order within a cluster,
# and the row names still carry each keyword's original row position.
a <- kw_with_cluster[order(kw_with_cluster$kmeans5), , drop = FALSE]

# Export the clustered keywords; the row-name column holds the original row
# position of each keyword.
write.csv(a, "practice2_cluster.csv")