-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtutorial.R
More file actions
71 lines (56 loc) · 2.92 KB
/
tutorial.R
File metadata and controls
71 lines (56 loc) · 2.92 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# Install the LinkIt package from GitHub.
# Repository: https://github.com/cjerzak/LinkIt-software.git
# The download timeout option is raised before installing (presumably because
# the package download can be slow -- adjust if not needed).
options(timeout = 9999999)
# TRUE/FALSE are spelled out: T and F are ordinary variables that can be
# reassigned, so they are not safe as logical constants.
devtools::install_github(
  "cjerzak/LinkIt-software/LinkIt/",
  force = FALSE,
  quiet = FALSE,
  build_vignettes = FALSE,
  dependencies = TRUE
)
library(LinkIt)
library(data.table)
# Toy inputs for the tutorial: two small tables whose name columns refer to
# the same organizations but are spelled differently, plus a random numeric
# payload column in each.
# stringsAsFactors = FALSE keeps the name columns as character vectors on
# every R version (before R 4.0 the default would turn them into factors).
x_mat <- data.frame(
  xname = c("apple computers", "j p morgan"),
  xdat = rnorm(2),
  stringsAsFactors = FALSE
)
y_mat <- data.frame(
  yname = c("apple inc", "jp morgan"),
  ydat = rnorm(2),
  stringsAsFactors = FALSE
)

# Threshold shared by every matching call below (passed as FuzzyThreshold to
# LinkIt and as max_dist to FastFuzzyMatch).
fuzzyThres <- 0.2
# Link x_mat and y_mat with LinkIt's "markov" algorithm.
# Both inputs are converted to data.table before the call. by.x / by.y name
# the columns holding the organization names in each table (presumably the
# match keys, as in merge() -- confirm against the LinkIt documentation).
# TRUE/FALSE are spelled out because T and F are reassignable variables.
z_LinkIt_markov <- LinkIt(
  x = as.data.table(x_mat),
  y = as.data.table(y_mat),
  by.x = "xname",
  by.y = "yname",
  openBrowser = FALSE,
  algorithm = "markov",
  returnDiagnostics = TRUE,
  # Preprocessing and fuzzy-matching settings; the meaning of each option is
  # defined by LinkIt and is not visible in this script.
  control = list(
    RemoveCommonWords = FALSE,
    ToLower = TRUE,
    NormalizeSpaces = TRUE,
    RemovePunctuation = FALSE,
    FuzzyThreshold = fuzzyThres,
    matchMethod = "jw",
    qgram = 2
  )
)
# Link x_mat and y_mat with LinkIt's "bipartite" algorithm, using the same
# inputs, key columns and control settings as the "markov" run.
# TRUE/FALSE are spelled out because T and F are reassignable variables.
z_LinkIt_bipartite <- LinkIt(
  x = as.data.table(x_mat),
  y = as.data.table(y_mat),
  by.x = "xname",
  by.y = "yname",
  algorithm = "bipartite",
  openBrowser = FALSE,
  returnDiagnostics = TRUE,
  # Preprocessing and fuzzy-matching settings; the meaning of each option is
  # defined by LinkIt and is not visible in this script.
  control = list(
    RemoveCommonWords = FALSE,
    ToLower = TRUE,
    NormalizeSpaces = TRUE,
    RemovePunctuation = FALSE,
    FuzzyThreshold = fuzzyThres,
    matchMethod = "jw",
    qgram = 2
  )
)
# For machine-learning clustering (algorithm = "ml"), extra setup is required:
#   1. In R, run install.packages("reticulate").
#   2. From a terminal, run:
#        pip install tensorflow
#        pip install keras
#        pip install chars2vec
# Make sure pip is using the same Python version as reticulate.
# Link x_mat and y_mat with LinkIt's "ml" algorithm. This run needs the
# Python-side setup described in the comments just above this call.
# TRUE/FALSE are spelled out because T and F are reassignable variables.
z_LinkIt_ml <- LinkIt(
  x = as.data.table(x_mat),
  y = as.data.table(y_mat),
  by.x = "xname",
  by.y = "yname",
  algorithm = "ml",
  openBrowser = FALSE,
  returnDiagnostics = TRUE,
  # Preprocessing and fuzzy-matching settings; the meaning of each option is
  # defined by LinkIt and is not visible in this script.
  control = list(
    RemoveCommonWords = FALSE,
    ToLower = TRUE,
    NormalizeSpaces = TRUE,
    RemovePunctuation = FALSE,
    FuzzyThreshold = fuzzyThres,
    matchMethod = "jw",
    qgram = 2
  )
)
# Plain fuzzy match of the two name columns with FastFuzzyMatch, for
# comparison with the LinkIt results. It is given the raw data frames (no
# data.table conversion) and the same method ("jw"), threshold and q value
# that the LinkIt calls receive through their control lists.
# FALSE is spelled out because F is a reassignable variable.
z_fuzzy <- FastFuzzyMatch(
  x_mat, y_mat,
  by.x = "xname",
  by.y = "yname",
  method = "jw",
  max_dist = fuzzyThres,
  q = 2,
  openBrowser = FALSE
)
# Display each result. print() is called explicitly so the output also
# appears when this file is run through source(), where bare top-level
# expressions are not auto-printed by default.
print(z_LinkIt_markov)
print(z_LinkIt_bipartite)
print(z_LinkIt_ml)
print(z_fuzzy)