-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtwt.R
More file actions
146 lines (111 loc) · 5.33 KB
/
twt.R
File metadata and controls
146 lines (111 loc) · 5.33 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
library(rtweet)
library(twitteR)
library(ROAuth)
library(dplyr)
library(tidytext)
library(tidyr)
require(twitteR)
require(ROAuth)
#require(RCurl)
library(stringr)
library(plyr)
library(purrr)
library(ggplot2)
library(wordcloud)
library(httr)
library(rtweet)
#library(sentiment)
library(syuzhet)
library(openssl)
library(httpuv)
library(ROAuth)
library(base64enc)
library(plyr)
# ---- Twitter authentication ----
# Set up rtweet's default authentication for the search_tweets() calls below.
auth_setup_default()

# ---- Retrieve tweets ----
# Up to 100 English, non-retweet tweets per product hashtag.
rt <- search_tweets("#iPhone 13 ", n = 100, include_rts = FALSE, lang = "en")

# Number of tweets retrieved. `rt` is used as a data frame below, so length()
# would report the number of columns; nrow() is the tweet count.
length.rt <- nrow(rt)
length.rt

rt2 <- search_tweets("#SamsungS22Ultra", n = 100, include_rts = FALSE, lang = "en")

# ---- Keep only the columns the analysis needs ----
# NOTE(review): newer rtweet releases appear to expose user fields through
# users_data() instead of tweet columns - confirm. any_of() keeps this
# selection working whether or not `screen_name` is present; only `text` is
# used further down.
tweets.apple <- rt %>% select(any_of("screen_name"), text)
tweets.samsung <- rt2 %>% select(any_of("screen_name"), text)

# Quick look at the raw tweet text.
head(tweets.apple$text)
head(tweets.samsung$text)

# ---- Cleaning ----
# Remove links: "http" followed by a run of NON-whitespace characters (\\S+).
# A lower-case \\s would only match "http" followed by whitespace and leave
# every URL in place.
tweets.apple$stripped_text1 <- gsub("http\\S+", "", tweets.apple$text)
tweets.samsung$stripped_text2 <- gsub("http\\S+", "", tweets.samsung$text)

# Tokenise to one word per row. Despite the `_stem` names, no stemming is
# performed here - these are plain word tokens.
tweets.apple_stem <- tweets.apple %>%
  select(stripped_text1) %>%
  unnest_tokens(word, stripped_text1)
tweets.samsung_stem <- tweets.samsung %>%
  select(stripped_text2) %>%
  unnest_tokens(word, stripped_text2)
head(tweets.apple_stem)
head(tweets.samsung_stem)

# Drop stop words; the join key is spelled out so the join is explicit.
cleaned_tweets.apple <- tweets.apple_stem %>% anti_join(stop_words, by = "word")
cleaned_tweets.samsung <- tweets.samsung_stem %>% anti_join(stop_words, by = "word")
head(cleaned_tweets.apple)
head(cleaned_tweets.samsung)

# ---- Most common words per product ----
# count() and mutate() are namespaced because plyr is attached after dplyr at
# the top of the file, so the bare names resolve to the plyr versions
# (plyr::count() has no `sort` argument).
# aes(x = word, y = n): the x label describes the words and the y label the
# counts; coord_flip() only changes where the axes are drawn.
cleaned_tweets.apple %>%
  dplyr::count(word, sort = TRUE) %>%
  top_n(10, n) %>%
  dplyr::mutate(word = reorder(word, n)) %>%
  ggplot(aes(x = word, y = n)) +
  geom_col() +
  coord_flip() +
  theme_light() +
  labs(x = "Unique Words", y = "Count", title = "Unique Word Count found in Iphone tweets")

cleaned_tweets.samsung %>%
  dplyr::count(word, sort = TRUE) %>%
  top_n(10, n) %>%
  dplyr::mutate(word = reorder(word, n)) %>%
  ggplot(aes(x = word, y = n)) +
  geom_col() +
  coord_flip() +
  theme_light() +
  labs(x = "Unique Words", y = "Count", title = "Unique Word Count found in Samsung tweets")

# ---- Sentiment analysis: bing lexicon ----
# Install textdata only when it is missing instead of on every run.
if (!requireNamespace("textdata", quietly = TRUE)) {
  install.packages("textdata")
}
library(textdata)

# Preview the positive and negative halves of the lexicon.
get_sentiments("bing") %>% filter(sentiment == "positive")
get_sentiments("bing") %>% filter(sentiment == "negative")

# Per-word sentiment counts for each product.
bing_productApple <- cleaned_tweets.apple %>%
  inner_join(get_sentiments("bing"), by = "word") %>%
  dplyr::count(word, sentiment, sort = TRUE) %>%
  ungroup()
bing_productApple

bing_productSamsung <- cleaned_tweets.samsung %>%
  inner_join(get_sentiments("bing"), by = "word") %>%
  dplyr::count(word, sentiment, sort = TRUE) %>%
  ungroup()
bing_productSamsung

# ---- Sentiment analysis: afinn lexicon ----
# Preview one positive and one negative score level. `value` is compared as a
# number rather than as the string "3".
# NOTE(review): the original showed value 3 twice; -3 is assumed to be the
# intended second preview - confirm.
get_sentiments("afinn") %>% filter(value == 3)
get_sentiments("afinn") %>% filter(value == -3)

# ---- Visualise the bing results ----
# Top 10 words per sentiment, one facet per sentiment.
bing_productApple %>%
  group_by(sentiment) %>%
  top_n(10, n) %>%
  ungroup() %>%
  dplyr::mutate(word = reorder(word, n)) %>%
  ggplot(aes(word, n, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~sentiment, scales = "free_y") +
  labs(title = "Tweets containing Apple", y = "contribution to sentiment", x = NULL) +
  coord_flip() +
  theme_light()

bing_productSamsung %>%
  group_by(sentiment) %>%
  top_n(10, n) %>%
  ungroup() %>%
  dplyr::mutate(word = reorder(word, n)) %>%
  ggplot(aes(word, n, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~sentiment, scales = "free_y") +
  labs(title = "Tweets containing Samsung", y = "contribution to sentiment", x = NULL) +
  coord_flip() +
  theme_light()

# ---- Sentiment analysis: nrc lexicon ("joy" words only) ----
nrc_joy <- get_sentiments("nrc") %>%
  filter(sentiment == "joy")

# Words in each product's tweets that the nrc lexicon tags as "joy".
cleaned_tweets.apple %>%
  inner_join(nrc_joy, by = "word") %>%
  dplyr::count(word, sort = TRUE)
cleaned_tweets.samsung %>%
  inner_join(nrc_joy, by = "word") %>%
  dplyr::count(word, sort = TRUE)

# calculate score upon positive and negative words.
typeof(cleaned_tweets.apple)
# Score texts by counting hits against positive and negative word lists.
#
# Each element of `sentences` has punctuation, control characters and digits
# removed, is lower-cased and is split on whitespace. Its score is
#   (number of words found in pos.words) - (number of words found in neg.words).
#
# Args:
#   sentences: vector of texts to score.
#   pos.words: character vector of positive words.
#   neg.words: character vector of negative words.
#   .progress: name of a plyr progress bar (e.g. 'text'); the default 'none'
#              shows no bar and needs no package beyond base R.
#
# Returns:
#   A data.frame with one row per input: `score` (the integer score) and
#   `text` (the input exactly as supplied).
score.sentiment <- function(sentences,pos.words,neg.words,.progress='none')
{
  # gsub() and tolower() are vectorised, so the whole input is cleaned in one
  # pass instead of once per sentence.
  cleaned <- gsub('[[:punct:]]', "", sentences)
  cleaned <- gsub('[[:cntrl:]]', "", cleaned)
  cleaned <- gsub('\\d+', "", cleaned)
  cleaned <- tolower(cleaned)

  # One character vector of words per sentence.
  word.lists <- strsplit(cleaned, '\\s+')

  # %in% never yields NA, so it replaces the match() + !is.na() pair.
  score.words <- function(words) {
    sum(words %in% pos.words) - sum(words %in% neg.words)
  }

  if (identical(.progress, 'none')) {
    scores <- vapply(word.lists, score.words, integer(1), USE.NAMES = FALSE)
  } else {
    # plyr is only needed for its progress bars; it is called through its
    # namespace so that scoring never attaches a package as a side effect.
    scores <- plyr::laply(word.lists, score.words, .progress = .progress)
  }

  data.frame(score = scores, text = sentences)
}
# Load the opinion word lists. scan() reads whitespace-separated tokens as
# character strings and treats everything after ";" on a line as a comment.
pos.words <- scan(
  file = 'D:/twitter/positive_words.txt',
  what = 'character',
  comment.char = ";"
)
neg.words <- scan(
  file = 'D:/twitter/negative_words.txt',
  what = 'character',
  comment.char = ";"
)

# Score the raw tweet text of each product, showing a text progress bar.
apple_score <- score.sentiment(
  sentences = tweets.apple$text,
  pos.words = pos.words,
  neg.words = neg.words,
  .progress = 'text'
)
samsung_score <- score.sentiment(
  sentences = tweets.samsung$text,
  pos.words = pos.words,
  neg.words = neg.words,
  .progress = 'text'
)

# Distribution of per-tweet scores for each product.
hist(apple_score$score)
hist(samsung_score$score)