forked from Science-for-Nature-and-People/soc-twitter
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmaps.R
More file actions
108 lines (79 loc) · 3.88 KB
/
maps.R
File metadata and controls
108 lines (79 loc) · 3.88 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
###################################
# Creating a map regarding tweets #
###################################
# === Libraries ===
library(tmap)
library(rworldmap)
library(tidyverse)
library(sf)
library(countrycode)
library(spdplyr)
library(classInt)
# === Datasets ===
## uncomment if not correct working directory
#setwd('/home/nolasco/soc-twitter')
#list.files()
## uncomment if not loaded into environment
twitter_merged <- read.csv("twitter_merged.csv", stringsAsFactors = FALSE)
twitter_merged_noRT <- read.csv("twitter_merged_noRT.csv", stringsAsFactors = FALSE)
# Creating a Country Code column for the datasets
twitter_merged['countryCode'] <- countrycode(twitter_merged$country, 'country.name', 'iso3c')
twitter_merged_noRT['countryCode'] <- countrycode(twitter_merged_noRT$country, 'country.name', 'iso3c')
# Not all countries had a proper match, hence I had to add it manually
no_matchCountry <- c('Belgique', 'Deutschland', 'España', 'Kenia', 'Nederland',
'Österreich', 'Panamá', 'Roumanie', 'Spanje', '台灣', 'भारत')
no_matchCode <- c('BEL', 'DEU', 'ESP', 'KEN', 'NLD',
'AUT', 'PAN', 'ROU', 'ESP', 'TWN', 'IND')
for (i in 1:length(no_matchCountry)){
twitter_merged$countryCode[twitter_merged$country == no_matchCountry[i]] <- no_matchCode[i]
twitter_merged_noRT$countryCode[twitter_merged_noRT$country == no_matchCountry[i]] <- no_matchCode[i]
}
# Creating a count dataframe
count.RT <- twitter_merged %>%
count(countryCode) %>%
na.omit()
count.noRT <- twitter_merged_noRT %>%
count(countryCode) %>%
na.omit()
# Changing column name so it isn't similar to function name
colnames(count.RT)[2] <- 'Frequency'
colnames(count.noRT)[2] <- 'Frequency'
# Converting the count dataframe into sp class
count.RTsp <- joinCountryData2Map(count.RT, joinCode = "ISO3", nameJoinColumn = "countryCode")
count.noRTsp <- joinCountryData2Map(count.noRT, joinCode = "ISO3", nameJoinColumn = "countryCode")
## Dataframes not including US (might be more efficient code involving spdplyr)
# Creating a count dataframe
count.RT_US <- twitter_merged %>%
count(countryCode) %>%
na.omit() %>%
filter(countryCode != 'USA')
count.noRT_US <- twitter_merged_noRT %>%
count(countryCode) %>%
na.omit() %>%
filter(countryCode != 'USA')
# Changing column name so it isn't similar to function name
colnames(count.RT_US )[2] <- 'Frequency'
colnames(count.noRT_US )[2] <- 'Frequency'
# Converting the count dataframe into sp class
count.RT_USsp <- joinCountryData2Map(count.RT_US, joinCode = "ISO3", nameJoinColumn = "countryCode")
count.noRT_USsp <- joinCountryData2Map(count.noRT_US, joinCode = "ISO3", nameJoinColumn = "countryCode")
# === Interactive Maps ===
tm_shape(count.RTsp) +
tm_polygons('Frequency') + tm_layout(main.title = "With RT")
tm_shape(count.noRTsp) +
tm_polygons('Frequency') + tm_layout(main.title = "No RT")
## As we can tell, US has the most number of tweets regarding soil
# Since US is such a huge outlier, I'm going to remove it to see
tm_shape(count.RT_USsp) +
tm_polygons('Frequency') + tm_layout(main.title = "With RT (not including US)")
tm_shape(count.noRT_USsp) +
tm_polygons('Frequency') + tm_layout(main.title = "No RT (not including US)")
# === Changing the frequency level to include US and more info about other states ===
tm_shape(count.noRTsp) +
tm_polygons('Frequency', style = 'fixed', breaks = c(1,2,5,10,100,929)) + tm_layout(main.title = "No RT")
tm_shape(count.RTsp) +
tm_polygons('Frequency', style = 'fixed', breaks = c(1,2,5,10,100,929)) + tm_layout(main.title = "RT")
# === Final map ===
# Because RT and no RT dataset has same frequency count, we just used one map
tm_shape(count.RTsp) +
tm_polygons('Frequency', style = 'fixed', breaks = c(1,2,5,10,100,929)) + tm_layout(main.title = "Tweet Frequency per Country")