-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdivvy_stations_data_extraction.R
More file actions
87 lines (67 loc) · 2.64 KB
/
divvy_stations_data_extraction.R
File metadata and controls
87 lines (67 loc) · 2.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# Divvy station data extraction
# Purpose -----------------
# Downloads the Divvy historical stations data through the City of Chicago data portal API. This script downloads the data for an entire year, based on the value of 'input_year'.
# The timestamps can be tricky: the data are pulled in UTC, but each daily query runs between midnights in CST, so the last few lines of code convert the timestamps from UTC to CST.
# Before starting, you need to get an app_token using the data.cityofchicago portal
# Steps on making an apptoken for the API:
# 1. Go to https://data.cityofchicago.org/login
# 2. Make an account or sign in
# 3. Press the little pen next to your username to edit your profile
# 4. Navigate to the "Developer Settings" Page using the sidebar menu
# 5. Click on the "Create New App Token" and fill out the popup accordingly. Keep it private
# 6. Copy your App Token and paste it into this script
# Also, this app token is tied to you and your account, so make sure you don't share it with others
# load packages from shared library, not necessary if not using QUEST or a shared library ------
lib <- '/projects/e30686/R/4.1'
# load packages ----
library("RSocrata", lib.loc = lib)
library("tidyverse", lib.loc = lib)
library('lubridate', lib.loc = lib)
library('filesstrings', lib.loc = lib)
# Define pathways and parameters ----
# Folder that holds the data files; output is written to its "raw" subfolder.
my_folder <- "mydata"

# Year to download (nothing prior to 2013) ----
input_year <- 2014
first_day <- str_c(input_year, "-01-01")
last_day <- str_c(input_year, "-12-31")

# Output file path, e.g. "mydata/raw/divvy_2014.csv" ----
out_path <- file.path(my_folder, "raw", str_c("divvy_", input_year, ".csv"))

# Define the app token (a string) ----
# The token is read from the SOCRATA_APP_TOKEN environment variable so it never
# has to be saved in the script. Set it in ~/.Renviron or with
# Sys.setenv(SOCRATA_APP_TOKEN = "..."), or replace the Sys.getenv() call below
# with your token string.
# NOTE: a bare `apptoken <-` followed only by a comment is not a complete
# statement; R would continue the assignment onto the next expression in the
# file, so a real value must always be assigned here.
apptoken <- Sys.getenv("SOCRATA_APP_TOKEN")
if (!nzchar(apptoken)) {
  stop(
    "No app token found: set the SOCRATA_APP_TOKEN environment variable ",
    "(or assign your token string to `apptoken`).",
    call. = FALSE
  )
}
# Define function for downloading data ----
#' Download the result of one Socrata query as a tibble
#'
#' @param url Socrata resource URL, including any query string.
#' @param app_token App token handed to `read.socrata()`. Defaults to the
#'   script-level `apptoken`, so existing calls `get_data(url = ...)` behave
#'   as before.
#' @return The value returned by `read.socrata()`, converted with `as_tibble()`.
get_data <- function(url, app_token = apptoken) {
  # No trailing comma after the last argument: an empty argument would be
  # passed positionally to the next formal of read.socrata().
  read.socrata(url, app_token = app_token) %>%
    as_tibble()
}
# Build one row per day of the year, each with its own query URL ----
# Every URL asks the eq45-8inv resource for rows whose `timestamp` lies between
# 00:00:00 and 23:59:59 of that day.
divvy_db <- tibble(day = seq(ymd(first_day), ymd(last_day), by = "day")) %>%
  mutate(
    url_input = str_c(
      "https://data.cityofchicago.org/resource/eq45-8inv.csv?$where=timestamp between '",
      day,
      "T00:00:00' and '",
      day,
      "T23:59:59'"
    )
  )
# Download each day's records into a list-column ----
# One request is issued per row; `data` holds the tibble returned for that day.
divvy_db <- divvy_db %>%
  mutate(data = map(url_input, get_data))
# Remove unnecessary columns ----
# Keep only days that returned at least one row, expand the nested daily
# tibbles into regular rows, then drop the helper columns used for downloading.
divvy_db <- divvy_db %>%
  filter(map_int(data, nrow) > 0) %>%
  unnest(data) %>%
  select(-c(url_input, day))
# Convert the timestamps from UTC to Chicago local (Central) time ----
# format() with `tz` renders each instant in that time zone and returns
# character strings, so `timestamp` is text from here on.
# "America/Chicago" is the canonical tz database name; the previously used
# "US/Central" is a legacy alias for the same zone that is not guaranteed to be
# installed on every system.
# NOTE(review): assumes read.socrata() returned `timestamp` as a date-time
# column - confirm.
divvy_db <- divvy_db %>%
  mutate(timestamp = format(timestamp, tz = "America/Chicago"))
# Write out the data ----
# Create the output folder first (including parents); write_csv() does not
# create missing directories. Nothing happens if the folder already exists.
dir.create(dirname(out_path), recursive = TRUE, showWarnings = FALSE)
write_csv(divvy_db, out_path)