diff --git a/.RData b/.RData new file mode 100644 index 0000000..4a4be9c Binary files /dev/null and b/.RData differ diff --git a/.gitignore b/.gitignore index e8d82f0..f0eb7df 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,5 @@ .Rproj.user .Rhistory -.RData .Ruserdata data_vis.pbix my_app_id.R @@ -8,4 +7,3 @@ data_exploration.html Process flow chart.jpg experimental_dashboard.pbix README.html -other documents & archive diff --git a/R/functions.R b/R/functions.R index 51e36e3..4289848 100644 --- a/R/functions.R +++ b/R/functions.R @@ -36,10 +36,9 @@ get_charity_fundraising_pages <- function(charity_name, id){ reduce(bind_rows)%>% mutate(charity = charity_name, searched_charity_id = id)%>% - filter(searched_charity_id==CharityId) + filter(searched_charity_id==CharityId) #filter out where id's do not match ('justgiving_id') return(fundraisers_data) } -#DO: filter on id=CharityID (and use justgiving_id in calling this) #This takes a fundraisers id and gets the data for it (a single row of info) get_fundraising_data <- function(fundraiser_id){ diff --git a/R/just_giving_data_pull.R b/R/just_giving_data_pull.R index 4d4e841..aba271f 100644 --- a/R/just_giving_data_pull.R +++ b/R/just_giving_data_pull.R @@ -5,10 +5,9 @@ charity_data <- charities_csv %>% read_csv %>% #drop_na(charity_name, regno) - drop_na(charity_name, justgiving_id) -#drop if there IS no 'justgiving_id' + drop_na(charity_name, justgiving_id) #drop if there IS no 'justgiving_id' -#%>% filter(give_well_top_2017==1 | give_well_standout_2017==1) + #%>% filter(give_well_top_2017==1 | give_well_standout_2017==1) #Get all fundraisers for target charities (just basic information) fundraiser_search_data <- @@ -19,20 +18,18 @@ fundraiser_search_data <- fundraiser_search_data_2018 <- fundraiser_search_data %>% mutate(date_created=date(CreatedDate)) %>% - filter(date_created>"2018-01-01") + filter(date_created>"2018-06-01") - -#Sample of 50 for testing... fundraiser_search_data <- tail(fundraiser_search_data,n=50) -#sample wateraid: fundraiser_search_data_w<- filter(fundraiser_search_data,charity=="WaterAid") -#fundraiser_search_data_a<-filter(fundraiser_search_data,charity=="Animal Equality") -#Note -- I did a check on these smaller charities -- the filtering below seems to be correct + #Sample of 10 for testing... fundraiser_search_data_t <- tail(fundraiser_search_data,n=10) + #sample wateraid: fundraiser_search_data_w<- filter(fundraiser_search_data,charity=="WaterAid") + #fundraiser_search_data_a<-filter(fundraiser_search_data,charity=="Animal Equality") #Get info about the fundraisers fundraising_page_data <- map(fundraiser_search_data$Id, get_fundraising_data) %>% reduce(bind_rows) %>% left_join(fundraiser_search_data, by = c('pageId' = 'Id')) %>% - dplyr::filter(unlist(Map(function(x, y) grepl(x, y), searched_charity_id, charity.registrationNumber))) %>% #match the 'regno' ... if it is *present* in the other variable (some give several regno's) + #dplyr::filter(unlist(Map(function(x, y) grepl(x, y), searched_charity_id, charity.registrationNumber))) %>% -- removed as already done above ... match the 'regno' ... if it is *present* in the other variable (some give several regno's) select(-grep('image.', names(.))) %>% select(-grep('videos.', names(.)))%>% select(-grep('branding.', names(.))) %>% diff --git a/README.pdf b/README.pdf new file mode 100644 index 0000000..1506d8f Binary files /dev/null and b/README.pdf differ diff --git a/main.R b/main.R index 36a3531..750d69c 100644 --- a/main.R +++ b/main.R @@ -11,8 +11,8 @@ charities_csv <- file.path(data_folder, 'effective_charities.csv') #replace with #A folder that contains all the fundraising and donation data, a new copy each time the code is run snapshots_folder <- file.path(data_folder, 'just_giving_data_snapshots') -#In the get_current... file, We don't look at pages with first donation that comes before the -experiment_start_date <- as.Date('2018/04/13') #REMEMBER to reset this!! +#In the get_current... file, We don't look at pages with first donation that comes before the... +experiment_start_date <- as.Date('2019/07/13') #REMEMBER to reset this!! date = Sys.Date() time = Sys.time() @@ -38,5 +38,6 @@ source("my_app_id.R") source("R/functions.R") #Downloads all current data for the target charities, also saves a snapshot source("R/just_giving_data_pull.R") + #Performs the randomisation, outputs a file listing all new treatment groups, and saves the current state of experimental pages source("R/get_current_state_and_randomise.R") \ No newline at end of file