Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
255 changes: 255 additions & 0 deletions clim_functions
Original file line number Diff line number Diff line change
@@ -0,0 +1,255 @@
require(stringr)
require(lubridate)
require(compiler)
require(ggplot2)

#Performs range test on relative humidity based on Estevez et al. (2011)####
#RH acceptable range = 0-100%
f.rh.r1 <- function(data) {
#Vectorize and format the relative humidity data
rh.1 <- as.numeric(data$Sensor1_RelativeHumidity)
rh.2 <- as.numeric(data$Sensor2_RelativeHumidity)
rh.3 <- as.numeric(data$Sensor3_RelativeHumidity)
#Index rows that fail the first/only range test for relative humidity
rh.1.fail.r1 <- which(rh.1 < 0 | rh.1 > 100)
rh.2.fail.r1 <- which(rh.2 < 0 | rh.2 > 100)
rh.3.fail.r1 <- which(rh.3 < 0 | rh.3 > 100)
#Create a data frame with results of the test
final.df <- data.frame(ReferenceID = data$ReferenceID, RecordID = data$RecordID, Observation = data$Observation,
TEAMSiteName = data$TEAMSiteName, SamplingUnitName = data$SamplingUnitName, rh.1 = rh.1,
rh.1.rangeTest = NA, rh.2 = rh.2, rh.2.rangeTest = NA, rh.3 = rh.3, rh.3.rangeTest = NA)
#Use the failed test index to store the error code
final.df$rh.1.rangeTest[rh.1.fail.r1] <- 'R1'
final.df$rh.2.rangeTest[rh.2.fail.r1] <- 'R1'
final.df$rh.3.rangeTest[rh.3.fail.r1] <- 'R1'
#Replace NAs with a blank
final.df$rh.1.rangeTest[is.na(final.df$rh.1.rangeTest)] <- ' '
final.df$rh.2.rangeTest[is.na(final.df$rh.2.rangeTest)] <- ' '
final.df$rh.3.rangeTest[is.na(final.df$rh.3.rangeTest)] <- ' '
return(final.df)
}

#Performs first range test on temperature based on Estevez et al. (2011)####
#Acceptable values fall between -30 and 50; revisit these numbers (Africa max high is 55--WMO)
f.temp.r1 <- function(data) {
#Vectorize and format the temperature data
temp.1 <- as.numeric(data$Sensor1_AvgTemp)
temp.2 <- as.numeric(data$Sensor2_AvgTemp)
temp.3 <- as.numeric(data$Sensor3_AvgTemp)
#Index rows that have failed the range test for temperature
temp.1.fail.r1 <- which(temp.1 < -30 | temp.1 > 50)
temp.2.fail.r1 <- which(temp.2 < -30 | temp.2 > 50)
temp.3.fail.r1 <- which(temp.3 < -30 | temp.3 > 50)
#Create a data frame with results of the test
final.df <- data.frame(ReferenceID = data$ReferenceID, RecordID = data$RecordID, Observation = data$Observation,
TEAMSiteName = data$TEAMSiteName, SamplingUnitName = data$SamplingUnitName, temp.1 = temp.1,
temp.1.rangeTest = NA, temp.2 = temp.2, temp.2.rangeTest = NA, temp.3 = temp.3,
temp.3.rangeTest = NA)
#Use the failed test index to store the error code
final.df$temp.1.rangeTest[temp.1.fail.r1] <- 'R1'
final.df$temp.2.rangeTest[temp.2.fail.r1] <- 'R1'
final.df$temp.3.rangeTest[temp.3.fail.r1] <- 'R1'
#Replace NAs with a blank
final.df$temp.1.rangeTest[is.na(final.df$temp.1.rangeTest)] <- ' '
final.df$temp.2.rangeTest[is.na(final.df$temp.2.rangeTest)] <- ' '
final.df$temp.3.rangeTest[is.na(final.df$temp.3.rangeTest)] <- ' '
return(final.df)
}

#Performs second range test on temperature based on Estevez et al. (2011)--UNDER CONSTRUCTION####
#Need to find an accurate list of country max/min temperatures; ideally long-term stations close to TEAM station

#Performs first range test on rainfall based on Estevez et al. (2011)####
#Acceptable values fall between 0 and 250 mm over a half hour period
#NEED TO PASTE TIME AND DATE (JUST DATE NOW) PRIOR TO JUNE 2010; FOR VB AT LEAST
f.rain.r1 <- function(data) {
#Vectorize and format the rain data
rain <- as.numeric(data$Rainfall)
#Format observation date as POSIX
data$Observation <- ymd_hms(data$Observation)
#Store the first day of the data (where to start the interval)
start.date <- min(data$Observation) #ymd('2010-07-01')
#Create a vector to store the index info for failed tests
rain.fail.r1 <- vector()
#Create a half hour interval window to select data during that period
int <- new_interval(start = start.date, end = start.date + minutes(30))
#For each record use the interval to index data and then sum rainfall measurements
for(i in 1:10000) {
#If data is missing for a particular interval, shift the interval by five minutes and start again
#at next record
if(length(which(data$Observation %within% int)) == 0) {
int <- int_shift(int = int, by = minutes(5))
} else {
#If the interval exists in the data, sum the data over the interval
half.hr.sum <- sum(rain[which(data$Observation %within% int)])
#If the sum of the interval is greater than 250 or less than 0, store the row number in the index
if(half.hr.sum > 250 | half.hr.sum < 0) {
rain.fail.r1[i] <- which(data$Observation %within% int)[1]
}
#After storing the row number, shift the interval by five minutes and start again at next record
int <- int_shift(int = int, by = minutes(5))
}
}
#Create a data frame with results of the test
final.df <- data.frame(ReferenceID = data$ReferenceID, RecordID = data$RecordID, Observation = data$Observation,
TEAMSiteName = data$TEAMSiteName, SamplingUnitName = data$SamplingUnitName, rain = rain,
rain.rangeTest = NA)
#Remove NAs that were introduced to the failed test index
rain.fail.r1 <- rain.fail.r1[!is.na(rain.fail.r1)]
#Use the failed test index to store the error code
final.df$rain.rangeTest[rain.fail.r1] <- 'R1'
#Replace NAs with a blank
final.df$rain.rangeTest[is.na(final.df$rain.rangeTest)] <- ' '
return(final.df)
}

#Performs second range test on rainfall based on Estevez et al. (2011)####
#Acceptable values fall between 0 and 500 mm
f.rain.r2 <- function(data) {
data$Observation <- ymd_hms(data$Observation)
#Vectorize and format rain data
rain <- as.numeric(data$Rainfall)
#Grab just the date of each record
TempDay <- floor_date(data$Observation, 'day')
TempDay <- ymd(TempDay)
#Generate daily summaries
Daily.Climate.Data <- data.frame(Rain = tapply(rain, TempDay, sum))
#Index the rows in which a day had more than 500 mm or less than 0 mm of rain
rain.fail.r2 <- as.vector(which(Daily.Climate.Data$Rain > 500 | Daily.Climate.Data$Rain < 0))
#Get the dates from the index number to flag the results
if(length(rain.fail.r2) != 0) {
dates.fail <- ymd(row.names(Daily.Climate.Data)[rain.fail.r2])
#Creates an index again to store the dates in the data that match the days that
#failed in the previous index
rain.fail.r2 <- which(TempDay %in% dates.fail)
}
#Create a data frame with results of the test
final.df <- data.frame(ReferenceID = data$ReferenceID, RecordID = data$RecordID, Observation = data$Observation,
TEAMSiteName = data$TEAMSiteName, SamplingUnitName = data$SamplingUnitName, rain = rain,
rain.rangeTest = NA)
#Use the failed test index to store the error code
final.df$rain.rangeTest[rain.fail.r2] <- 'R2'
#Replace NAs with a blank
final.df$rain.rangeTest[is.na(final.df$rain.rangeTest)] <- ' '
return(final.df)
}

#Performs first persistence test for temperature####
f.temp.rh.p1 <- function(data) {
data$Observation <- ymd_hms(data$Observation)
#Vectorize and format the temperature data
temp.1 <- as.numeric(data$Sensor1_AvgTemp)
temp.2 <- as.numeric(data$Sensor2_AvgTemp)
temp.3 <- as.numeric(data$Sensor3_AvgTemp)
rh.1 <- as.numeric(data$Sensor1_RelativeHumidity)
rh.2 <- as.numeric(data$Sensor2_RelativeHumidity)
rh.3 <- as.numeric(data$Sensor3_RelativeHumidity)
#Create a list of variables
var.list <- list(temp.3, rh.1, rh.2, rh.3)
#Run the persistence function to generate index of failed records for:
#first record (with data), and 1 and 2 days prior to current record
#See f.persistence function for the real magic; setup with data set
#and variable of interest
#Takes roughly 8 hours per sensor - should be able to speed this up
temp.1.fail.p1 <- f.persistence(data, temp.1)
#Start @ 11:49 AM
temp.2.fail.p1 <- f.persistence(data, temp.2)
temp.3.fail.p1 <- f.persistence(data, temp.3)
#Create a data frame with results of the test
final.df <- data.frame(ReferenceID = data$ReferenceID, RecordID = data$RecordID, Observation = data$Observation,
TEAMSiteName = data$TEAMSiteName, SamplingUnitName = data$SamplingUnitName, temp.1 = temp.1,
temp.1.perTest = NA, temp.2 = temp.2, temp.2.perTest = NA, temp.3 = temp.3,
temp.3.perTest = NA)
#Use the failed test index to store the error code
final.df$temp.1.perTest[temp.1.fail.p1] <- 'P1'
final.df$temp.2.perTest[temp.2.fail.p1] <- 'P1'
final.df$temp.3.perTest[temp.3.fail.p1] <- 'P1'
#Replace NAs with a blank
final.df$temp.1.perTest[is.na(final.df$temp.1.perTest)] <- ' '
final.df$temp.2.perTest[is.na(final.df$temp.2.perTest)] <- ' '
final.df$temp.3.perTest[is.na(final.df$temp.3.perTest)] <- ' '
return(final.df)
}

#This function creates an index to compare records to exactly 1 and 2 days prior to the
#current record and then returns an index of records that fail the test
#All three records will be flagged if they are the same value
#Must use a valid data set and variable
f.persistence <- function(data, var) {
#Find the date (and time) of the first record in the data set
this.date <- which(data$Observation == min(data$Observation))
#Get the index for a 2 day delayed start to make the for loop work properly
delayed.start <- which(data$Observation == data$Observation[this.date] + days(2))
#If there is no data for the first record, index the first record that has data
delayed.start <- ifelse(test=delayed.start < which(!is.na(var))[1],
yes=which(!is.na(var))[1],
no=delayed.start)
#Get the index for a 2 day delayed start from the first record that has data
#to make the for loop work properly (155-163 CAN PROBABLY BE CLEANED UP)
delayed.start <- which(data$Observation == data$Observation[delayed.start] + days(2))
#Create an empty list to store the index values for records that fail the test
fail.list <- list()
#Starts at the earliest record with data and fills list with records that fail test
for(i in delayed.start:nrow(data)) {
#Index the date and time of the current record
this.date <- which(data$Observation == data$Observation[i])
#index the date and time of the record exactly 1 and 2 days prior to current record
day.before <- which(data$Observation == data$Observation[i] - days(1))
days2.before <- which(data$Observation == data$Observation[i] - days(2))
#If the variable for the current record is equal to the value exactly 1 and 2 days
#before the current record, index all three rows
if(length(var[i] == var[day.before] | var[i] == days2.before ) == 0) {
this.date <- this.date
} else if(is.na(var[i]) | is.na(var[day.before]) | is.na(var[days2.before])) {
this.date <- this.date
} else if(var[i] == var[day.before] & var[i] == var[days2.before]) {
fail.list[[i]] <- c(this.date, day.before, days2.before)
}
}
#Convert the list index for failed records into a vector index and return
fail.index <- unlist(fail.list)
return(fail.index)
}

#Takes list of variables and runs persistence test on all
f.persistence.list <- function(data, var.list) {
#Find the date (and time) of the first record in the data set
this.date <- which(data$Observation == min(data$Observation))
#Get the index for a 2 day delayed start to make the for loop work properly
for (j in 1:length(var.list)) {
delayed.start <- vector()
delayed.start[j] <- which(data$Observation == data$Observation[this.date] + days(2))
#If there is no data for the first record, index the first record that has data
delayed.start[j] <- ifelse(test=delayed.start < which(!is.na(var.list[[j]]))[1],
yes=which(!is.na(var.list[[j]]))[1],
no=delayed.start)
#Get the index for a 2 day delayed start from the first record that has data
#to make the for loop work properly (155-163 CAN PROBABLY BE CLEANED UP)
delayed.start[j] <- which(data$Observation == data$Observation[delayed.start] + days(2))
}
#Create an empty list to store the index values for records that fail the test
fail.list <- list()
#Starts at the earliest record with data and fills list with records that fail test
for(i in delayed.start:nrow(data)) {
#Index the date and time of the current record
this.date <- which(data$Observation == data$Observation[i])
#index the date and time of the record exactly 1 and 2 days prior to current record
day.before <- which(data$Observation == data$Observation[i] - days(1))
days2.before <- which(data$Observation == data$Observation[i] - days(2))
#If the variable for the current record is equal to the value exactly 1 and 2 days
#before the current record, index all three rows
if(is.na(var[i]) | is.na(var[day.before]) | is.na(var[days2.before])) {
this.date <- this.date
}
if(length(var[i] == var[day.before] & var[i] == days2.before ) == 0) {
this.date <- this.date
} else {
if(var[i] == var[day.before] & var[i] == var[days2.before]) {
fail.list[[i]] <- c(this.date, day.before, days2.before)
}
}
}
#Convert the list index for failed records into a vector index and return
fail.index <- unlist(fail.list)
return(fail.index)
}
Loading