From 36addd8bd9d4a3b9668a72714f7e08ad160be871 Mon Sep 17 00:00:00 2001
From: jmaccarthy <jmaccarthy@conservation.org>
Date: Fri, 28 Feb 2014 17:09:59 -0500
Subject: [PATCH 01/10] testing forked commits

---
 team_db_query.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/team_db_query.R b/team_db_query.R
index 89c0589..f9a7bbc 100644
--- a/team_db_query.R
+++ b/team_db_query.R
@@ -1,6 +1,6 @@
 # team_db_query.r
 #
-#Hello!
+#Hello! This is different from the master version.
 f.teamdb.query <- function(dataset) {
   # Database query function of TEAM production database. Returns a R Object for
   # each dataset and saves to the workspace. The function contains views or pre-

From 1194443814e76ce0c1aaaa8243f8e7b0a7e58740 Mon Sep 17 00:00:00 2001
From: jmaccarthy <jmaccarthy@conservation.org>
Date: Mon, 28 Apr 2014 15:07:04 -0400
Subject: [PATCH 02/10] this file contains range test functions

---
 clim_functions | 125 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 125 insertions(+)
 create mode 100644 clim_functions

diff --git a/clim_functions b/clim_functions
new file mode 100644
index 0000000..98dcd42
--- /dev/null
+++ b/clim_functions
@@ -0,0 +1,125 @@
+require(stringr)
+require(lubridate)
+require(compiler)
+require(ggplot2)
+
+#Performs range test on relative humidity based on Estevez et al. (2011)####
+#RH acceptable range = 0-100%
+f.rh.r1 <- function(data) {
+  #Vectorize and format the relative humidity data
+  rh.1 <- as.numeric(data$Sensor1_RelativeHumidity)
+  rh.2 <- as.numeric(data$Sensor2_RelativeHumidity)
+  rh.3 <- as.numeric(data$Sensor3_RelativeHumidity)
+  #Index rows that fail the first/only range test for relative humidity
+  rh.1.fail.r1 <- which(rh.1 < 0 | rh.1 > 100)
+  rh.2.fail.r1 <- which(rh.2 < 0 | rh.2 > 100)
+  rh.3.fail.r1 <- which(rh.3 < 0 | rh.3 > 100)
+  #Create a dataframe with results of the test
+  final.df <- data.frame(ReferenceID = data$ReferenceID, RecordID = data$RecordID, Observation = data$Observation,
+                         TEAMSiteName = data$TEAMSiteName, SamplingUnitName = data$SamplingUnitName, rh.1 = rh.1,
+                         rh.1.rangeTest = NA, rh.2 = rh.2, rh.2.rangeTest = NA, rh.3 = rh.3, rh.3.rangeTest = NA)
+  #Use the failed test index to store the error code
+  final.df$rh.1.rangeTest[rh.1.fail.r1] <- 'R1'
+  final.df$rh.2.rangeTest[rh.2.fail.r1] <- 'R1'
+  final.df$rh.3.rangeTest[rh.3.fail.r1] <- 'R1'
+  #Replace NAs with a blank
+  final.df$rh.1.rangeTest[is.na(final.df$rh.1.rangeTest)] <- ' '
+  final.df$rh.2.rangeTest[is.na(final.df$rh.2.rangeTest)] <- ' '
+  final.df$rh.3.rangeTest[is.na(final.df$rh.3.rangeTest)] <- ' '
+  return(final.df)
+}
+
+#Performs first range test on temperature based on Estevez et al. (2011)####
+#Acceptable values fall between -30 and 50; revisit these numbers (Africa max high is 55--WMO)
+f.temp.r1 <- function(data) {
+  #Vectorize and format the temperature data
+  temp.1 <- as.numeric(data$Sensor1_AvgTemp)
+  temp.2 <- as.numeric(data$Sensor2_AvgTemp)
+  temp.3 <- as.numeric(data$Sensor3_AvgTemp)
+  #Index rows that have failed the range test for temperature
+  temp.1.fail.r1 <- which(temp.1 < -30 | temp.1 > 50)
+  temp.2.fail.r1 <- which(temp.2 < -30 | temp.2 > 50)
+  temp.3.fail.r1 <- which(temp.3 < -30 | temp.3 > 50)
+  #Create a dataframe with results of the test
+  final.df <- data.frame(ReferenceID = data$ReferenceID, RecordID = data$RecordID, Observation = data$Observation,
+                         TEAMSiteName = data$TEAMSiteName, SamplingUnitName = data$SamplingUnitName, temp.1 = temp.1,
+                         temp.1.rangeTest = NA, temp.2 = temp.2, temp.2.rangeTest = NA, temp.3 = temp.3,
+                         temp.3.rangeTest = NA)
+  #Use the failed test index to store the error code
+  final.df$temp.1.rangeTest[temp.1.fail.r1] <- 'R1'
+  final.df$temp.2.rangeTest[temp.2.fail.r1] <- 'R1'
+  final.df$temp.3.rangeTest[temp.3.fail.r1] <- 'R1'
+  #Replace NAs with a blank
+  final.df$temp.1.rangeTest[is.na(final.df$temp.1.rangeTest)] <- ' '
+  final.df$temp.2.rangeTest[is.na(final.df$temp.2.rangeTest)] <- ' '
+  final.df$temp.3.rangeTest[is.na(final.df$temp.3.rangeTest)] <- ' '
+  return(final.df)
+}
+
+#Performs second range test on temperature based on Estevez et al. (2011)--UNDER CONSTRUCTION####
+#Need to find an accurate list of country max/min temperatures; ideally long-term stations close to TEAM station
+
+#Performs first range test on rainfall based on Estevez et al. (2011)####
+#NEED TO PASTE TIME AND DATE (JUST DATE NOW) PRIOR TO JUNE 2010; FOR VB AT LEAST
+f.rain.r1 <- function(data) {
+  #Vectorize and format the rain data
+  rain <- as.numeric(data$Rainfall)
+  #Format observation date as POSIX
+  data$Observation <- ymd_hms(data$Observation)
+  #Store the first day of the data (where to start the interval)
+  start.date <- min(data$Observation) #ymd('2010-07-01')
+  #Create a vector to store the index info for failed tests
+  rain.fail.r1 <- vector()
+  #Create a half hour interval window to select data during that period
+  int <- new_interval(start = start.date, end = start.date + minutes(30))
+  #For each record use the interval to index data and then sum rainfall measurements
+  for(i in 1:10000) {
+    #If data is missing for a particular interval, shift the interval by five minutes and start again
+    #at next record
+    if(length(which(data$Observation %within% int)) == 0) {
+      int <- int_shift(int = int, by = minutes(5))
+    } else {
+      #If the interval exists in the data, sum the data over the interval
+      half.hr.sum <- sum(rain[which(data$Observation %within% int)])
+      #If the sum of the interval is greater than 250 or less than 0, store the row number in the index
+      if(half.hr.sum > 250 | half.hr.sum < 0) {
+        rain.fail.r1[i] <- which(data$Observation %within% int)[1]
+      }
+      #After storing the row number, shift the interval by five minutes and start again at next record
+      int <- int_shift(int = int, by = minutes(5))
+    }
+  }
+  #Create a dataframe with results of the test
+  final.df <- data.frame(ReferenceID = data$ReferenceID, RecordID = data$RecordID, Observation = data$Observation,
+                         TEAMSiteName = data$TEAMSiteName, SamplingUnitName = data$SamplingUnitName, rain = rain,
+                         rain.rangeTest = NA)
+  #Remove NAs that were introduced to the failed test index
+  rain.fail.r1 <- rain.fail.r1[!is.na(rain.fail.r1)]
+  #Use the failed test index to store the error code
+  final.df$rain.rangeTest[rain.fail.r1] <- 'R1'
+  #Replace NAs with a blank
+  final.df$rain.rangeTest[is.na(final.df$rain.rangeTest)] <- ' '
+  return(final.df)
+}
+
+#Performs second range test on rainfall based on Estevez et al. (2011)####
+f.rain.r2 <- function(data) {
+  data$Observation <- ymd_hms(data$Observation)
+  rain <- as.numeric(data$Rainfall)
+  TempDay <- floor_date(data$Observation, 'day')
+  Daily.Climate.Data <- data.frame(Rain = tapply(rain, TempDay, sum))
+  rain.fail.r2 <- as.vector(which(Daily.Climate.Data$Rain > 500 | Daily.Climate.Data$Rain < 0))
+  dates.fail <- ymd(row.names(Daily.Climate.Data)[rain.fail.r2])
+  rain.fail.r2 <- which(TempDay %in% dates.fail)
+  #Create a dataframe with results of the test
+  final.df <- data.frame(ReferenceID = data$ReferenceID, RecordID = data$RecordID, Observation = data$Observation,
+                         TEAMSiteName = data$TEAMSiteName, SamplingUnitName = data$SamplingUnitName, rain = rain,
+                         rain.rangeTest = NA)
+  #Use the failed test index to store the error code
+  final.df$rain.rangeTest[rain.fail.r2] <- 'R2'
+  #Replace NAs with a blank
+  final.df$rain.rangeTest[is.na(final.df$rain.rangeTest)] <- ' '
+  return(final.df)
+}
+#head(str_split_fixed(data$Observation, pattern = ' ', 2))
+#strsplit(data$Observation, ' ')

From ca08c2e6c0a7b52764f20c4ddcc29b4355436ee9 Mon Sep 17 00:00:00 2001
From: jmaccarthy <jmaccarthy@conservation.org>
Date: Mon, 28 Apr 2014 15:08:06 -0400
Subject: [PATCH 03/10] range test program

---
 rangeTest | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)
 create mode 100644 rangeTest

diff --git a/rangeTest b/rangeTest
new file mode 100644
index 0000000..fd5cc99
--- /dev/null
+++ b/rangeTest
@@ -0,0 +1,27 @@
+#source('~/Jimmy_Code/teamcode/tools/team_db_query.R')
+
+#Load in the TEAM climate data
+#cldata <- f.teamdb.query('climate')
+#clmeta <- cldata$cl_temp_maxmin
+#cldata <- cldata$cl_data
+
+load('cl_data_new2014-04-24.gzip')
+vb.data.3.0 <- cl_data_all[which(cl_data_all$TEAMSiteName == unique(cl_data_all$TEAMSiteName)[3] &
+                                 cl_data_all$ProtocolVersion == 'Climate 3'), ]
+
+#Relative humidity range test 1
+rh.r1.test <- f.rh.r1(vb.data)
+
+#Temperature range test 1
+temp.r1.test <- f.temp.r1(vb.data)
+
+#NEED TO ADD TEMPERATURE RANGE TEST 2
+
+#Rainfall range test 1
+system.time(rain.r1.test <- f.rain.r1(vb.data.3.0))
+#g<-cmpfun(f.rain.r1)
+#compare <- microbenchmark(f.rain.r1(vb.data.3.0), g(vb.data.3.0), times = 1000)
+#autoplot(compare)
+
+#Rainfall range test 2
+rain.r2.test

From 9c9bc5a244d01c33dab627202d94991e9629f565 Mon Sep 17 00:00:00 2001
From: jmaccarthy <jmaccarthy@conservation.org>
Date: Wed, 30 Apr 2014 23:22:46 -0400
Subject: [PATCH 04/10] Update clim_functions

adds persistence tests
---
 clim_functions | 85 ++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 79 insertions(+), 6 deletions(-)

diff --git a/clim_functions b/clim_functions
index 98dcd42..412af14 100644
--- a/clim_functions
+++ b/clim_functions
@@ -14,7 +14,7 @@ f.rh.r1 <- function(data) {
   rh.1.fail.r1 <- which(rh.1 < 0 | rh.1 > 100)
   rh.2.fail.r1 <- which(rh.2 < 0 | rh.2 > 100)
   rh.3.fail.r1 <- which(rh.3 < 0 | rh.3 > 100)
-  #Create a dataframe with results of the test
+  #Create a data frame with results of the test
   final.df <- data.frame(ReferenceID = data$ReferenceID, RecordID = data$RecordID, Observation = data$Observation,
                          TEAMSiteName = data$TEAMSiteName, SamplingUnitName = data$SamplingUnitName, rh.1 = rh.1,
                          rh.1.rangeTest = NA, rh.2 = rh.2, rh.2.rangeTest = NA, rh.3 = rh.3, rh.3.rangeTest = NA)
@@ -40,7 +40,7 @@ f.temp.r1 <- function(data) {
   temp.1.fail.r1 <- which(temp.1 < -30 | temp.1 > 50)
   temp.2.fail.r1 <- which(temp.2 < -30 | temp.2 > 50)
   temp.3.fail.r1 <- which(temp.3 < -30 | temp.3 > 50)
-  #Create a dataframe with results of the test
+  #Create a data frame with results of the test
   final.df <- data.frame(ReferenceID = data$ReferenceID, RecordID = data$RecordID, Observation = data$Observation,
                          TEAMSiteName = data$TEAMSiteName, SamplingUnitName = data$SamplingUnitName, temp.1 = temp.1,
                          temp.1.rangeTest = NA, temp.2 = temp.2, temp.2.rangeTest = NA, temp.3 = temp.3,
@@ -60,6 +60,7 @@ f.temp.r1 <- function(data) {
 #Need to find an accurate list of country max/min temperatures; ideally long-term stations close to TEAM station
 
 #Performs first range test on rainfall based on Estevez et al. (2011)####
+#Acceptable values fall between 0 and 250 mm over a half hour period
 #NEED TO PASTE TIME AND DATE (JUST DATE NOW) PRIOR TO JUNE 2010; FOR VB AT LEAST
 f.rain.r1 <- function(data) {
   #Vectorize and format the rain data
@@ -89,7 +90,7 @@ f.rain.r1 <- function(data) {
       int <- int_shift(int = int, by = minutes(5))
     }
   }
-  #Create a dataframe with results of the test
+  #Create a data frame with results of the test
   final.df <- data.frame(ReferenceID = data$ReferenceID, RecordID = data$RecordID, Observation = data$Observation,
                          TEAMSiteName = data$TEAMSiteName, SamplingUnitName = data$SamplingUnitName, rain = rain,
                          rain.rangeTest = NA)
@@ -103,15 +104,23 @@ f.rain.r1 <- function(data) {
 }
 
 #Performs second range test on rainfall based on Estevez et al. (2011)####
+#Acceptable values fall between 0 and 500 mm
 f.rain.r2 <- function(data) {
   data$Observation <- ymd_hms(data$Observation)
+  #Vectorize and format rain data
   rain <- as.numeric(data$Rainfall)
+  #Grab just the date of each record
   TempDay <- floor_date(data$Observation, 'day')
+  #Generate daily summaries
   Daily.Climate.Data <- data.frame(Rain = tapply(rain, TempDay, sum))
+  #Index the rows in which a day had more than 500 mm or less than 0 mm of rain
   rain.fail.r2 <- as.vector(which(Daily.Climate.Data$Rain > 500 | Daily.Climate.Data$Rain < 0))
+  #Get the dates from the index number to flag the results
   dates.fail <- ymd(row.names(Daily.Climate.Data)[rain.fail.r2])
+  #Creates an index again to store the dates in the data that match the days that
+  #failed in the previous index (CAN 116-121 BE CLEANED UP AT ALL?)
   rain.fail.r2 <- which(TempDay %in% dates.fail)
-  #Create a dataframe with results of the test
+  #Create a data frame with results of the test
   final.df <- data.frame(ReferenceID = data$ReferenceID, RecordID = data$RecordID, Observation = data$Observation,
                          TEAMSiteName = data$TEAMSiteName, SamplingUnitName = data$SamplingUnitName, rain = rain,
                          rain.rangeTest = NA)
@@ -121,5 +130,69 @@ f.rain.r2 <- function(data) {
   final.df$rain.rangeTest[is.na(final.df$rain.rangeTest)] <- ' '
   return(final.df)
 }
-#head(str_split_fixed(data$Observation, pattern = ' ', 2))
-#strsplit(data$Observation, ' ')
+
+#Performs first persistence test for temperature####
+f.temp.p1 <- function(data) {
+  data$Observation <- ymd_hms(data$Observation)
+  #Vectorize and format the temperature data
+  temp.1 <- as.numeric(data$Sensor1_AvgTemp)
+  temp.2 <- as.numeric(data$Sensor2_AvgTemp)
+  temp.3 <- as.numeric(data$Sensor3_AvgTemp)
+  #Run the persistence function to generate index of failed records for:
+  #first record (with data), and 1 and 2 days prior to current record
+  #See f.persistence function for the real magic; setup with data set
+  #and variable of interest
+  temp.1.fail.p1 <- f.persistence(data, temp.1)
+  temp.2.fail.p1 <- f.persistence(data, temp.2)
+  temp.3.fail.p1 <- f.persistence(data, temp.3)
+  #Create a data frame with results of the test
+  final.df <- data.frame(ReferenceID = data$ReferenceID, RecordID = data$RecordID, Observation = data$Observation,
+                         TEAMSiteName = data$TEAMSiteName, SamplingUnitName = data$SamplingUnitName, temp.1 = temp.1,
+                         temp.1.perTest = NA, temp.2 = temp.2, temp.2.perTest = NA, temp.3 = temp.3,
+                         temp.3.perTest = NA)
+  #Use the failed test index to store the error code
+  final.df$temp.1.perTest[temp.1.fail.p1] <- 'P1'
+  final.df$temp.2.perTest[temp.2.fail.p1] <- 'P1'
+  final.df$temp.3.perTest[temp.3.fail.p1] <- 'P1'
+  #Replace NAs with a blank
+  final.df$temp.1.perTest[is.na(final.df$temp.1.perTest)] <- ' '
+  final.df$temp.2.perTest[is.na(final.df$temp.2.perTest)] <- ' '
+  final.df$temp.3.perTest[is.na(final.df$temp.3.perTest)] <- ' '
+  return(final.df)
+}
+
+#This function creates an index to compare records to exactly 1 and 2 days prior to the
+#current record and then returns an index of records that fail the test
+#All three records will be flagged if they are the same value
+#Must use a valid data set and variable
+f.persistence <- function(data, var) {
+  #Find the date (and time) of the first record in the data set
+  this.date <- which(data$Observation == min(data$Observation))
+  #Get the index for a 2 day delayed start to make the for loop work properly
+  delayed.start <- which(data$Observation == data$Observation[this.date] + days(2))
+  #If there is no data for the first record, index the first record that has data
+  delayed.start <- ifelse(test=delayed.start < which(!is.na(var))[1],
+                          yes=which(!is.na(var))[1],
+                          no=delayed.start)
+  #Get the index for a 2 day delayed start from the first record that has data
+  #to make the for loop work properly (155-163 CAN PROBABLY BE CLEANED UP)
+  delayed.start <- which(data$Observation == data$Observation[delayed.start] + days(2))
+  #Create an empty list to store the index values for records that fail the test
+  fail.list <- list()
+  #Starts at the earliest record with data and fills list with records that fail test
+  for(i in delayed.start:379732) {
+    #Index the date and time of the current record
+    this.date <- which(data$Observation == data$Observation[i])
+    #index the date and time of the record exactly 1 and 2 days prior to current record 
+    day.before <- which(data$Observation == data$Observation[i] - days(1))
+    days2.before <- which(data$Observation == data$Observation[i] - days(2))
+    #If the variable for the current record is equal to the value exactly 1 and 2 days
+    #before the current record, index all three rows
+    if(var[i] == var[day.before] & var[i] == var[days2.before])  {
+      fail.list[[i]] <- c(this.date, day.before, days2.before)
+    }
+  }
+  #Convert the list index for failed records into a vector index and return
+  fail.index <- unlist(fail.list)
+  return(fail.index)
+}

From 5a7615a301c2bfa25eb28403d10bf16ff3ca94d0 Mon Sep 17 00:00:00 2001
From: jmaccarthy <jmaccarthy@conservation.org>
Date: Wed, 30 Apr 2014 23:24:02 -0400
Subject: [PATCH 05/10] Create clim_run

this program runs the supplementary clim_functions file
---
 clim_run | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)
 create mode 100644 clim_run

diff --git a/clim_run b/clim_run
new file mode 100644
index 0000000..bfb63fe
--- /dev/null
+++ b/clim_run
@@ -0,0 +1,31 @@
+#source('~/Jimmy_Code/teamcode/tools/team_db_query.R')
+
+#Load in the TEAM climate data
+#cldata <- f.teamdb.query('climate')
+#clmeta <- cldata$cl_temp_maxmin
+#cldata <- cldata$cl_data
+
+load('cl_data_new2014-04-24.gzip')
+vb.data.3.0 <- cl_data_all[which(cl_data_all$TEAMSiteName == unique(cl_data_all$TEAMSiteName)[3] &
+                                 cl_data_all$ProtocolVersion == 'Climate 3'), ]
+vb.data.3.0.st2 <- vb.data.3.0[which(vb.data.3.0$SamplingUnitName == 'CL-VB-2'), ]
+
+#Relative humidity range test 1
+rh.r1.test <- f.rh.r1(vb.data.3.0.st2)
+
+#Temperature range test 1
+temp.r1.test <- f.temp.r1(vb.data.3.0.st2)
+
+#NEED TO ADD TEMPERATURE RANGE TEST 2
+
+#Rainfall range test 1
+rain.r1.test <- f.rain.r1(vb.data.3.0.st2)
+#g<-cmpfun(f.rain.r1)
+#compare <- microbenchmark(f.rain.r1(vb.data.3.0), g(vb.data.3.0), times = 1000)
+#autoplot(compare)
+
+#Rainfall range test 2
+rain.r2.test <- f.rain.r2(vb.data.3.0)
+
+#Temperature persistence test 1
+temp.p1.test <- f.temp.p1(vb.data.3.0)

From f6fe51b704b42db50e2109c311cc5eac850ddf11 Mon Sep 17 00:00:00 2001
From: jmaccarthy <jmaccarthy@conservation.org>
Date: Thu, 1 May 2014 12:47:37 -0400
Subject: [PATCH 06/10] Update clim_functions

---
 clim_functions | 71 +++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 64 insertions(+), 7 deletions(-)

diff --git a/clim_functions b/clim_functions
index 412af14..0688e06 100644
--- a/clim_functions
+++ b/clim_functions
@@ -111,15 +111,18 @@ f.rain.r2 <- function(data) {
   rain <- as.numeric(data$Rainfall)
   #Grab just the date of each record
   TempDay <- floor_date(data$Observation, 'day')
+  TempDay <- ymd(TempDay)
   #Generate daily summaries
   Daily.Climate.Data <- data.frame(Rain = tapply(rain, TempDay, sum))
   #Index the rows in which a day had more than 500 mm or less than 0 mm of rain
   rain.fail.r2 <- as.vector(which(Daily.Climate.Data$Rain > 500 | Daily.Climate.Data$Rain < 0))
   #Get the dates from the index number to flag the results
-  dates.fail <- ymd(row.names(Daily.Climate.Data)[rain.fail.r2])
-  #Creates an index again to store the dates in the data that match the days that
-  #failed in the previous index (CAN 116-121 BE CLEANED UP AT ALL?)
-  rain.fail.r2 <- which(TempDay %in% dates.fail)
+  if(length(rain.fail.r2) != 0) {
+    dates.fail <- ymd(row.names(Daily.Climate.Data)[rain.fail.r2])
+    #Creates an index again to store the dates in the data that match the days that
+    #failed in the previous index
+    rain.fail.r2 <- which(TempDay %in% dates.fail)
+  }
   #Create a data frame with results of the test
   final.df <- data.frame(ReferenceID = data$ReferenceID, RecordID = data$RecordID, Observation = data$Observation,
                          TEAMSiteName = data$TEAMSiteName, SamplingUnitName = data$SamplingUnitName, rain = rain,
@@ -132,17 +135,24 @@ f.rain.r2 <- function(data) {
 }
 
 #Performs first persistence test for temperature####
-f.temp.p1 <- function(data) {
+f.temp.rh.p1 <- function(data) {
   data$Observation <- ymd_hms(data$Observation)
   #Vectorize and format the temperature data
   temp.1 <- as.numeric(data$Sensor1_AvgTemp)
   temp.2 <- as.numeric(data$Sensor2_AvgTemp)
   temp.3 <- as.numeric(data$Sensor3_AvgTemp)
+  rh.1 <- as.numeric(data$Sensor1_RelativeHumidity)
+  rh.2 <- as.numeric(data$Sensor2_RelativeHumidity)
+  rh.3 <- as.numeric(data$Sensor3_RelativeHumidity)
+  #Create a list of variables
+  var.list <- list(temp.3, rh.1, rh.2, rh.3)
   #Run the persistence function to generate index of failed records for:
   #first record (with data), and 1 and 2 days prior to current record
   #See f.persistence function for the real magic; setup with data set
   #and variable of interest
+  #Takes roughly 8 hours per sensor - should be able to speed this up
   temp.1.fail.p1 <- f.persistence(data, temp.1)
+  #Start @ 11:49 AM
   temp.2.fail.p1 <- f.persistence(data, temp.2)
   temp.3.fail.p1 <- f.persistence(data, temp.3)
   #Create a data frame with results of the test
@@ -180,7 +190,7 @@ f.persistence <- function(data, var) {
   #Create an empty list to store the index values for records that fail the test
   fail.list <- list()
   #Starts at the earliest record with data and fills list with records that fail test
-  for(i in delayed.start:379732) {
+  for(i in delayed.start:nrow(data)) {
     #Index the date and time of the current record
     this.date <- which(data$Observation == data$Observation[i])
     #index the date and time of the record exactly 1 and 2 days prior to current record 
@@ -188,7 +198,11 @@ f.persistence <- function(data, var) {
     days2.before <- which(data$Observation == data$Observation[i] - days(2))
     #If the variable for the current record is equal to the value exactly 1 and 2 days
     #before the current record, index all three rows
-    if(var[i] == var[day.before] & var[i] == var[days2.before])  {
+    if(length(var[i] == var[day.before] | var[i] == days2.before ) == 0) {
+      this.date <- this.date
+    } else if(is.na(var[i]) | is.na(var[day.before]) | is.na(var[days2.before])) {
+      this.date <- this.date
+    } else if(var[i] == var[day.before] & var[i] == var[days2.before])  {
       fail.list[[i]] <- c(this.date, day.before, days2.before)
     }
   }
@@ -196,3 +210,46 @@ f.persistence <- function(data, var) {
   fail.index <- unlist(fail.list)
   return(fail.index)
 }
+
+#Takes list of variables and runs persistence test on all
+f.persistence.list <- function(data, var.list) {
+  #Find the date (and time) of the first record in the data set
+  this.date <- which(data$Observation == min(data$Observation))
+  #Get the index for a 2 day delayed start to make the for loop work properly
+  for (j in 1:length(var.list)) {
+    delayed.start <- vector()
+    delayed.start[j] <- which(data$Observation == data$Observation[this.date] + days(2))
+    #If there is no data for the first record, index the first record that has data
+    delayed.start[j] <- ifelse(test=delayed.start < which(!is.na(var.list[[j]]))[1],
+                            yes=which(!is.na(var.list[[j]]))[1],
+                            no=delayed.start)
+    #Get the index for a 2 day delayed start from the first record that has data
+    #to make the for loop work properly (155-163 CAN PROBABLY BE CLEANED UP)
+    delayed.start[j] <- which(data$Observation == data$Observation[delayed.start] + days(2))
+  }
+  #Create an empty list to store the index values for records that fail the test
+  fail.list <- list()
+  #Starts at the earliest record with data and fills list with records that fail test
+  for(i in delayed.start:nrow(data)) {
+    #Index the date and time of the current record
+    this.date <- which(data$Observation == data$Observation[i])
+    #index the date and time of the record exactly 1 and 2 days prior to current record 
+    day.before <- which(data$Observation == data$Observation[i] - days(1))
+    days2.before <- which(data$Observation == data$Observation[i] - days(2))
+    #If the variable for the current record is equal to the value exactly 1 and 2 days
+    #before the current record, index all three rows
+    if(is.na(var[i]) | is.na(var[day.before]) | is.na(var[days2.before])) {
+      this.date <- this.date
+    }
+    if(length(var[i] == var[day.before] & var[i] == days2.before ) == 0) {
+      this.date <- this.date
+    } else {
+      if(var[i] == var[day.before] & var[i] == var[days2.before])  {
+        fail.list[[i]] <- c(this.date, day.before, days2.before)
+      }
+    }
+  }
+  #Convert the list index for failed records into a vector index and return
+  fail.index <- unlist(fail.list)
+  return(fail.index)
+}

From e42f0568db2cc0157be6f0bde14039ff30b41f6f Mon Sep 17 00:00:00 2001
From: jmaccarthy <jmaccarthy@conservation.org>
Date: Thu, 1 May 2014 12:48:07 -0400
Subject: [PATCH 07/10] Update clim_run

---
 clim_run | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/clim_run b/clim_run
index bfb63fe..dd2308c 100644
--- a/clim_run
+++ b/clim_run
@@ -11,21 +11,25 @@ vb.data.3.0 <- cl_data_all[which(cl_data_all$TEAMSiteName == unique(cl_data_all$
 vb.data.3.0.st2 <- vb.data.3.0[which(vb.data.3.0$SamplingUnitName == 'CL-VB-2'), ]
 
 #Relative humidity range test 1
+#Less than a minute for 342637 records
 rh.r1.test <- f.rh.r1(vb.data.3.0.st2)
 
 #Temperature range test 1
+#Less than a minute for 342637 records
 temp.r1.test <- f.temp.r1(vb.data.3.0.st2)
 
 #NEED TO ADD TEMPERATURE RANGE TEST 2
 
 #Rainfall range test 1
+#Runs at 5 minutes for 342637 records
 rain.r1.test <- f.rain.r1(vb.data.3.0.st2)
 #g<-cmpfun(f.rain.r1)
 #compare <- microbenchmark(f.rain.r1(vb.data.3.0), g(vb.data.3.0), times = 1000)
 #autoplot(compare)
 
 #Rainfall range test 2
-rain.r2.test <- f.rain.r2(vb.data.3.0)
+#Runs at less than a minute for 342637 records
+rain.r2.test <- f.rain.r2(vb.data.3.0.st2)
 
 #Temperature persistence test 1
 temp.p1.test <- f.temp.p1(vb.data.3.0)

From 7ab8888468e76384ba268b6f7f00c448a230e3b7 Mon Sep 17 00:00:00 2001
From: jmaccarthy <jmaccarthy@conservation.org>
Date: Fri, 13 Jun 2014 16:46:25 -0400
Subject: [PATCH 08/10] Create climate_tests_v2

Range and persistence tests for TEAM climate data
---
 climate_tests_v2 | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)
 create mode 100644 climate_tests_v2

diff --git a/climate_tests_v2 b/climate_tests_v2
new file mode 100644
index 0000000..dddcd7e
--- /dev/null
+++ b/climate_tests_v2
@@ -0,0 +1,37 @@
+#source('~/Jimmy_Code/teamcode/tools/team_db_query.R')
+
+#Load in the TEAM climate data
+#cldata <- f.teamdb.query('climate')
+#clmeta <- cldata$cl_temp_maxmin
+#cldata <- cldata$cl_data
+
+load('cl_data_new2014-04-24.gzip')
+vb.data.3.0 <- cl_data_all[which(cl_data_all$TEAMSiteName == unique(cl_data_all$TEAMSiteName)[3] &
+                                 cl_data_all$ProtocolVersion == 'Climate 3'), ]
+nak.data.3.0 <- cl_data_all[which(cl_data_all$TEAMSiteName == unique(cl_data_all$TEAMSiteName)[1] &
+                                   cl_data_all$ProtocolVersion == 'Climate 3'), ]
+vb.data.3.0.st2 <- vb.data.3.0[which(vb.data.3.0$SamplingUnitName == 'CL-VB-2'), ]
+vb.use.data <- vb.data.3.0.st2[1:30000, ]
+data<-vb.use.data[1:5000, ]
+
+#Range tests for relative humidity, temperature, and rainfall
+#System Time: 54.2 seconds for 5,000 records
+#System Time: 23396.4 seconds (6.5 hours) for 384,907
+system.time(rangeTest.Output <- f.rangeTest(nak.data.3.0))
+
+#NEED TO ADD TEMPERATURE RANGE TEST 2
+
+#Persistence tests for solar radiation, relative humidity, and temperature
+#System Time: 75.1 seconds for 5,000 records
+#System Time: 20883.3 seconds (5.8 hours) for 384,907
+system.time(persistenceTest.Output <- f.persistenceTest(nak.data.3.0))
+
+#Calculate the error rate
+#Returns a list of the error rate tables: range, step, consistency, and persistence
+#System Time: 4.6 seconds for 2 tests
+system.time(errorRate <- f.errorRate(rangeTest.Output, persistenceTest.Output))
+
+
+#g<-cmpfun(f.rain.r1)
+#compare <- microbenchmark(f.rain.r1(vb.data.3.0), g(vb.data.3.0), times = 1000)
+#autoplot(compare)

From e56ec7990e590eb5dd10a3db6d513ed3dedf7eb1 Mon Sep 17 00:00:00 2001
From: jmaccarthy <jmaccarthy@conservation.org>
Date: Fri, 13 Jun 2014 16:47:17 -0400
Subject: [PATCH 09/10] Rename climate_tests_v2 to clim_tests_v2

---
 climate_tests_v2 => clim_tests_v2 | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename climate_tests_v2 => clim_tests_v2 (100%)

diff --git a/climate_tests_v2 b/clim_tests_v2
similarity index 100%
rename from climate_tests_v2
rename to clim_tests_v2

From 7e646f1880812a504fc2ad6c8485b78d06275cb2 Mon Sep 17 00:00:00 2001
From: jmaccarthy <jmaccarthy@conservation.org>
Date: Fri, 13 Jun 2014 16:48:21 -0400
Subject: [PATCH 10/10] Create clim_funks_v2

functions for performing range and persistence tests on TEAM climate data...also includes error rate helper function
---
 clim_funks_v2 | 226 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 226 insertions(+)
 create mode 100644 clim_funks_v2

diff --git a/clim_funks_v2 b/clim_funks_v2
new file mode 100644
index 0000000..251f15c
--- /dev/null
+++ b/clim_funks_v2
@@ -0,0 +1,226 @@
+require(stringr)
+require(lubridate)
+require(compiler)
+require(ggplot2)
+require(zoo)
+
+#Performs range test for relative humidity, and temperature based on Estevez et al. (2011)####
+#RH acceptable range = 0-100%
+#Temp acceptable rante = -30 and 50; revisit these numbers (Africa max high is 55--WMO)
+f.rangeTest <- function(data) {
+  #Vectorize and format the relative humidity, temperature, and rain data####
+  #Relative Humidity
+  rh.1 <- as.numeric(data$Sensor1_RelativeHumidity)
+  rh.2 <- as.numeric(data$Sensor2_RelativeHumidity)
+  rh.3 <- as.numeric(data$Sensor3_RelativeHumidity)
+  #Temperature
+  temp.1 <- as.numeric(data$Sensor1_AvgTemp)
+  temp.2 <- as.numeric(data$Sensor2_AvgTemp)
+  temp.3 <- as.numeric(data$Sensor3_AvgTemp)
+  #Rain
+  rain <- as.numeric(data$Rainfall)
+  data$Observation <- ymd_hms(data$Observation)
+  #Create indices for rows that fail the range test####
+  #Relative Humidity
+  rh.1.fail.r1 <- which(rh.1 < 0 | rh.1 > 100)
+  rh.2.fail.r1 <- which(rh.2 < 0 | rh.2 > 100)
+  rh.3.fail.r1 <- which(rh.3 < 0 | rh.3 > 100)
+  #Temperature - Test 1
+  temp.1.fail.r1 <- which(temp.1 < -30 | temp.1 > 50)
+  temp.2.fail.r1 <- which(temp.2 < -30 | temp.2 > 50)
+  temp.3.fail.r1 <- which(temp.3 < -30 | temp.3 > 50)
+  #Temperature - Test 2
+  #Performs second range test on temperature based on Estevez et al. (2011)--UNDER CONSTRUCTION####
+  #Need to find an accurate list of country max/min temperatures; ideally long-term stations close to TEAM station
+  #Rain - Test 1
+  #Store the first day of the data (where to start the interval)
+  start.date <- min(data$Observation) #ymd('2010-07-01')
+  #Create a vector to store the index info for failed tests
+  rain.fail.r1 <- vector()
+  #Create a half hour interval window to select data during that period
+  int <- new_interval(start = start.date, end = start.date + minutes(30))
+  #For each record use the interval to index data and then sum rainfall measurements
+  for(i in 1:nrow(data)) {
+    #If there is no data for a particular interval, shift the interval by five minutes and start again
+    #at next record
+    if(length(which(data$Observation %within% int)) == 0) {
+      int <- int_shift(int = int, by = minutes(5))
+    } else {
+      #If the any data exists in the interval, sum the data over the half-hour interval
+      half.hr.sum <- sum(rain[which(data$Observation %within% int)])
+      #If the sum of the interval is greater than 120 or less than 0, store the row number in the index
+      if(half.hr.sum > 120 | half.hr.sum < 0) {
+        rain.fail.r1[i] <- which(data$Observation %within% int)[1]
+      }
+      #After storing the row number in the index, shift the interval by five minutes and repeat
+      int <- int_shift(int = int, by = minutes(5))
+    }
+  }
+  #Rain - Test 2
+  #Grab just the date of each record
+  TempDay <- floor_date(data$Observation, 'day')
+  TempDay <- ymd(TempDay)
+  #Generate daily summaries
+  Daily.Climate.Data <- data.frame(Rain = tapply(rain, TempDay, sum))
+  #Index the rows in which a day had more than 500 mm or less than 0 mm of rain
+  rain.fail.r2 <- as.vector(which(Daily.Climate.Data$Rain > 500 | Daily.Climate.Data$Rain < 0))
+  #Get the dates from the index number to flag the results
+  if(length(rain.fail.r2) != 0) {
+    dates.fail <- ymd(row.names(Daily.Climate.Data)[rain.fail.r2])
+    #Creates an index again to store the dates in the data that match the days that
+    #failed in the previous index
+    rain.fail.r2 <- which(TempDay %in% dates.fail)
+  }
+  #Create a data frame with results of the test####
+  final.df <- data.frame(ReferenceID = data$ReferenceID, RecordID = data$RecordID, Observation = data$Observation,
+                         TEAMSiteName = data$TEAMSiteName, SamplingUnitName = data$SamplingUnitName, rh.1 = rh.1,
+                         rh.1.rangeTest = NA, rh.2 = rh.2, rh.2.rangeTest = NA, rh.3 = rh.3, rh.3.rangeTest = NA,
+                         temp.1 = temp.1, temp.1.rangeTest = NA, temp.2 = temp.2, temp.2.rangeTest = NA,
+                         temp.3 = temp.3, temp.3.rangeTest = NA, rain = rain, rain.rangeTest1 = NA,
+                         rain.rangeTest2 = NA)
+  #Use the failed test index to store the error code####
+  #Relative Humidity
+  final.df$rh.1.rangeTest[rh.1.fail.r1] <- 'R1'
+  final.df$rh.2.rangeTest[rh.2.fail.r1] <- 'R1'
+  final.df$rh.3.rangeTest[rh.3.fail.r1] <- 'R1'
+  #Temperature
+  final.df$temp.1.rangeTest[temp.1.fail.r1] <- 'R1'
+  final.df$temp.2.rangeTest[temp.2.fail.r1] <- 'R1'
+  final.df$temp.3.rangeTest[temp.3.fail.r1] <- 'R1'
+  #Rain
+  #Remove NAs that were introduced to the failed test index
+  rain.fail.r1 <- rain.fail.r1[!is.na(rain.fail.r1)]
+  final.df$rain.rangeTest1[rain.fail.r1] <- 'R1'
+  final.df$rain.rangeTest2[rain.fail.r2] <- 'R2'
+  #Replace NAs with a blank####
+  #Relative Humidity
+  final.df$rh.1.rangeTest[is.na(final.df$rh.1.rangeTest)] <- ' '
+  final.df$rh.2.rangeTest[is.na(final.df$rh.2.rangeTest)] <- ' '
+  final.df$rh.3.rangeTest[is.na(final.df$rh.3.rangeTest)] <- ' '
+  #Temperature
+  final.df$temp.1.rangeTest[is.na(final.df$temp.1.rangeTest)] <- ' '
+  final.df$temp.2.rangeTest[is.na(final.df$temp.2.rangeTest)] <- ' '
+  final.df$temp.3.rangeTest[is.na(final.df$temp.3.rangeTest)] <- ' '
+  #Rain
+  final.df$rain.rangeTest1[is.na(final.df$rain.rangeTest1)] <- ' '
+  final.df$rain.rangeTest2[is.na(final.df$rain.rangeTest2)] <- ' '
+  return(final.df)
+}
+
+#Performs persistence test for solar radiation, relative humidity, and temperatur based on Estevez et al. (2011)e####
+#This function creates an index to compare records to exactly 1 and 2 days prior to the
+#current record and then returns an index of records that fail the test
+#All three records will be flagged if they are the same value
+f.persistenceTest <- function(data) {
+  data$Observation <- ymd_hms(data$Observation)
+  #Vectorize and format the data####
+  #Solar Radiation
+  sol.1 <- as.numeric(data$Sensor1_AvgSolarRadiation)
+  sol.2 <- as.numeric(data$Sensor2_AvgSolarRadiation)
+  #Relative Humidity
+  rh.1 <- as.numeric(data$Sensor1_RelativeHumidity)
+  rh.2 <- as.numeric(data$Sensor2_RelativeHumidity)
+  rh.3 <- as.numeric(data$Sensor3_RelativeHumidity)
+  #Temperature
+  temp.1 <- as.numeric(data$Sensor1_AvgTemp)
+  temp.2 <- as.numeric(data$Sensor2_AvgTemp)
+  temp.3 <- as.numeric(data$Sensor3_AvgTemp)
+  
+  #Find the date (and time) of the first record in the data set
+  start.date <- which(data$Observation == min(data$Observation))
+  #Get the index for a 2 day delayed start to make the for loop work properly
+  delayed.start <- which(data$Observation == data$Observation[start.date] + days(2))
+  index.list <- list()
+  for(i in delayed.start:nrow(data)) {
+    index.list[[i]] <- c(which(data$Observation == data$Observation[i]), which(data$Observation == data$Observation[i] - days(1)),
+                         which(data$Observation == data$Observation[i] - days(2)))
+  }
+  #Create an empty list to store the index values for records that fail the test
+  sol.1.index.vector <- vector()
+  sol.2.index.vector <- vector()
+  rh.1.index.vector <- vector()
+  rh.2.index.vector <- vector()
+  rh.3.index.vector <- vector()
+  temp.1.index.vector <- vector()
+  temp.2.index.vector <- vector()
+  temp.3.index.vector <- vector()
+  for(i in 1:length(index.list)) {
+    sol.1.index.vector[i] <- ifelse(sol.1[index.list[[i]][1]] == sol.1[index.list[[i]][2]] &
+                                 sol.1[index.list[[i]][1]] == sol.1[index.list[[i]][3]], 'P1', ' ')
+    sol.2.index.vector[i] <- ifelse(sol.2[index.list[[i]][1]] == sol.2[index.list[[i]][2]] &
+                                      sol.2[index.list[[i]][1]] == sol.2[index.list[[i]][3]], 'P1', ' ')
+    rh.1.index.vector[i] <- ifelse(temp.1[index.list[[i]][1]] == temp.1[index.list[[i]][2]] &
+                                       temp.1[index.list[[i]][1]] == temp.1[index.list[[i]][3]], 'P1', ' ')
+    rh.2.index.vector[i] <- ifelse(temp.1[index.list[[i]][1]] == temp.1[index.list[[i]][2]] &
+                                     temp.1[index.list[[i]][1]] == temp.1[index.list[[i]][3]], 'P1', ' ')
+    rh.3.index.vector[i] <- ifelse(temp.1[index.list[[i]][1]] == temp.1[index.list[[i]][2]] &
+                                     temp.1[index.list[[i]][1]] == temp.1[index.list[[i]][3]], 'P1', ' ')
+    temp.1.index.vector[i] <- ifelse(temp.1[index.list[[i]][1]] == temp.1[index.list[[i]][2]] &
+                                       temp.1[index.list[[i]][1]] == temp.1[index.list[[i]][3]], 'P1', ' ')
+    temp.2.index.vector[i] <- ifelse(temp.2[index.list[[i]][1]] == temp.2[index.list[[i]][2]] &
+                                       temp.2[index.list[[i]][1]] == temp.2[index.list[[i]][3]], 'P1', ' ')
+    temp.3.index.vector[i] <- ifelse(temp.3[index.list[[i]][1]] == temp.3[index.list[[i]][2]] &
+                                       temp.3[index.list[[i]][1]] == temp.3[index.list[[i]][3]], 'P1', ' ')
+  }
+  #Convert the list index for failed records into a vector index and return
+  sol.1.fail.list <- unlist(index.list[which(sol.1.index.vector == 'P1')])
+  sol.2.fail.list <- unlist(index.list[which(sol.2.index.vector == 'P1')])
+  rh.1.fail.list <- unlist(index.list[which(rh.1.index.vector == 'P1')])
+  rh.2.fail.list <- unlist(index.list[which(rh.2.index.vector == 'P1')])
+  rh.3.fail.list <- unlist(index.list[which(rh.3.index.vector == 'P1')])
+  temp.1.fail.list <- unlist(index.list[which(temp.1.index.vector == 'P1')])
+  temp.2.fail.list <- unlist(index.list[which(temp.2.index.vector == 'P1')])
+  temp.3.fail.list <- unlist(index.list[which(temp.3.index.vector == 'P1')])
+  #Create a data frame with results of the test
+  final.df <- data.frame(ReferenceID = data$ReferenceID, RecordID = data$RecordID, Observation = data$Observation,
+                         TEAMSiteName = data$TEAMSiteName, SamplingUnitName = data$SamplingUnitName, sol.1 = sol.1,
+                         sol.1.perTest = NA, sol.2 = sol.2, sol.2.perTest = NA, rh.1 = rh.1, rh.1.perTest = NA,
+                         rh.2 = rh.2, rh.2.perTest = NA, rh.3 = rh.3, rh.3.perTest = NA, temp.1 = temp.1,
+                         temp.1.perTest = NA, temp.2 = temp.2, temp.2.perTest = NA, temp.3 = temp.3,
+                         temp.3.perTest = NA)
+  #Use the failed test index to store the error code
+  final.df$sol.1.perTest[sol.1.fail.list] <- 'P1'
+  final.df$sol.2.perTest[sol.2.fail.list] <- 'P1'
+  final.df$rh.1.perTest[rh.1.fail.list] <- 'P1'
+  final.df$rh.2.perTest[rh.2.fail.list] <- 'P1'
+  final.df$rh.3.perTest[rh.3.fail.list] <- 'P1'
+  final.df$temp.1.perTest[temp.1.fail.list] <- 'P1'
+  final.df$temp.2.perTest[temp.2.fail.list] <- 'P1'
+  final.df$temp.3.perTest[temp.3.fail.list] <- 'P1'
+  #Replace NAs with a blank
+  final.df$sol.1.perTest[is.na(final.df$sol.1.perTest)] <- ' '
+  final.df$sol.2.perTest[is.na(final.df$sol.2.perTest)] <- ' '
+  final.df$rh.1.perTest[is.na(final.df$rh.1.perTest)] <- ' '
+  final.df$rh.2.perTest[is.na(final.df$rh.2.perTest)] <- ' '
+  final.df$rh.3.perTest[is.na(final.df$rh.3.perTest)] <- ' '
+  final.df$temp.1.perTest[is.na(final.df$temp.1.perTest)] <- ' '
+  final.df$temp.2.perTest[is.na(final.df$temp.2.perTest)] <- ' '
+  final.df$temp.3.perTest[is.na(final.df$temp.3.perTest)] <- ' '
+  return(final.df)
+}
+
+#Helper function to tally error rate
+f.errorRate <- function(range, persistence) {
+  #Range error rate
+  noRows.range <- data.frame(noRows = c(nrow(range[!is.na(range$rh.1), ]), nrow(range[!is.na(range$rh.2), ]), nrow(range[!is.na(range$rh.3), ]),
+                             nrow(range[!is.na(range$temp.1), ]), nrow(range[!is.na(range$temp.2), ]), nrow(range[!is.na(range$temp.3), ]),
+                             nrow(range[!is.na(range$rain), ]), nrow(range[!is.na(range$rain), ])))
+  row.names(noRows.range) <- c('rh.1', 'rh.2', 'rh.3', 'temp.1', 'temp.2', 'temp.3', 'rain1', 'rain2')
+  noRows.range$failed <- c(length(which(range$rh.1.rangeTest == 'R1')), length(which(range$rh.2.rangeTest == 'R1')),
+                           length(which(range$rh.3.rangeTest == 'R1')), length(which(range$temp.1.rangeTest == 'R1')),
+                           length(which(range$temp.2.rangeTest == 'R1')), length(which(range$temp.3.rangeTest == 'R1')),
+                           length(which(range$rain.rangeTest1 == 'R1')), length(which(range$rain.rangeTest2 == 'R2')))
+  noRows.range$errorRate <- noRows.range$failed / noRows.range$noRows
+  #Persistence error rate
+  noRows.per <- data.frame(noRows = c(nrow(persistence[!is.na(persistence$sol.1), ]), nrow(persistence[!is.na(persistence$sol.2), ]), nrow(persistence[!is.na(persistence$rh.1), ]),
+                                      nrow(persistence[!is.na(persistence$rh.2), ]), nrow(persistence[!is.na(persistence$rh.3), ]), nrow(persistence[!is.na(persistence$temp.1), ]),
+                                      nrow(persistence[!is.na(persistence$temp.2), ]), nrow(persistence[!is.na(persistence$temp.3), ])))
+  row.names(noRows.per) <- c('sol.1', 'sol.2', 'rh.1', 'rh.2', 'rh.3', 'temp.1', 'temp.2', 'temp.3')
+  noRows.per$failed <- c(length(which(persistence$sol.1.perTest == 'P1')), length(which(persistence$sol.2.perTest == 'P1')), 
+                         length(which(persistence$rh.1.perTest == 'P1')), length(which(persistence$rh.2.perTest == 'P1')),
+                         length(which(persistence$rh.3.perTest == 'P1')), length(which(persistence$temp.1.perTest == 'P1')),
+                         length(which(persistence$temp.2.perTest == 'P1')), length(which(persistence$temp.3.perTest == 'P1')))
+  noRows.per$errorRate <- noRows.per$failed / noRows.per$noRows
+  final.list <- list(noRows.range, noRows.per)
+  return(final.list)
+}