Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,5 @@
^docs$
^CRAN-SUBMISSION$
^logo\.svg$
^doc$
^Meta$
3 changes: 2 additions & 1 deletion .github/workflows/r.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,15 @@ jobs:
runs-on: windows-latest
strategy:
matrix:
r-version: ['4.3.0']
r-version: ['4.3.3'] # bump version 4.3.0 to 4.3.3 resolve non-conformal array error

steps:
- uses: actions/checkout@v3
- name: Set up R ${{ matrix.r-version }}
uses: r-lib/actions/setup-r@v2
with:
r-version: ${{ matrix.r-version }}
use-public-rspm: true
- name: Install dependencies
uses: r-lib/actions/setup-r-dependencies@v2
with:
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,5 @@ inst/doc
test_pkg
rstan_code
revdep
/doc/
/Meta/
6 changes: 5 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,12 @@ Encoding: UTF-8
LazyData: true
Depends:
R (>= 3.4.0)
RoxygenNote: 7.3.1
RoxygenNote: 7.3.3
Imports:
deSolve,
dplyr,
tidyr,
janitor,
ggplot2,
locfit,
purrr,
Expand All @@ -30,6 +31,8 @@ Imports:
methods,
mgcv,
mixdist,
scam,
mvtnorm,
patchwork,
assertthat,
Rcpp (>= 0.12.0),
Expand Down Expand Up @@ -60,6 +63,7 @@ Collate:
'semiparametric_models.R'
'mixture_model.R'
'hierarchical_bayesian_model.R'
'to_titer.R'
'serosv.R'
'stanmodels.R'
'plots.R'
Expand Down
16 changes: 14 additions & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,13 @@ S3method(plot,sir_basic_model)
S3method(plot,sir_static_model)
S3method(plot,sir_subpops_model)
S3method(plot,weibull_model)
export(add_thresholds)
export(age_time_model)
export(compare_models)
export(compute_ci)
export(compute_ci.age_time_model)
export(compute_ci.fp_model)
export(compute_ci.hierarchical_bayesian_model)
export(compute_ci.lp_model)
export(compute_ci.mixture_model)
export(compute_ci.penalized_spline_model)
Expand All @@ -35,33 +37,40 @@ export(mixture_model)
export(mseir_model)
export(pava)
export(penalized_spline_model)
export(plot_corrected_prev)
export(plot_gcv)
export(plot_standard_curve)
export(plot_titer_qc)
export(polynomial_model)
export(set_plot_style)
export(sir_basic_model)
export(sir_static_model)
export(sir_subpops_model)
export(standardize_data)
export(to_titer)
export(transform_data)
export(weibull_model)
import(Rcpp)
import(assertthat)
import(dplyr)
import(ggplot2)
import(graphics)
import(magrittr)
import(methods)
import(mgcv)
import(patchwork)
import(purrr)
import(scam)
import(tidyr)
importFrom(RcppParallel,RcppParallelLibs)
importFrom(assertthat,assert_that)
importFrom(boot,inv.logit)
importFrom(deSolve,ode)
importFrom(dplyr,group_by)
importFrom(dplyr,mutate)
importFrom(dplyr,n)
importFrom(dplyr,summarize)
importFrom(graphics,par)
importFrom(graphics,plot)
importFrom(janitor,clean_names)
importFrom(locfit,"crit<-")
importFrom(locfit,crit)
importFrom(locfit,gcv)
Expand All @@ -70,12 +79,14 @@ importFrom(locfit,locfit)
importFrom(locfit,lp)
importFrom(magrittr,"%>%")
importFrom(methods,is)
importFrom(mgcv,betar)
importFrom(mgcv,gam)
importFrom(mgcv,gamm)
importFrom(mgcv,predict.gam)
importFrom(mixdist,mix)
importFrom(mixdist,mixgroup)
importFrom(mixdist,mixparam)
importFrom(mvtnorm,rmvnorm)
importFrom(purrr,imap_dfr)
importFrom(rstan,sampling)
importFrom(rstan,summary)
Expand All @@ -89,6 +100,7 @@ importFrom(stats,gaussian)
importFrom(stats,glm)
importFrom(stats,predict)
importFrom(stats,predict.glm)
importFrom(stats,prop.test)
importFrom(stats,qnorm)
importFrom(stats,qt)
importFrom(stats4,mle)
Expand Down
59 changes: 46 additions & 13 deletions R/age_time_model.R
Original file line number Diff line number Diff line change
@@ -1,25 +1,36 @@
# ------ Demo function --------
#' Age-time varying seroprevalence
#' Fit age-stratified seroprevalence across multiple time points. Also try to monotonize age (or cohort) - specific seroprevalence.
#'
#'
#' @description Fit age-stratified seroprevalence across multiple time points. Also try to monotonize age (or birth cohort) - specific seroprevalence.
#'
#' @param data - input data, must have`age`, `status`, time, group columns, where group column determines how data is aggregated
#' @param time_col - name of the column for time (default to `date`)
#' @param grouping_col - name of the column for time (default to `group`)
#' @param age_correct - a boolean, if `TRUE`, monotonize age-specific prevalence. Monotonize birth cohort-specific seroprevalence otherwise.
#' @param le - number of bins to generate age grid, used when monotonizing data
#' @param ci - confidence interval for smoothing
#' @import mgcv
#' @param monotonize_method - either "pava" or "scam"
#' @import scam assertthat
#' @importFrom mgcv gam predict.gam betar
#'
#' @return a list of class time_age_model with 3 items
#' @return a list of class time_age_model with 4 items
#' \item{out}{a data.frame with dimension n_group x 9, where columns `info`, `sp`, `foi` store output for non-monotonized
#' data and `monotonized_info`, `monotonized_sp`, `monotonized_foi`, `monotonized_ci_mod` store output for monotnized data}
#' data and `monotonized_info`, `monotonized_sp`, `monotonized_foi`, `monotonized_ci_mod` store output for monotonized data}
#' \item{grouping_col}{name of the column for grouping}
#' \item{age_correct}{a boolean indicating whether the data is monotonized across age or cohort}
#' \item{datatype}{whether the input data is aggregated or line-listing data}
#' @export
age_time_model <- function(data, time_col="date", grouping_col="group",
age_correct=F, le=512, ci = 0.95){
age_time_model <- function(data, time_col="date", grouping_col="group", age_correct=F, le=512, ci = 0.95, monotonize_method = "pava"){
# work around to resolve no visible binding note NOTE during check()
x <- label <- family <- fit <- se.fit <- ymin <- ymax <- y <- mean_time <- prevalence <- sim_data <- NULL
age <- ys <- shift_no <- cohort <- col_time <- monotonized_mod <- df <- info <- sp <- monotonized_info <- monotonized_sp <- NULL


assert_that(
(monotonize_method == "pava") | (monotonize_method == "scam"),
msg = 'Monotonize method must be either "pava" or "scam"'
)

# ---- helper functions -----
shift_right <- \(n,x){ if(n == 1) x else dplyr::lag(x, n, default = NA)}
# function to simulate data for monotonize process
Expand All @@ -46,8 +57,23 @@ age_time_model <- function(data, time_col="date", grouping_col="group",
}
# function to monotonize data using serosv pava function
monotonize_data <- \(dat, grp){
dat <- dat %>% arrange(mean_time)
if(monotonize_method == "scam"){
out <- tryCatch(
{
mod <- scam(prevalence ~ s(mean_time, bs = "mpi"), data=dat,family = betar)
dat$prevalence <- predict(mod, list(mean_time = dat$mean_time), type = "response")
dat
},
error = \(e) {
e
}
)

if (!("error" %in% class(out))) return(out)
}

dat %>%
arrange(mean_time) %>%
mutate(
prevalence = serosv::pava(prevalence)$pai2
)
Expand All @@ -56,7 +82,7 @@ age_time_model <- function(data, time_col="date", grouping_col="group",
model <- list()

# --- preprocess data ------
check_input <- serosv:::check_input(data)
check_input <- check_input(data)
age_range <- range(data$age)
age_grid <- seq(age_range[1], age_range[2], length.out = le)

Expand Down Expand Up @@ -137,7 +163,7 @@ age_time_model <- function(data, time_col="date", grouping_col="group",

# mapping to covert cohort to age
cohort_age_mapping <- scam_data %>%
select(col_time, age, cohort) %>%
select(!!sym(grouping_col), age, cohort) %>%
unique()

# map cohort from monotized data to age (at collection time)
Expand All @@ -157,13 +183,19 @@ age_time_model <- function(data, time_col="date", grouping_col="group",
out <- scam_out %>%
mutate(
monotonized_mod = map(data, \(dat){
gam(y ~ s(age), family = betar, data = dat)
# handle potential error when dataset is small
k <- if(length(unique(dat$age)) < 10) length(unique(dat$age)) - 1 else -1

gam(y ~ s(age, k=k), family = betar, data = dat)
}),
# also have model for smooth ci
monotonized_ci_mod = map(data, \(dat){
# handle potential error when dataset is small
k <- if(length(unique(dat$age)) < 10) length(unique(dat$age)) - 1 else -1

list(
"ymin" = gam(ymin ~ s(age), family = betar, data = dat),
"ymax" = gam(ymax ~ s(age), family = betar, data = dat)
"ymin" = gam(ymin ~ s(age, k=k), family = gaussian, data = dat),
"ymax" = gam(ymax ~ s(age, k=k), family = gaussian, data = dat)
)
})
) %>%
Expand Down Expand Up @@ -206,3 +238,4 @@ age_time_model <- function(data, time_col="date", grouping_col="group",

model
}

2 changes: 2 additions & 0 deletions R/compare_models.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ compare_models <- function(...){
stop("Inputs must be serosv models")
}

# TODO: apply cross validation to get matrix instead

data.frame(
model = .y,
type = class(.x),
Expand Down
56 changes: 56 additions & 0 deletions R/compute_ci.R
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ compute_ci.lp_model <- function(x,ci = 0.95, ...){
out.DF
}


#' Compute confidence interval for penalized_spline_model
#'
#' @param x - serosv models
Expand Down Expand Up @@ -178,6 +179,58 @@ compute_ci.penalized_spline_model <- function(x,ci = 0.95, ...){
return(list(out.DF, out.FOI))
}

#' Compute 95\% credible interval for hierarchical Bayesian model
#'
#' @param x - serosv models
#' @param ... - arbitrary arguments
#' @importFrom mgcv predict.gam
#' @import dplyr
#'
#' @return list of confidence interval for seroprevalence and foi. Each confidence interval dataframe with 4 variables, x and y for the fitted values and ymin and ymax for the confidence interval
#' @export
compute_ci.hierarchical_bayesian_model <- function(x, ...){
out_x <- x$df$age
out.DF <- NULL

if (x$type == "far3"){
alpha1 <- x$info["alpha1",c("2.5%","50%", "97.5%")]
alpha2 <- x$info["alpha2",c("2.5%","50%", "97.5%")]
alpha3 <- x$info["alpha3",c("2.5%","50%", "97.5%")]

out.DF <- data.frame(
x = out_x,
ymin = x$sp_func(out_x, alpha1[1], alpha2[1], alpha3[1]),
y = x$sp_func(out_x, alpha1[2], alpha2[2], alpha3[2]),
ymax = x$sp_func(out_x, alpha1[3], alpha2[3], alpha3[3])
)
}else if(x$type == "far2"){
alpha1 <- x$info["alpha1",c("2.5%","50%", "97.5%")]
alpha2 <- x$info["alpha2",c("2.5%","50%", "97.5%")]

out.DF <- data.frame(
x = out_x,
ymin = x$sp_func(out_x, alpha1[1], alpha2[1]),
y = x$sp_func(out_x, alpha1[2], alpha2[2]),
ymax = x$sp_func(out_x, alpha1[3], alpha2[3])
)

}else if(x$type == "log_logistic"){
alpha1 <- x$info["alpha1",c("2.5%","50%", "97.5%")]
alpha2 <- x$info["alpha2",c("2.5%","50%", "97.5%")]

out.DF <- data.frame(
x = out_x,
ymin = x$sp_func(out_x, alpha1[1], alpha2[1]),
y = x$sp_func(out_x, alpha1[2], alpha2[2]),
ymax = x$sp_func(out_x, alpha1[3], alpha2[3])
)
}else{
warning('Expect model type to be one of the following: "far3", "far2", "log_logistic"')
}

out.DF
}

#' Compute confidence interval for mixture model
#'
#' @param x - serosv mixture_model object
Expand Down Expand Up @@ -216,6 +269,9 @@ compute_ci.mixture_model <- function(x,ci = 0.95, ...){
#' @return confidence interval dataframe with n_group x 3 cols, the columns are `group`, `sp_df`, `foi_df`
#' @export
compute_ci.age_time_model <- function(x, ci=0.95, le = 100, ...){
# resolve no visible binding note
df <- monotonized_info <- monotonized_ci_mod <- age <- info <- fit <- se.fit <- sp_df <- foi_df <- NULL

# check which type of model user wants to visualize
modtype <- if (is.null(list(...)[["modtype"]])) "monotonized" else list(...)$modtype
assert_that(
Expand Down
Loading
Loading