From 9860396f94ce14fefe99d717265863092fe95640 Mon Sep 17 00:00:00 2001 From: Preston Burns Date: Wed, 26 Feb 2025 02:09:17 +0000 Subject: [PATCH 1/7] update baseline values setting to be consistently used --- R/create_nepexplorer_app.R | 2 +- R/creatinine_data_fcn.R | 3 +- R/validation_checks_util.R | 102 +++++++++++++++++++++++++++++++++ data-raw/meta_nepExplorer.csv | 56 +++++++++--------- data-raw/meta_nepExplorer2.csv | 4 +- data/meta_nepExplorer.rda | Bin 1399 -> 1383 bytes docs/articles/get_started.html | 61 ++++++++------------ docs/articles/index.html | 48 ++++++---------- inst/examples/standalone_app.R | 15 ++--- vignettes/get_started.Rmd | 4 +- 10 files changed, 182 insertions(+), 113 deletions(-) create mode 100644 R/validation_checks_util.R diff --git a/R/create_nepexplorer_app.R b/R/create_nepexplorer_app.R index aad3908..3429407 100644 --- a/R/create_nepexplorer_app.R +++ b/R/create_nepexplorer_app.R @@ -41,7 +41,7 @@ if (is.null(mapping)) { "visit_col" = "VISIT", "visitn_col" = "VISITN", "baseline_flag" = "BLFL", - "baseline_values" = "Y", + "baseline_values" = list("Y" = "Y"), "normal_col_high" = "STNRHI", "id_col" = "USUBJID", "age_col" = "AGE", diff --git a/R/creatinine_data_fcn.R b/R/creatinine_data_fcn.R index 2d0b78a..2c96149 100644 --- a/R/creatinine_data_fcn.R +++ b/R/creatinine_data_fcn.R @@ -15,10 +15,9 @@ creatinine_data_fcn <- function(df, settings) { .data[[settings$measure_col]], .data[[settings$value_col]], .data[[settings$baseline_flag]]) - #get baseline creatinine levels for each subject for hover text baseline_creat <- creatinine_data %>% - filter(.data[[settings$baseline_flag]] == settings$baseline_values) %>% + filter(.data[[settings$baseline_flag]] == settings$baseline_values$Y) %>% select(.data[[settings$id_col]], BASELINE = .data[[settings$value_col]]) diff --git a/R/validation_checks_util.R b/R/validation_checks_util.R new file mode 100644 index 0000000..c013c60 --- /dev/null +++ b/R/validation_checks_util.R @@ -0,0 +1,102 @@ +#' Expected Settings List +#' +#' This list defines the expected settings for different data domains. +#' Each domain contains specific columns and values that are required for data processing. +#' +#' @format A list with the following structure: +#' \describe{ +#' \item{labs}{A list containing the expected settings for laboratory data: +#' \describe{ +#' \item{id_col}{Column name for the subject identifier (e.g., "USUBJID").} +#' \item{measure_col}{Column name for the measure parameter (e.g., "PARAM").} +#' \item{measure_values}{A list of measure values and their corresponding descriptions +#' (e.g., list("CREAT" = "Creatinine")).} +#' \item{value_col}{Column name for the measurement value (e.g., "STRESN").} +#' \item{unit_col}{Column name for the measurement unit (e.g., "STRESU").} +#' \item{studyday_col}{Column name for the study day (e.g., "DY").} +#' \item{visit_col}{Column name for the visit description (e.g., "VISIT").} +#' \item{visitn_col}{Column name for the visit number (e.g., "VISITN").} +#' \item{baseline_flag}{Column name for the baseline flag (e.g., "BLFL").} +#' \item{baseline_values}{Value indicating baseline (e.g., "Y").} +#' \item{normal_col_high}{Column name for the upper limit of normal range +#' (e.g., "STNRHI").} +#' } +#' } +#' \item{dm}{A list containing the expected settings for demographic data: +#' \describe{ +#' \item{id_col}{Column name for the subject identifier (e.g., "USUBJID").} +#' } +#' } +#' \item{vitals}{A list containing the expected settings for vital signs data: +#' \describe{ +#' \item{id_col}{Column name for the subject identifier (e.g., "USUBJID").} +#' \item{measure_col}{Column name for the measure parameter (e.g., "PARAM").} +#' } +#' } +#' } +expected_settings <- list(labs = list("id_col" = "USUBJID", "measure_col" = "PARAM", + "measure_values" = list("CREAT" = "Creatinine"), + "value_col" = "STRESN", + "unit_col" = "STRESU", + "studyday_col" = "DY", + "visit_col" = "VISIT", + "visitn_col" = "VISITN", + "baseline_flag" = "BLFL", + "baseline_values" = "Y", + "normal_col_high" = "STNRHI"), + dm = list("id_col" = "USUBJID"), + vitals = list("id_col" = "USUBJID", "measure_col" = "PARAM")) + +#' Check Required Settings in a List +#' +#' This function checks if a given list contains all the required elements specified in another list. +#' It allows for additional elements at any nesting level and prints out a message listing the missing settings, if any. +#' +#' @param required_list A list specifying the required elements. +#' @param actual_list A list to be checked against the required elements. +#' @param parent_key (Optional) A string used internally to track the full key path during recursion. +#' Default is an empty string. +#' @return A list of missing elements, if any. +check_required_settings <- function(required_list, actual_list, parent_key = "") { + missing_elements <- list() + + for (name in names(required_list)) { + full_key <- if (parent_key == "") name else paste(parent_key, name, sep = "$") + + if (!name %in% names(actual_list)) { + missing_elements[[full_key]] <- required_list[[name]] + } else if (is.list(required_list[[name]])) { + if (!is.list(actual_list[[name]])) { + missing_elements[[full_key]] <- required_list[[name]] + } else { + nested_missing <- check_required_settings(required_list[[name]], actual_list[[name]], full_key) + missing_elements <- c(missing_elements, nested_missing) + } + } + } + + return(missing_elements) +} + +#' Check for Missing Columns in Data +#' +#' This function checks for any columns specified in the settings object that are not present in the data. +#' It returns a message listing the missing columns, if any. +#' +#' @param data_domain A string specifying the data domain to check (e.g., "labs", "vitals", "dm"). +#' @param settings_list A list containing the settings for each data domain. +#' @param data_list A list containing the data for each domain. +#' @return A string message listing the missing columns, or NULL if all expected columns are present. +check_expected_cols <- function(data_domain, settings_list, data_list) { + + expected_cols <- settings_list[[data_domain]][!grepl("_values$", names(settings_list[[data_domain]]))] %>% unlist() + + missing_cols <- setdiff(expected_cols, colnames(data_list[[data_domain]])) + + if (length(missing_cols) > 0) { + return(paste("The following columns were specified in", data_domain, "settings but not found in the", + data_domain, "dataset: ", paste(missing_cols, collapse = ", "))) + } else { + return(NULL) + } +} diff --git a/data-raw/meta_nepExplorer.csv b/data-raw/meta_nepExplorer.csv index 08a6770..4dd7a52 100644 --- a/data-raw/meta_nepExplorer.csv +++ b/data-raw/meta_nepExplorer.csv @@ -1,28 +1,28 @@ -text_key,domain,col_key,field_key,type,label,description,multiple,standard_adam,standard_sdtm -id_col,vitals,id_col,,column,ID column,Unique subject identifier variable name.,FALSE,USUBJID,USUBJID -value_col,vitals,value_col,,column,Value column,Vitals result variable name.,FALSE,AVAL,VSSTRESN -measure_col,vitals,measure_col,,column,Measure column,Vitals measure variable name,FALSE,PARAM,VSTEST -studyday_col,vitals,studyday_col,,column,Study Day column,Visit day variable name,FALSE,ADY,VSDY -visit_col,vitals,visit_col,,column,Visit column,Visit variable name,FALSE,AVISIT,VISIT -visitn_col,vitals,visitn_col,,column,Visit Number column,Visit number variable name,FALSE,AVISITN,VISITNUM -measure_values--SYSBP,vitals,measure_col,SYSBP,field,Systolic value,Value used for Systolic Blood Pressure in the specified measure column,FALSE,Systolic Blood Pressure (mmHg),Systolic Blood Pressure -measure_values--DIABP,vitals,measure_col,DIABP,field,Diastolic value,Value used for Diastolic Blood Pressure in the specified measure column,FALSE,Diastolic Blood Pressure (mmHg),Diastolic Blood Pressure -unit_col,vitals,unit_col,,column,Unit column,Unit of measure variable name,FALSE,,VSSTRESU -vs_baseline_flag,vitals,vs_baseline_flag,,column,Vital Signs Baseline Flag,Column for assigning VS baseline visits,FALSE,ABLFL, -vs_baseline_values--Y,vitals,vs_baseline_flag,Y,field,Vital Signs Baseline Flag value,Value used for VS Baseline in the specified baseline column,FALSE,Y, -measure_values--ALB,labs,measure_col,ALB,field,Albumin value,Value used for Albumin in the specified measure column,FALSE,Albumin (mg/dL),Albumin -measure_values--ALB/CREAT,labs,measure_col,ALB/CREAT,field,Albumin/Creatinine Ratio value,Value used for Albumin/Creatinine ratio in the specified measure column,FALSE,Albumin/Creatinine, -measure_values--BUN/CREAT,labs,measure_col,BUN/CREAT,field,Blood Urea Nitrogen/Creatinine Ratio value,Value used for Blood Urea Nitrogen/Creatinine ratio in the specified measure column,FALSE,Blood Urea Nitrogen/Creatinine, -measure_values--nepFC_BICARB,labs,measure_col,BICARB,field,Bicarbonate value,Value used for Bicarbonate in the specified measure column,FALSE,Bicarbonate (umol/L),Bicarbonate -measure_values--nepFC_BUN,labs,measure_col,BUN,field,Blood Urea Nitrogen value,Value used for Blood Urea Nitrogen in the specified measure column,FALSE,Blood Urea Nitrogen (mmol/L),Blood Urea Nitrogen -measure_values--nepFC_CA,labs,measure_col,CA,field,Calcium value,Value used for Calcium in the specified measure column,FALSE,Calcium (mmol/L),Calcium -measure_values--nepFC_CL,labs,measure_col,CL,field,Chloride value,Value used for Chloride in the specified measure column,FALSE,Chloride (mmol/L),Chloride -measure_values--CREAT,labs,measure_col,CREAT,field,Creatinine value,Value used for Creatinine in the specified measure column,FALSE,Creatinine (mg/dL),Creatinine -measure_values--CYSTC,labs,measure_col,CYSTC,field,Cystatin C value,Value used for Cystatin C in the specified measure column,FALSE,Cystatin C (umol/L),Cystatin C -measure_values--eGFR,labs,measure_col,eGFR,field,eGFR value,Value used for the Estimated Glomerular Filtration Rate in the specified measure column,FALSE,eGFR (mL/min),eGFR -measure_values--eGFRcys,labs,measure_col,eGFRcys,field,eGFRcys value,Value used for the Cystatic C-based Estimated Glomerular Filtration Rate in the specified measure column,FALSE,eGFRcys (mL/min),eGFRcys -measure_values--nepFC_PHOS,labs,measure_col,PHOS,field,Phosphate value,Value used for Phosphate in the specified measure column,FALSE,Phosphate (mmol/L),Phosphate -measure_values--nepFC_K,labs,measure_col,K,field,Potassium value,Value used for Potassium in the specified measure column,FALSE,Potassium (mmol/L),Potassium -measure_values--nepFC_SODIUM,labs,measure_col,SODIUM,field,Sodium value,Value used for Sodium in the specified measure column,FALSE,Sodium (mmol/L),Sodium -baseline_flag,labs,baseline_flag,,column,Baseline Flag column,Column for assigning baseline visits,FALSE,ABLFL,LBBLFL -baseline_values--Y,labs,baseline_flag,Y,field,Baseline Flag value,Value used for Baseline in the specified baseline column,FALSE,Y,Y +text_key,domain,col_key,field_key,type,label,description,multiple,standard_adam,standard_sdtm +id_col,vitals,id_col,,column,ID column,Unique subject identifier variable name.,FALSE,USUBJID,USUBJID +value_col,vitals,value_col,,column,Value column,Vitals result variable name.,FALSE,AVAL,VSSTRESN +measure_col,vitals,measure_col,,column,Measure column,Vitals measure variable name,FALSE,PARAM,VSTEST +studyday_col,vitals,studyday_col,,column,Study Day column,Visit day variable name,FALSE,ADY,VSDY +visit_col,vitals,visit_col,,column,Visit column,Visit variable name,FALSE,AVISIT,VISIT +visitn_col,vitals,visitn_col,,column,Visit Number column,Visit number variable name,FALSE,AVISITN,VISITNUM +measure_values--SYSBP,vitals,measure_col,SYSBP,field,Systolic value,Value used for Systolic Blood Pressure in the specified measure column,FALSE,Systolic Blood Pressure (mmHg),Systolic Blood Pressure +measure_values--DIABP,vitals,measure_col,DIABP,field,Diastolic value,Value used for Diastolic Blood Pressure in the specified measure column,FALSE,Diastolic Blood Pressure (mmHg),Diastolic Blood Pressure +unit_col,vitals,unit_col,,column,Unit column,Unit of measure variable name,FALSE,,VSSTRESU +baseline_flag,vitals,vs_baseline_flag,,column,Vital Signs Baseline Flag,Column for assigning VS baseline visits,FALSE,ABLFL, +baseline_values--Y,vitals,vs_baseline_flag,Y,field,Vital Signs Baseline Flag value,Value used for VS Baseline in the specified baseline column,FALSE,Y, +measure_values--ALB,labs,measure_col,ALB,field,Albumin value,Value used for Albumin in the specified measure column,FALSE,Albumin (mg/dL),Albumin +measure_values--ALB/CREAT,labs,measure_col,ALB/CREAT,field,Albumin/Creatinine Ratio value,Value used for Albumin/Creatinine ratio in the specified measure column,FALSE,Albumin/Creatinine, +measure_values--BUN/CREAT,labs,measure_col,BUN/CREAT,field,Blood Urea Nitrogen/Creatinine Ratio value,Value used for Blood Urea Nitrogen/Creatinine ratio in the specified measure column,FALSE,Blood Urea Nitrogen/Creatinine, +measure_values--nepFC_BICARB,labs,measure_col,BICARB,field,Bicarbonate value,Value used for Bicarbonate in the specified measure column,FALSE,Bicarbonate (umol/L),Bicarbonate +measure_values--nepFC_BUN,labs,measure_col,BUN,field,Blood Urea Nitrogen value,Value used for Blood Urea Nitrogen in the specified measure column,FALSE,Blood Urea Nitrogen (mmol/L),Blood Urea Nitrogen +measure_values--nepFC_CA,labs,measure_col,CA,field,Calcium value,Value used for Calcium in the specified measure column,FALSE,Calcium (mmol/L),Calcium +measure_values--nepFC_CL,labs,measure_col,CL,field,Chloride value,Value used for Chloride in the specified measure column,FALSE,Chloride (mmol/L),Chloride +measure_values--CREAT,labs,measure_col,CREAT,field,Creatinine value,Value used for Creatinine in the specified measure column,FALSE,Creatinine (mg/dL),Creatinine +measure_values--CYSTC,labs,measure_col,CYSTC,field,Cystatin C value,Value used for Cystatin C in the specified measure column,FALSE,Cystatin C (umol/L),Cystatin C +measure_values--eGFR,labs,measure_col,eGFR,field,eGFR value,Value used for the Estimated Glomerular Filtration Rate in the specified measure column,FALSE,eGFR (mL/min),eGFR +measure_values--eGFRcys,labs,measure_col,eGFRcys,field,eGFRcys value,Value used for the Cystatic C-based Estimated Glomerular Filtration Rate in the specified measure column,FALSE,eGFRcys (mL/min),eGFRcys +measure_values--nepFC_PHOS,labs,measure_col,PHOS,field,Phosphate value,Value used for Phosphate in the specified measure column,FALSE,Phosphate (mmol/L),Phosphate +measure_values--nepFC_K,labs,measure_col,K,field,Potassium value,Value used for Potassium in the specified measure column,FALSE,Potassium (mmol/L),Potassium +measure_values--nepFC_SODIUM,labs,measure_col,SODIUM,field,Sodium value,Value used for Sodium in the specified measure column,FALSE,Sodium (mmol/L),Sodium +baseline_flag,labs,baseline_flag,,column,Baseline Flag column,Column for assigning baseline visits,FALSE,ABLFL,LBBLFL +baseline_values--Y,labs,baseline_flag,Y,field,Baseline Flag value,Value used for Baseline in the specified baseline column,FALSE,Y,Y diff --git a/data-raw/meta_nepExplorer2.csv b/data-raw/meta_nepExplorer2.csv index 2485c67..327bf51 100644 --- a/data-raw/meta_nepExplorer2.csv +++ b/data-raw/meta_nepExplorer2.csv @@ -8,8 +8,8 @@ visitn_col,vitals,visitn_col,,column,Visit Number column,Visit number variable n measure_values--SYSBP,vitals,measure_col,SYSBP,field,Systolic value,Value used for Systolic Blood Pressure in the specified measure column,FALSE,Systolic Blood Pressure (mmHg),Systolic Blood Pressure measure_values--DIABP,vitals,measure_col,DIABP,field,Diastolic value,Value used for Diastolic Blood Pressure in the specified measure column,FALSE,Diastolic Blood Pressure (mmHg),Diastolic Blood Pressure unit_col,vitals,unit_col,,column,Unit column,Unit of measure variable name for Blood Pressure,FALSE,STRESU,VSSTRESU -vs_baseline_flag_col,vitals,vs_baseline_flag,,column,VS Baseline Flag,Column for assigning baseline visits for Blood Pressure,FALSE,ABLFL, -vs_baseline_values--POS,vitals,vs_baseline_flag_col,POS,field,VS Baseline Flag value,Value used for Baseline in the specified baseline column for Blood Pressure,FALSE,Y,Y +baseline_flag_col,vitals,vs_baseline_flag,,column,VS Baseline Flag,Column for assigning baseline visits for Blood Pressure,FALSE,ABLFL, +baseline_values--Y,vitals,vs_baseline_flag_col,Y,field,VS Baseline Flag value,Value used for Baseline in the specified baseline column for Blood Pressure,FALSE,Y,Y lab_unit_col,labs,lab_unit_col,,column,Unit column,Unit of measure variable name,FALSE,STRESU,STRESU measure_values--ALB,labs,measure_col,ALB,field,Albumin value,Value used for Albumin in the specified measure column,FALSE,Albumin (g/L),Albumin measure_values--BICARB,labs,measure_col,BICARB,field,Bicarbonate value,Value used for Bicarbonate in the specified measure column,FALSE,Bicarbonate (umol/L),Bicarbonate diff --git a/data/meta_nepExplorer.rda b/data/meta_nepExplorer.rda index 48f8e16517e732fbb68971a39d80a0f04efc9047..a7c010acbd825ebfcf190ddbea36b214d5d9b74f 100644 GIT binary patch delta 1370 zcmV-g1*Q7;3g-$ALRx4!F+o`-Q(0*y)bf!IAb)1uT+32HNjdUk!o~D^J z27m+94F;Mt4KfWhWB>+8nKTg7Ak=680004?000000Fe?APgB}N-l3$^O`xg7$5*tl1&3iJq0`|n3^;vq|*_IdQ66yGyu_{ z(SM1fO_b#v>_!Qqnsw}EqD04*cq{4icrN0p&BdQEJv4oN&VD?^b^mshI}US)n_`oYtI|J9oGfmh*i9gjW95S->tc}-mf#aLRb z*qaG7bd1$1rLka#+YL1gt)5PQ3b0MUBoas6L?oL9!TYN#S!Yv_EiUL=~e891D3s(&OdhOdR%=HjY`)%^jQjgnG~*AXYyjX#;^E7`MI z(vp!Bj?kvFk0hyAjI4?gOBSkr5G;Sc=42Rx=&tpMW=Rbx#v(Lz7W- zm=qe)Yedq8j^+s`9;a#*3A>YGGNEbwM4}&CQ$qV{Z8{ip*(1BFpwp)?F7w-)p^C9( zw5eoLE0}W-;!5f?N+n%E4GE5gQxfb-O2ieU6t0n31%~0HqjrHk%70!Gz)_hhN>F;U zkt~fUdo{U@rI4WDsS6?IlV1Uw8y<5)81Ito-g%2}A{+>Q7Urh$ALl+zDxKD&Jef|u z$)t|4$`Cu8u**YyADKi=5H}I1Iavm1!OZw(WncW4U zCRm5Jid|6WJZtf>FB`~qP~+o!59`0yr?j5UFqjRjMWNLifEo~Gwuj^4NQ`lQP$-rX zNivW&n{B&`lIUcxFqMK@n`uBUghq-@A`)hiW;J%#EqWcb zbF^zANM~$Fqe>b~Qe;n&o;-|8RVutJnDUpwN+Fonsm!Bq7CWSv+6s?H9x`y?MYb`l zhrdl5_%9bN4nqr{4Q@;1IcXul#nH-6Jx%E{L^ztvvLx0a&u^h=EbuJZk=AZ+^G&_N cn_IEN9MjRs-ccg8Qh(y^NT&)C4J6v0K;~<6*8l(j delta 1386 zcmV-w1(o{e3ik>QLRx4!F+o`-Q(0$8p~#UAAb)CgY|I@4qJcpwX(YgEZBNxOjXeTn z(?AA>jSU7yfMgnaji}H7)W{JhlhRKV+D}jb&;SgBKmZzK0003ZD34NMH6GPANW}F$ zM$`a3MvVY80002c0B~s0plCD(Oh9C4WB>-5WB>p#Kme&E5NR5oNr-5kr>c4;^(U#~ zJ%3G1^qLP)0UxS)O*F*uPf@1H>Ks>@CaP=OV?`1sIp;-vKc7Y1wL2H-&<{O9Ps94# zU7kav<)}L=X-5Sddo51%Jln;Cn|+v>@lyI|U;?_ANW&VNI8S7n(mZgwGdrZ*U>@kfUjqITP1wStJX(!4l= z*7B7;L#)zjY=m;s@WBbzgxp<1Fg&$AT3J~LKdA0<_kC?sZj^=^Y#b=S^UDHTeZkv+}?(yAxCF}q6 zlpOSrlJ-SJ^r7L9qBMRQp?`xr@i1d2>&jA;j62N6>8>JY3WV(D-i+v4eo-ig*xb~@+nf$exWiC2 zqJRfFFeK-rEk`gSks%6csMwPvnUSP1VsNBPa4k_OSFpO*R%tF`hJP{_<9)GNna<+k z>h;%jU!5jOG$lz&4@QzDk*yEgX6u1uC_Q?RvLBqAavCvXq|}BuNp_DczlSm&x*yuz zgzXXkc;wcp;chg^obK4Kg0K zQe-Ye=XPRY-7`eeETqc zsD{d@ud{53$rmL8iD4v@DFbP?+pw`Nc3WY5v?S#CB(XdWnSZ*oR#+M$cZ<8ScG6Cu zuAr$0Gu!X9&t$YtQYt%VF_y97?xbk-QvI?z%97NkTU!_={E0Xu+Kl_Dn;9-Qv&rw} zH*0&TQcdX=Vy}5}*dgx~#j9czhL-e-HKDA&Ov&)H$BlZ`O0S)p9Oe*Gh-fy{-J^#5 zZjxfuRC&4aCsS`CExn4!dyKuYbgT&>23a - Getting Started with nepExplorer • nepExplorer - - - - - + Skip to contents - -