diff --git a/R/ds.mdPattern.R b/R/ds.mdPattern.R index e553b3f1..af59498e 100644 --- a/R/ds.mdPattern.R +++ b/R/ds.mdPattern.R @@ -47,14 +47,14 @@ #' after login. If the \code{datasources} argument is not specified, the default set of #' connections will be used: see \code{\link[DSI]{datashield.connections_default}}. #' @return For type='split': A list with one element per study, each containing: -#' \itemize{ +#' \describe{ #' \item{pattern}{The missing data pattern matrix for that study} #' \item{valid}{Logical indicating if all patterns meet disclosure requirements} #' \item{message}{A message describing the validity status} #' } #' #' For type='combine': A list containing: -#' \itemize{ +#' \describe{ #' \item{pattern}{The pooled missing data pattern matrix across all studies} #' \item{valid}{Logical indicating if all pooled patterns meet disclosure requirements} #' \item{message}{A message describing the validity status} diff --git a/docker-compose_armadillo.yml b/docker-compose_armadillo.yml index 26bd8b85..37c44cda 100644 --- a/docker-compose_armadillo.yml +++ b/docker-compose_armadillo.yml @@ -3,7 +3,7 @@ services: hostname: armadillo ports: - 8080:8080 - image: datashield/armadillo_citest:5.9.4 + image: datashield/armadillo_citest:5.11.0 environment: LOGGING_CONFIG: 'classpath:logback-file.xml' AUDIT_LOG_PATH: '/app/logs/audit.log' @@ -16,6 +16,7 @@ services: default: hostname: default - image: datashield/rock-omicron-karma-permissive:devel + image: datashield/rock-quebrada-lamda:latest +# image: datashield/rserver-panda-lamda:devel environment: DEBUG: "FALSE" diff --git a/docker-compose_opal.yml b/docker-compose_opal.yml index 1a048f51..a62dec67 100644 --- a/docker-compose_opal.yml +++ b/docker-compose_opal.yml @@ -20,6 +20,6 @@ services: - MONGO_INITDB_ROOT_USERNAME=root - MONGO_INITDB_ROOT_PASSWORD=foobar rock: - image: datashield/rock-lemon-donkey-permissive:draft + image: datashield/rock-quebrada-lamda-permissive:latest environment: DEBUG: "FALSE" 
diff --git a/docs/404.html b/docs/404.html index 761ee0b9..76de734e 100644 --- a/docs/404.html +++ b/docs/404.html @@ -32,7 +32,7 @@ @@ -73,12 +73,12 @@
Burton P, Wilson R, Butters O, Ryser-Welch P, Westerberg A, Abarrategui L, Villegas-Diaz R, Avraam D, Avraam D, Marcon Y, Bishop T, Gaye A, Escribà-Montagut X, Wheater S (2025). +
Burton P, Wilson R, Butters O, Ryser-Welch P, Westerberg A, Abarrategui L, Villegas-Diaz R, Avraam D, Marcon Y, Bishop T, Gaye A, Escribà-Montagut X, Wheater S (????). dsBaseClient: 'DataSHIELD' Client Side Base Functions. -R package version 6.3.4. +R package version 6.3.5-9000.
@Manual{,
title = {dsBaseClient: 'DataSHIELD' Client Side Base Functions},
- author = {Paul Burton and Rebecca Wilson and Olly Butters and Patricia Ryser-Welch and Alex Westerberg and Leire Abarrategui and Roberto Villegas-Diaz and Demetris Avraam and Demetris Avraam and Yannick Marcon and Tom Bishop and Amadou Gaye and Xavier Escribà-Montagut and Stuart Wheater},
- note = {R package version 6.3.4},
+ author = {Paul Burton and Rebecca Wilson and Olly Butters and Patricia Ryser-Welch and Alex Westerberg and Leire Abarrategui and Roberto Villegas-Diaz and Demetris Avraam and Yannick Marcon and Tom Bishop and Amadou Gaye and Xavier Escribà-Montagut and Stuart Wheater},
+ note = {R package version 6.3.5-9000},
}
Gaye A, Marcon Y, Isaeva J, LaFlamme P, Turner A, Jones E, Minion J, Boyd A, Newby C, Nuotio M, Wilson R, Butters O, Murtagh B, Demir I, Doiron D, Giepmans L, Wallace S, Budin-Ljøsne I, Schmidt C, Boffetta P, Boniol M, Bota M, Carter K, deKlerk N, Dibben C, Francis R, Hiekkalinna T, Hveem K, Kvaløy K, Millar S, Perry I, Peters A, Phillips C, Popham F, Raab G, Reischl E, Sheehan N, Waldenberger M, Perola M, van den Heuvel E, Macleod J, Knoppers B, Stolk R, Fortier I, Harris J, Woffenbuttel B, Murtagh M, Ferretti V, Burton P (2014). “DataSHIELD: taking the analysis to the data, not the data to the analysis.” @@ -168,11 +164,11 @@
For a full list of development branches, check out https://github.com/datashield/dsBaseClient/branches
[1] Burton P, Wilson R, Butters O, Ryser-Welch P, Westerberg A, Abarrategui L, Villegas-Diaz R, Avraam D, Marcon Y, Bishop T, Gaye A, Escribà Montagut X, Wheater S (2025). dsBaseClient: ‘DataSHIELD’ Client Side Base Functions. R package version 6.3.4.
+[1] Burton P, Wilson R, Butters O, Ryser-Welch P, Westerberg A, Abarrategui L, Villegas-Diaz R, Avraam D, Marcon Y, Bishop T, Gaye A, Escribà Montagut X, Wheater S (2025). dsBaseClient: ‘DataSHIELD’ Client Side Base Functions. R package version 6.3.5.
[2] Gaye A, Marcon Y, Isaeva J, LaFlamme P, Turner A, Jones E, Minion J, Boyd A, Newby C, Nuotio M, Wilson R, Butters O, Murtagh B, Demir I, Doiron D, Giepmans L, Wallace S, Budin-Ljøsne I, Oliver Schmidt C, Boffetta P, Boniol M, Bota M, Carter K, deKlerk N, Dibben C, Francis R, Hiekkalinna T, Hveem K, Kvaløy K, Millar S, Perry I, Peters A, Phillips C, Popham F, Raab G, Reischl E, Sheehan N, Waldenberger M, Perola M, van den Heuvel E, Macleod J, Knoppers B, Stolk R, Fortier I, Harris J, Woffenbuttel B, Murtagh M, Ferretti V, Burton P (2014). “DataSHIELD: taking the analysis to the data, not the data to the analysis.” International Journal of Epidemiology, 43(6), 1929-1944. https://doi.org/10.1093/ije/dyu188.
[3] Wilson R, W. Butters O, Avraam D, Baker J, Tedds J, Turner A, Murtagh M, R. Burton P (2017). “DataSHIELD – New Directions and Dimensions.” Data Science Journal, 16(21), 1-21. https://doi.org/10.5334/dsj-2017-021.
[4] Avraam D, Wilson R, Aguirre Chan N, Banerjee S, Bishop T, Butters O, Cadman T, Cederkvist L, Duijts L, Escribà Montagut X, Garner H, Gonçalves G, González J, Haakma S, Hartlev M, Hasenauer J, Huth M, Hyde E, Jaddoe V, Marcon Y, Mayrhofer M, Molnar-Gabor F, Morgan A, Murtagh M, Nestor M, Nybo Andersen A, Parker S, Pinot de Moira A, Schwarz F, Strandberg-Larsen K, Swertz M, Welten M, Wheater S, Burton P (2024). “DataSHIELD: mitigating disclosure risk in a multi-site federated analysis platform.” Bioinformatics Advances, 5(1), 1-21. https://doi.org/10.1093/bioadv/vbaf046.
@@ -154,7 +154,6 @@dot-pool_md_patterns.RdInternal function to pool md.pattern results from multiple studies
+.pool_md_patterns(patterns_list, study_names)Pooled pattern matrix
+fixed.dummy.vars = TRUE,
baseline.level = 1 and forced.factor.levels = c(1,2,3,4,5).
The input vector is converted to the following matrix of dummy variables:
-| DV2 | DV3 | DV4 | DV5 | 0 |
| 0 | 0 | 0 | 1 | 0 |
| 0 | 0 | 0 | 0 | 0 |
| 0 | 0 | 1 | 0 | 0 |
| 0 | 0 | 1 | 0 | 0 |
| 0 | 1 | 0 | 0 | 0 |
| 0 | 0 | 0 | 1 | 0 |
| 0 | 0 | 0 | 1 | 0 |
For the same example if the baseline.level = 3 then the matrix is:
| DV2 | DV3 | DV4 | DV5 |
| 0 | 0 | 0 | 0 |
| 1 | 0 | 0 | 0 |
| 0 | 0 | 0 | 0 |
| 0 | 1 | 0 | 0 |
| 0 | 0 | 1 | 0 |
| 0 | 0 | 1 | 0 |
| 0 | 0 | 0 | 0 |
| 0 | 1 | 0 | 0 |
| 0 | 0 | 1 | 0 |
| 0 | 0 | 0 | 1 |
For the same example if the baseline.level = 3 then the matrix is:
| DV1 | DV2 | DV4 | DV5 |
| 1 | 0 | 0 | 0 |
| 0 | 1 | 0 | 0 |
| 1 | 0 | 0 | 0 |
| 0 | 0 | 0 | 0 |
| 0 | 0 | 1 | 0 |
| 0 | 0 | 1 | 0 |
| 1 | 0 | 0 | 0 |
| 0 | 0 | 0 | 0 |
| 0 | 0 | 1 | 0 |
| 0 | 0 | 0 | 1 |
In the first instance the first row of the matrix has zeros in all entries indicating
that the first data point belongs to level 1 (as the baseline level is equal to 1).
The second row has 1 at the first (DV2) column and zeros elsewhere,
@@ -229,11 +229,11 @@
ds.mdPattern.RdThis function is a client-side wrapper for the server-side mdPatternDS +function. It generates a missing data pattern matrix similar to mice::md.pattern but +with disclosure control applied to prevent revealing small cell counts.
+ds.mdPattern(x = NULL, type = "split", datasources = NULL)a character string specifying the name of a data frame or matrix on the +server-side containing the data to analyze.
a character string specifying the output type. If 'split' (default), +returns separate patterns for each study. If 'combine', attempts to pool patterns +across studies.
a list of DSConnection-class objects obtained
+after login. If the datasources argument is not specified, the default set of
+connections will be used: see datashield.connections_default.
For type='split': A list with one element per study, each containing:
The missing data pattern matrix for that study
Logical indicating if all patterns meet disclosure requirements
A message describing the validity status
For type='combine': A list containing:
The pooled missing data pattern matrix across all studies
Logical indicating if all pooled patterns meet disclosure requirements
A message describing the validity status
The function calls the server-side mdPatternDS function which uses +mice::md.pattern to analyze missing data patterns. Patterns with counts below the +disclosure threshold (default: nfilter.tab = 3) are suppressed to maintain privacy.
+Output Format: +- Each row represents a missing data pattern +- Pattern counts are shown in row names (e.g., "150", "25") +- Columns show 1 if the variable is observed, 0 if missing +- Last column shows the total number of missing values per pattern +- Last row shows the total number of missing values per variable
+Disclosure Control:
+Suppressed patterns (count below threshold) are indicated by: +- Row name: "suppressed(<N>)" where N is the threshold +- All pattern values set to NA +- Summary row also suppressed to prevent back-calculation
+Pooling Behavior (type='combine'):
+When pooling across studies, the function uses a conservative approach +for disclosure control:
+1. Identifies identical missing patterns across studies +2. EXCLUDES suppressed patterns from pooling - patterns suppressed in + ANY study are not included in the pooled count +3. Sums counts only for non-suppressed identical patterns +4. Re-validates pooled counts against disclosure threshold
+Important: This conservative approach means: +- Pooled counts may be underestimates if some studies had suppressed patterns +- This prevents disclosure through subtraction (e.g., if study A shows count=5 + and pool shows count=7, one could deduce study B has count=2, violating disclosure) +- Different patterns across studies are preserved separately in the pooled result
+if (FALSE) { # \dontrun{
+ ## Version 6, for version 5 see the Wiki
+
+ # Connecting to the Opal servers
+
+ require('DSI')
+ require('DSOpal')
+ require('dsBaseClient')
+
+ builder <- DSI::newDSLoginBuilder()
+ builder$append(server = "study1",
+ url = "http://192.168.56.100:8080/",
+ user = "administrator", password = "datashield_test&",
+ table = "CNSIM.CNSIM1", driver = "OpalDriver")
+ builder$append(server = "study2",
+ url = "http://192.168.56.100:8080/",
+ user = "administrator", password = "datashield_test&",
+ table = "CNSIM.CNSIM2", driver = "OpalDriver")
+ logindata <- builder$build()
+
+ connections <- DSI::datashield.login(logins = logindata, assign = TRUE, symbol = "D")
+
+ # Get missing data patterns for each study separately
+ patterns_split <- ds.mdPattern(x = "D", type = "split", datasources = connections)
+
+ # View results for study1
+ print(patterns_split$study1$pattern)
+ # var1 var2 var3
+ # 150 1 1 1 0 <- 150 obs complete
+ # 25 0 1 1 1 <- 25 obs missing var1
+ # 25 0 0 25 <- Summary: missing values per variable (25 in var1), 25 in total
+
+ # Get pooled missing data patterns across studies
+ patterns_pooled <- ds.mdPattern(x = "D", type = "combine", datasources = connections)
+ print(patterns_pooled$pattern)
+
+ # Example with suppressed patterns:
+ # If study1 has a pattern with count=2 (suppressed) and study2 has the same pattern
+ # with count=5 (valid), the pooled result will show count=5 (conservative approach)
+ # A warning will indicate: "Pooled counts may underestimate the true total"
+
+ # Clear the DataSHIELD R sessions and log out
+ datashield.logout(connections)
+} # }
+
+ds.matrixTranspose()
Transposes a server-side matrix
Display missing data patterns with disclosure control
Developed by Paul Burton, Rebecca Wilson, Olly Butters, Patricia Ryser-Welch, Alex Westerberg, Leire Abarrategui, Roberto Villegas-Diaz, Demetris Avraam, Demetris Avraam, Yannick Marcon, Tom Bishop, Amadou Gaye, Xavier Escribà-Montagut, Stuart Wheater.
+Developed by Paul Burton, Rebecca Wilson, Olly Butters, Patricia Ryser-Welch, Alex Westerberg, Leire Abarrategui, Roberto Villegas-Diaz, Demetris Avraam, Yannick Marcon, Tom Bishop, Amadou Gaye, Xavier Escribà-Montagut, Stuart Wheater.