diff --git a/DESCRIPTION b/DESCRIPTION index f23ca5b6e..6496878a6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: OMA Title: Orchestrating Microbiome Analysis with Bioconductor -Version: 0.98.37 +Version: 0.98.38 Date: 2025-03-13 Authors@R: c( @@ -63,6 +63,8 @@ Suggests: igraph, IntegratedLearner, knitr, + lme4, + lmerTest, Maaslin2, maaslin3, mediation, diff --git a/inst/assets/_book.yml b/inst/assets/_book.yml index 5237f97ae..11c4862f8 100644 --- a/inst/assets/_book.yml +++ b/inst/assets/_book.yml @@ -60,9 +60,13 @@ book: - pages/cross_correlation.qmd - pages/multiassay_ordination.qmd - pages/integrated_learner.qmd - - part: "Machine learning & statistical modeling" + - part: "Machine learning" chapters: - pages/machine_learning.qmd + - part: "Time series" + chapters: + - pages/time_series.qmd + - pages/time_diversity.qmd - part: "Workflows" chapters: - pages/introductory_workflow.qmd diff --git a/inst/assets/bibliography.bib b/inst/assets/bibliography.bib index 5e7534847..c7bf399fd 100644 --- a/inst/assets/bibliography.bib +++ b/inst/assets/bibliography.bib @@ -2551,7 +2551,7 @@ @article{Marchesi2015 publisher = {Springer Science and Business Media LLC}, author = {Marchesi, Julian R. and Ravel, Jacques}, year = {2015}, - month = jul + month = jul } @article{Nickols2024, @@ -2563,3 +2563,18 @@ @article{Nickols2024 year = {2024}, month = dec } + +@article{Schmidt2018, + title = {The Human Gut Microbiome: From Association to Modulation}, + volume = {172}, + ISSN = {0092-8674}, + url = {http://dx.doi.org/10.1016/j.cell.2018.02.044}, + DOI = {10.1016/j.cell.2018.02.044}, + number = {6}, + journal = {Cell}, + publisher = {Elsevier BV}, + author = {Schmidt, Thomas S.B. and Raes, Jeroen and Bork, Peer}, + year = {2018}, + month = mar, + pages = {1198–1215} +} diff --git a/inst/pages/beta_diversity.qmd b/inst/pages/beta_diversity.qmd index 5332066a7..e57a1c2aa 100644 --- a/inst/pages/beta_diversity.qmd +++ b/inst/pages/beta_diversity.qmd @@ -177,7 +177,7 @@ between two points is always the shortest possible distance. ::: -## Divergence +## Divergence {#sec-divergence} Divergence measure refers to a difference in community composition between the given sample(s) and a reference sample. This can be @@ -479,7 +479,6 @@ tse_sub <- runMDS( Then let's do the same with rarefaction: - ```{r} #| label = "MDS plot based on the Bray-Curtis distances using rarefaction." diff --git a/inst/pages/time_diversity.qmd b/inst/pages/time_diversity.qmd new file mode 100644 index 000000000..70d556a63 --- /dev/null +++ b/inst/pages/time_diversity.qmd @@ -0,0 +1,327 @@ +# Diversity and time divergence {#sec-time-diversity} + +```{r setup, echo = FALSE, results = "asis"} +library(rebook) +chapterPreamble() +``` + +```{r include = FALSE} +# global knitr options +knitr::opts_chunk$set( + message = FALSE, + fig.width = 10, + dpi = 300, + dev = "png", + dev.args = list(type = "cairo-png") +) +``` + +In this chapter, we use an example dataset from probiotic/placebo intervention +study. The dataset includes gut microbiota samples from 22 subjects and 2 time +points. The first time point was sampled before introducing a treatment. + +```{r} +#| label: load_data + +library(microbiomeDataSets) + +mae <- peerj32() +tse <- getWithColData(mae, 1L) +tse[["time_point"]] <- tse[["time"]] |> as.factor() +``` + +## Alpha diversity + +[@sec-alpha-diversity] introduced methods and main idea of alpha diversity. +Here we calculate Shannon diversity index. + +```{r} +#| label: alpha1 + +library(mia) + +tse <- addAlpha(tse, index = "shannon") +``` + +In addition to visualizing the diversity in respect to sample groups, we want +to analyse how much the diversity changes between time points. That is why +we calculate the difference between time points for each subject. + +```{r} +#| label: alpha2 + +library(dplyr) + +df <- colData(tse) |> + as.data.frame() |> + # Sort based on time + arrange(time) |> + # Group based on subject so that we calculate the difference between each + # subject's samples + group_by(subject) |> + # Calculate difference + mutate(diff = diff(shannon)) |> + ungroup() |> + DataFrame() + +# Sort to original order and add difference info to colData +df <- df[match(tse[["sample"]], df[["sample"]]), , drop = FALSE] +rownames(df) <- colnames(tse) +colData(tse) <- df + +colData(tse) |> head() +``` + +Now we can continue to visualize the alpha diversities. First we create a simple +boxplot showing Shannon index in respect to groups and time. + +```{r} +#| label: alpha3 + +library(ggplot2) + +# Get colData +df <- colData(tse) |> + as.data.frame() + +# Create a second df which is sorted +df2 <- df |> + arrange(subject, time_point, group) + +# Create a jitter +jitter_position <- position_jitterdodge( + dodge.width = 0.75, + jitter.width = 0, + jitter.height = 0 + ) + +# Create a plot +p <- ggplot(mapping = aes(x = time_point, y = shannon, fill = group)) + + # We disable outlier plotting as we plot points with jitter. Otherwise + # outliers would be duplicated + geom_boxplot(data = df, outlier.shape = NA) + + # Add jitter plot + geom_point(data = df2, aes( + group = interaction(subject, group)), + position = jitter_position) + +p +``` + +The boxplot above shows alpha diversity in each group and time point. In the +dataset, each subject was sampled in each time point, however, from the plot +we cannot see which samples correspond to which subject. That is why we +might be interested on linking the sample-pairs to highlight +the subject-specific deviation between time points. + +```{r} +#| label: alpha4 + +# Add line to link subjects +p_path <- p + + geom_line( + data = df2, + aes(group = interaction(subject, group)), + position = jitter_position, + alpha = 0.3 + ) +p_path +``` + +We can further refine the plot by adding the magnitude of change with a color. + +```{r} +#| label: alpha5 + +# Add line linking subjects +p_path <- p + + geom_line( + data = df2, + aes(group = interaction(subject, group), color = diff), + position = jitter_position + ) + + scale_color_gradient2( + low="blue", mid="white", high="red", + limits = c(-max(abs(df[["diff"]])), max(abs(df[["diff"]])))) + +p_path +``` + +The visualization does not show clear effect of treatment. We can still test +if we can get support on these null findings with statistical tests. The +simplest test might be the Wilcoxon test that we have to apply for each +treatment group separately. Moreover, we have to apply the paired version of +test as the samples are from same subjects, thus allowing each patient to serve +as their own control. + +We first test if placebo treatment had effect on alpha diversity. + +```{r} +#| label: alpha6 + +# Get data on placebo samples +df <- tse[, tse[["group"]] == "Placebo"] |> colData() +# Get first and second time points separately +before <- df[df[["time_point"]] == 1, "shannon"] +after <- df[df[["time_point"]] == 2, "shannon"] + +# Apply paired Wilcoxon test +wilcox.test(before, after, paired = TRUE) +``` + +Based on results, alpha diversity in second time point equals to first time +point for the placebo group, which is expected result. We can then do the same +test for group that got probiotics. + +```{r} +#| label: alpha7 + +# Get data on placebo samples +df <- tse[, tse[["group"]] == "LGG"] |> colData() +# Get first and second time points separately +before <- df[df[["time_point"]] == 1, "shannon"] +after <- df[df[["time_point"]] == 2, "shannon"] + +# Apply paired Wilcoxon test +wilcox.test(before, after, paired = TRUE) +``` + +Wilcoxon test confirms the results also for probiotics group: probiotic +treatment do not increase or decrease diversity of microbioal community. + +More elegant test would be model-based approach. Below, we fit a simple linear +mixed effects model to examine the impact of time and treatment group on +Shannon diversity. In this model, both time and group are treated as fixed +effects, meaning that we assume their influence on Shannon diversity is +consistent across all subjects. Additionally, we include subject as a random +effect to account for the repeated measurements from the same individuals. + +If you are not familiar with R formulas, here’s an explanation of how to +interpret the components: + +- The left-hand side of the formula represents the predicted variable +(in this case, shannon diversity). + +- The `+` sign in the formula means that we are adding the terms on the +right-hand side of the formula together (i.e., combining them as separate +effects). + +- The `*` sign indicates an interaction between the terms on either side. This +means that we are not only testing the individual effects of time and group, but +also how these two factors interact with each other to influence Shannon +diversity. Specifically, it allows us to assess how the effect of treatment +group differs between time point 1 and time point 2. + +```{r} +#| label: alpha8 + +library(lme4) +library(lmerTest) + +# Get sample metadata +df <- colData(tse) |> + as.data.frame() + +# Fit the model +model <- lmer(shannon ~ time * group + (1 | subject), data = df) + +# Display summary with p-values +summary(model) +``` + +The results indicate that none of the variables in the model show statistically +significant effects. Specifically, "time" represents the overall effect of time +on Shannon diversity, while "groupPlacebo" represents the overall difference in +Shannon diversity between the Placebo group and the LGG group, averaged across +both time points. + +The most interesting variable in the results is the "time:groupPlacebo" +interaction term. This term investigates whether the effect of time on Shannon +diversity differs between the Placebo group and the LGG group when comparing the +second time point to the first one. + +A significant interaction would indicate that the change in diversity over time +is different between the two groups. However, in this case, the interaction is +not statistically significant, suggesting that the treatment effect did not +differ significantly between the Placebo group and the LGG group between the two +time points. + +## Time divergence + +[@sec-divergence] introduced divergence which means that we calculate +dissimilarity between samples. We can do the same and calculate dissimilarity +between time points for each subject. Below we calculate Bray-Curtis +dissimilarity between consecutive time points. + +```{r} +#| label: divergence1 + +library(miaTime) + +tse <- transformAssay(tse, method = "relabundance") +tse <- addStepwiseDivergence( + tse, + time.col = "time", + group = "subject", + assay.type = "relabundance", + method = "bray" +) +``` + +Then we can visualize the results with a box plot. + +```{r} +#| label: divergence2 + +library(scater) + +plotColData(tse, x = "group", y = "divergence", show_boxplot = TRUE) +``` + +Figure above summarizes the divergence. Each point represent a subject: +dissimilarity between samples from the two time points. Divergence between +consecutive time points seems to be equal between treatment groups. + +## Ordination + +Ordination plots, e.g., Principal Coordinate Analysis (PCoA) plot, is a common +visualization technique to find patterns from the data. Below, we perform PCoA +and visualize the results as showed in [@#sec-community-similarity]. + +Note that it is important to sort the data based on time so that the arrows that +we add in the next step have correct direction. + +```{r} +#| label: pcoa + +# Sort data based on time +tse <- tse[, order(tse[["time"]])] + +# Perform PCoA +tse <- addMDS(tse, assay.type = "relabundance", method = "bray") +# Create ordination plot +p <- plotReducedDim( + tse, dimred = "MDS", colour_by = "group", + # Add other fields that are used later to add paths + other_fields = c("subject", "time") +) +p +``` + +After initializing a scatter plot, we can add arrows that show how the microbial +communities evolved over time. + +```{r} +#| label: pcoa2 + +# Add arrows to connect samples from subjects +p <- p + geom_path( + mapping = aes(group = subject, ), + arrow = arrow(length = unit(0.2, "cm")), + alpha = 0.5 +) +p +``` + +From the PCoA plot, we cannot observe clear patterns. As a final conclusion, it +seems that probiotics did not have effect on microbiome compared to placebo. diff --git a/inst/pages/time_series.qmd b/inst/pages/time_series.qmd new file mode 100644 index 000000000..8d32ff6ce --- /dev/null +++ b/inst/pages/time_series.qmd @@ -0,0 +1,21 @@ +# Time series {#sec-time-series} + +```{r setup, echo = FALSE, results = "asis"} +library(rebook) +chapterPreamble() +``` + +Microbial communities are dynamic, constantly evolving over time. However, +despite temporal fluctuations driven by environmental and other factors, the +microbiome retains its core characteristics [@Schmidt2018]. + +Compared to cross-sectional studies, a longitudinal study design allows +researchers to control time-related factors, enabling a more focused +investigation of specific research questions. However, longitudinal studies are +usually more complex than cross-sectional. Analyzing such data includes specific +aspects that need to be taken into account such as the dependence between +samples collected at multiple time points. + +Most of the methods introduced earlier in this book can be applied to temporal +studies with appropriate modifications and considerations. These are discussed +in [@sec-time-diversity]. \ No newline at end of file