Skip to content

[Features] add backbone  #6

@tdelcey

Description

@tdelcey

hi,

Build_dynamic_network() allows the user to extract the network backbone with the biblionetwork's functions that calculate the weigths of edges and normalize them. A good feature will be to add other methods, in particular "statistical" one from the backbone package.

Using the strategy you used to implement the functions of biblionetwork, the implementation of backbone's functions is straigthforward. Here is an example:

build_dynamic_networks_bb <- function(nodes,
                                      directed_edges,
                                      source_id,
                                      target_id,
                                      time_variable = NULL,
                                      time_window = NULL,
                                      backbone_method = c("statistical", "structured"),
                                      statistical_method = c("sdsm", "fdsm", "fixedfill", "fixedfrow", "fixedcol"),
                                      alpha = alpha,
                                      cooccurrence_method = c("coupling_angle", "coupling_strength", "coupling_similarity"),
                                      edges_threshold = 1,
                                      overlapping_window = FALSE,
                                      compute_size = FALSE,
                                      keep_singleton = FALSE,
                                      filter_components = FALSE,
                                      ...,
                                      verbose = TRUE) {
  size <- node_size <- N <- method <- NULL
  
  
  # Making sure the table is a datatable
  nodes <- data.table::data.table(nodes)
  directed_edges <- data.table::data.table(directed_edges)
  backbone_methods = c("statistical", "structured")
  cooccurrence_methods <-
    c("coupling_angle",
      "coupling_strength",
      "coupling_similarity")
  statistical_methods <-
    c("sdsm", "fdsm", "fixedfill", "fixedfrow", "fixedcol")
  
  
  
  if (length(backbone_method) > 1) {
    cli::cli_abort(
      c(
        "You did not choose any method for extracting the backbone. You have to choose between: ",
        "*" = "\"statistical\";",
        "*" = "\"structured\"."
      )
    )
  }
  
  if (!backbone_method %in% backbone_methods) {
    cli::cli_abort(
      c(
        "You did not choose any method for extracting the backbone. You have to choose between: ",
        "*" = "\"statistical\";",
        "*" = "\"structured\";"
      )
    )
  }
  
  
  if (backbone_method == "structured") {
    # Checking various problems: lacking method,
    if (length(cooccurrence_method) > 1) {
      cli::cli_abort(
        c(
          "You did not choose any method for cooccurrence computation. You have to choose between: ",
          "*" = "\"coupling_angle\";",
          "*" = "\"coupling_strength\";",
          "*" = "\"coupling_similarity\"."
        )
      )
    }
    
    if (!cooccurrence_method %in% cooccurrence_methods) {
      cli::cli_abort(
        c(
          "You did not choose an existing method for cooccurrence computation. You have to choose between: ",
          "*" = "\"coupling_angle\";",
          "*" = "\"coupling_strength\";",
          "*" = "\"coupling_similarity\"."
        )
      )
    }
    
  }
  
  if (backbone_method == "statistical") {
    # Checking various problems: lacking method,
    if (length(statistical_method) > 1) {
      cli::cli_abort(
        c(
          "You did not choose any method for cooccurrence computation. You have to choose between: ",
          "*" = "\"sdsm\";",
          "*" = "\"fdsm\";",
          "*" = "\"fixedfill\".",
          "*" = "\"fixedfrow\".",
          "*" = "\"fixedcol\"."
        )
      )
    }
    
    if (!statistical_method %in% statistical_methods) {
      cli::cli_abort(
        c(
          "You did not choose an existing method for cooccurrence computation. You have to choose between: ",
          "*" = "\"sdsm\";",
          "*" = "\"fdsm\";",
          "*" = "\"fixedfill\".",
          "*" = "\"fixedfrow\".",
          "*" = "\"fixedcol\"."
        )
      )
    }
    
    if (is.null(alpha)) {
      cli::cli_abort("You did not choose an alpha for the chosen statistical backbone.")
    }
    
    if (nodes[, .N, source_id, env = list(source_id = source_id)][N > 1, .N] > 0) {
      cli::cli_alert_warning(
        "Some identifiers in your column {.field {source_id}} in your nodes table are not unique. You need only one row per node."
      )
    }
    
    if (!is.null(time_window) & is.null(time_variable)) {
      cli::cli_abort(
        "You cannot have a {.emph time_window} if you don't give any column with a temporal variable.
                          Put a column in {.emph time_variable} or remove the {.emph time_window}."
      )
    }
    
    # giving information on the method
    
    if (verbose == TRUE) {
      if (length(statistical_method > 0))
        cli::cli_alert_info(
          paste(
            "We extract the network backbone using the",
            statistical_method,
            "method."
          )
        )
      # cli::cli_alert_info("The method use for co-occurence is the {.emph {cooccurrence_method}} method.")
      # cli::cli_alert_info("The edge threshold is: {.val {edges_threshold}}.")
      if (keep_singleton == FALSE)
        cli::cli_alert_info("We remove the nodes that are alone with no edge. \n\n")
    }
    
    # let's extract the information we need
    Nodes_coupling <- data.table::copy(nodes)
    Nodes_coupling[, source_id := as.character(source_id),
                   env = list(source_id = source_id)]
    
    if (is.null(time_variable)) {
      time_variable <- "fake_column"
      Nodes_coupling[, time_variable := 1,
                     env = list(time_variable = time_variable)]
    }
    
    if (!target_id %in% colnames(Nodes_coupling) &
        compute_size == TRUE)
    {
      cli::cli_abort(
        "You don't have the column {.field {target_id}} in your nodes table. Set {.emph compute_size} to {.val FALSE}."
      )
    }
    
    if (compute_size == TRUE) {
      Nodes_coupling[, target_id := as.character(target_id),
                     env = list(target_id = target_id)]
    }
    
    Edges <- data.table::copy(directed_edges)
    Edges <- Edges[, .SD, .SDcols = c(source_id, target_id)]
    Edges[, c(source_id, target_id) := lapply(.SD, as.character), .SDcols = c(source_id, target_id)]
    
    ######################### Dynamics networks *********************
    
    # Find the time_window
    Nodes_coupling <-
      Nodes_coupling[order(time_variable), env = list(time_variable = time_variable)]
    Nodes_coupling[, time_variable := as.integer(time_variable),
                   env = list(time_variable = time_variable)]
    
    first_year <- Nodes_coupling[, min(as.integer(time_variable)),
                                 env = list(time_variable = time_variable)]
    last_year <- Nodes_coupling[, max(as.integer(time_variable)),
                                env = list(time_variable = time_variable)]
    
    if (!is.null(time_window)) {
      if (last_year - first_year + 1 < time_window) {
        cli::cli_alert_warning(
          "Your time window is larger than the number of distinct values of {.field {time_variable}}"
        )
      }
    }
    
    if (is.null(time_window)) {
      all_years <- first_year
      time_window <- last_year - first_year + 1
    } else {
      if (overlapping_window == TRUE) {
        last_year <- last_year - time_window + 1
        all_years <- first_year:last_year
      } else {
        all_years <- seq(first_year, last_year, by = time_window)
        if (all_years[length(all_years)] + (time_window - 1) > last_year) {
          cli::cli_warn(
            "Your last network is shorter than the other(s) because the cutting by time window does not give a round count.
                The last time unity in your data is {.val {last_year}}, but the upper limit of your last time window is
                {.val {all_years[length(all_years)] + (time_window - 1)}}."
          )
        }
      }
    }
    
    # Prepare our list
    tbl_coup_list <- list()
    
    for (Year in all_years) {
      nodes_of_the_year <-
        Nodes_coupling[time_variable >= Year &
                         time_variable < (Year + time_window),
                       env = list(time_variable = time_variable, Year = Year)]
      
      if (time_variable != "fake_column") {
        nodes_of_the_year[, time_window := paste0(Year, "-", Year + time_window - 1),
                          env = list(Year = Year)]
        if (verbose == TRUE)
          cli::cli_h1(
            "Creation of the network for the {.val {Year}}-{.val {Year + time_window - 1}} window."
          )
      } else {
        nodes_of_the_year <- nodes_of_the_year[, -c("fake_column")]
      }
      
      edges_of_the_year <-
        Edges[source_id %in% nodes_of_the_year[, source_id],
              env = list(source_id = source_id)]
      
      
      
      # size of nodes
      if (compute_size == TRUE) {
        nb_cit <-
          edges_of_the_year[source_id %in% nodes_of_the_year[, source_id], .N, target_id,
                            env = list(source_id = source_id, target_id = target_id)]
        colnames(nb_cit)[colnames(nb_cit) == "N"] <- "node_size"
        
        if ("node_size" %in% colnames(Nodes_coupling) == TRUE)
        {
          cli::cli_warn(
            "You already have a column name {.field node_size}. The content of the column will be replaced."
          )
        }
        nodes_of_the_year <-
          data.table::merge.data.table(nodes_of_the_year,
                                       nb_cit,
                                       by = target_id,
                                       all.x = TRUE)
        
        nodes_of_the_year[is.na(node_size), node_size := 0]
      }
      
      
      
      # backbone
      
      if (backbone_method == "statistical") {
        backbone_functions <-
          data.table::data.table(
            biblio_function = c(
              rlang::expr(backbone::sdsm),
              rlang::expr(backbone::fdsm),
              rlang::expr(backbone::fixedfrow),
              rlang::expr(backbone::fixedcol),
              rlang::expr(backbone::fixedfill)
            ),
            method = c("sdsm",
                       "fdsm",
                       "fixedfrow",
                       "fixedcol",
                       "fixedfill")
          )
        
        backbone_functions <-
          backbone_functions[method == statistical_method][["biblio_function"]][[1]]
        
        # Evaluate the expression with actual data and parameters
        tryCatch({
          edges_of_the_year <-
            rlang::expr((!!backbone_functions)(
              B = as.data.frame(edges_of_the_year),
              alpha = rlang::inject(alpha)
            )) %>%
            eval() %>%
            as.data.table()
          
          edges_bb[, source_id := from]
          edges_bb[, target_it := to]
          
          
        }, error = function(e) {
          stop("Failed to apply backbone extraction with ",
               backbone_method,
               ": ",
               e$message)
        })
      }
      
      
      # coupling
      if (backbone_method == "structured") {
        biblio_functions <-
          data.table::data.table(
            biblio_function = c(
              rlang::expr(biblionetwork::biblio_coupling),
              rlang::expr(biblionetwork::coupling_strength),
              rlang::expr(biblionetwork::coupling_similarity)
            ),
            method = c(
              "coupling_angle",
              "coupling_strength",
              "coupling_similarity"
            )
          )
        biblio_function <-
          biblio_functions[method == cooccurrence_method][["biblio_function"]][[1]]
        edges_of_the_year <-
          rlang::expr((!!biblio_function)(
            dt = edges_of_the_year,
            source = rlang::inject(source_id),
            ref = rlang::inject(target_id),
            weight_threshold = rlang::inject(edges_threshold)
          )
          ) %>%
          eval()
      }
      
      
      
      # remove nodes with no edges
      if (keep_singleton == FALSE) {
        nodes_of_the_year <-
          nodes_of_the_year[source_id %in% edges_of_the_year$from |
                              source_id %in% edges_of_the_year$to, env = list(source_id = source_id)]
      }
      
      # make tbl
      if (length(all_years) == 1) {
        tbl_coup_list <- tidygraph::tbl_graph(
          nodes = nodes_of_the_year,
          edges = edges_of_the_year,
          directed = FALSE,
          node_key = source_id
        )
      } else {
        tbl_coup_list[[paste0(Year, "-", Year + time_window - 1)]] <-
          tidygraph::tbl_graph(
            nodes = nodes_of_the_year,
            edges = edges_of_the_year,
            directed = FALSE,
            node_key = source_id
          )
      }
    }
    if (filter_components == TRUE) {
      tbl_coup_list <- filter_components(tbl_coup_list, ...)
    }
    return (tbl_coup_list)
  }
}

#' @rdname build_dynamic_networks
#' @export

build_network <- function(nodes,
                          directed_edges,
                          source_id,
                          target_id,
                          cooccurrence_method = c("coupling_angle", "coupling_strength", "coupling_similarity"),
                          edges_threshold = 1,
                          compute_size = FALSE,
                          keep_singleton = FALSE,
                          filter_components = FALSE,
                          ...) {
  graph <- build_dynamic_networks(
    nodes = nodes,
    directed_edges = directed_edges,
    source_id = source_id,
    target_id = target_id,
    cooccurrence_method = cooccurrence_method,
    edges_threshold = edges_threshold,
    compute_size = compute_size,
    keep_singleton = keep_singleton,
    filter_components = FALSE,
    ...,
    verbose = FALSE
  )
  if (filter_components == TRUE)
    graph <- filter_components(graph, ...)
  return(graph)
}

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions