From a398d360ea484a73694e370077279a832c2cebce Mon Sep 17 00:00:00 2001 From: Davis Vaughan Date: Fri, 9 Jan 2026 15:10:21 -0500 Subject: [PATCH] Avoid underscored function variants --- R/data_preparation.R | 6 +++--- R/discretize.R | 8 ++++---- R/information_theory.R | 2 +- R/models_lib.R | 6 +++--- R/target_profiling.R | 17 +++++++++-------- 5 files changed, 20 insertions(+), 19 deletions(-) diff --git a/R/data_preparation.R b/R/data_preparation.R index 32343a7..6b92f08 100644 --- a/R/data_preparation.R +++ b/R/data_preparation.R @@ -23,7 +23,7 @@ auto_grouping <- function(data, input, target, n_groups, model="kmeans", seed=99 df_categ=categ_analysis(data, input , target) - d=select_(df_categ, "perc_target", "perc_rows") + d=dplyr::select(df_categ, dplyr::all_of(c("perc_target", "perc_rows"))) set.seed(seed) if(model=="kmeans") { @@ -44,12 +44,12 @@ auto_grouping <- function(data, input, target, n_groups, model="kmeans", seed=99 df_categ[, var_rec]=paste("group_", cluster_vec, sep = "") ## See new profiling based on new groups - data_rec=merge(select_(data, input, target), select_(df_categ, input, var_rec), by=input) + data_rec=merge(dplyr::select(data, dplyr::all_of(c(input, target))), dplyr::select(df_categ, dplyr::all_of(c(input, var_rec))), by=input) recateg_results=categ_analysis(data_rec, var_rec, target) l_res=list() l_res$recateg_results=recateg_results - l_res$df_equivalence=arrange_(unique(select_(data_rec, input, var_rec)), var_rec) + l_res$df_equivalence=dplyr::arrange(unique(dplyr::select(data_rec, dplyr::all_of(c(input, var_rec)))), .data[[var_rec]]) l_res$fit_cluster=fit_cluster return(l_res) diff --git a/R/discretize.R b/R/discretize.R index e1226fc..da0ed69 100644 --- a/R/discretize.R +++ b/R/discretize.R @@ -40,10 +40,10 @@ discretize_df <- function(data, data_bins, stringsAsFactors=T) if(stringsAsFactors) { - data_2b=data %>% mutate_at(vars(vars_num), conv_factor) - data_3=data_2b %>% mutate_at(vars(vars_num), funs(factor(replace(., is.na(.), "NA.")))) + data_2b=data %>% dplyr::mutate(dplyr::across(dplyr::all_of(vars_num), conv_factor)) + data_3=data_2b %>% dplyr::mutate(dplyr::across(dplyr::all_of(vars_num), ~factor(replace(., is.na(.), "NA.")))) } else { - data_3=data %>% mutate_at(vars(vars_num), funs(ifelse(is.na(.), "NA.", .))) + data_3=data %>% dplyr::mutate(dplyr::across(dplyr::all_of(vars_num), ~ifelse(is.na(.), "NA.", .))) } message(sprintf("Variables processed: %s", paste(vars_num, collapse = ", "))) @@ -213,7 +213,7 @@ convert_df_to_categoric <- function(data, n_bins) data_cat=discretize_df(data = data, data_bins = d_cuts, stringsAsFactors = F) # Converting remaining variables - data_cat_2=data_cat %>% mutate_all(as.character) + data_cat_2=data_cat %>% dplyr::mutate(dplyr::across(dplyr::everything(), as.character)) return(data_cat_2) } diff --git a/R/information_theory.R b/R/information_theory.R index 5c25f71..bb8db87 100644 --- a/R/information_theory.R +++ b/R/information_theory.R @@ -23,7 +23,7 @@ entropy_2 <- function(input, target) # get partial entropy df_tbl=as.data.frame.matrix(tbl) - res_entropy=data.frame(t(df_tbl)) %>% mutate_all(funs(entropy(., unit = "log2"))) %>% head(.,1) + res_entropy=data.frame(t(df_tbl)) %>% dplyr::mutate(dplyr::across(dplyr::everything(), ~entropy(., unit = "log2"))) %>% head(.,1) # computing total entropy total_en=sum(probs_input*res_entropy) diff --git a/R/models_lib.R b/R/models_lib.R index 0fbd571..44f60b9 100644 --- a/R/models_lib.R +++ b/R/models_lib.R @@ -139,7 +139,7 @@ desc_groups <- function(data, group_var, group_func=mean, add_all_data_row=T) stat=status(data) vars_to_keep=stat[stat$type %in% c("integer", "numeric") & stat$variable != group_var, "variable"] - grp_mean=data %>% group_by_(group_var) %>% summarise_each_(funs(group_func), vars_to_keep) %>% mutate_each_(funs(round(.,2)), vars_to_keep) + grp_mean=data %>% dplyr::group_by(dplyr::across(dplyr::all_of(group_var))) %>% dplyr::summarise(dplyr::across(dplyr::all_of(vars_to_keep), group_func), .groups = "drop") %>% dplyr::mutate(dplyr::across(dplyr::all_of(vars_to_keep), ~round(., 2))) grp_mean=data.frame(grp_mean) grp_mean[,group_var]=as.character(grp_mean[,group_var]) @@ -149,7 +149,7 @@ desc_groups <- function(data, group_var, group_func=mean, add_all_data_row=T) # vars_to_keep have all num variables (excluding group_var and factor/char). Calculate 'All_Data' means per column data_num=select(data, one_of(vars_to_keep)) - b=as.data.frame(data_num) %>% summarise_each(funs(group_func)) + b=as.data.frame(data_num) %>% dplyr::summarise(dplyr::across(dplyr::everything(), group_func)) ## putting all together: the sumarization per group plus the total per column all_results=rbind(a, b) @@ -194,7 +194,7 @@ desc_groups_rank <- function(data, group_var, group_func=mean) vars_to_group=all_col[all_col!=group_var] # mutate each does the group by only for variables defined in vars_to_group - d_group_rank=d_group %>% mutate_each_(funs(dense_rank(desc(.))), vars_to_group) + d_group_rank=d_group %>% dplyr::mutate(dplyr::across(dplyr::all_of(vars_to_group), ~dplyr::dense_rank(dplyr::desc(.)))) return(d_group_rank) } diff --git a/R/target_profiling.R b/R/target_profiling.R index 02dbb47..b002b5d 100644 --- a/R/target_profiling.R +++ b/R/target_profiling.R @@ -71,7 +71,7 @@ get_target_plot <- function(data, input, target, plot_type) histdens_target <- function(data, input, target) { - cdf=group_by_(data, target) %>% summarise_(var.mean=interp(~mean(v, na.rm=T), v=as.name(input))) + cdf=data %>% dplyr::group_by(dplyr::across(dplyr::all_of(target))) %>% dplyr::summarise(var.mean = mean(.data[[input]], na.rm=TRUE), .groups = "drop") cdf$var.mean=round(cdf$var.mean, 2) @@ -167,13 +167,14 @@ categ_analysis_logic <- function(data, input, target) tot_pos=sum(data[,target]==pred_class) ## profiling - grp=group_by_(data, input) %>% summarise_( - mean_target=interp(~round(mean(var==pred_class, na.rm = TRUE), 3), var = as.name(target)), - sum_target=interp(~sum(var==pred_class, na.rm = TRUE), var = as.name(target)), - perc_target=interp(~round(sum(var==pred_class, na.rm = TRUE)/tot_pos,3), var = as.name(target)), - q_rows=~n(), - perc_rows=~round(n()/nrow(data), 3) - ) %>% arrange(-mean_target) + grp=data %>% dplyr::group_by(dplyr::across(dplyr::all_of(input))) %>% dplyr::summarise( + mean_target=round(mean(.data[[target]]==pred_class, na.rm = TRUE), 3), + sum_target=sum(.data[[target]]==pred_class, na.rm = TRUE), + perc_target=round(sum(.data[[target]]==pred_class, na.rm = TRUE)/tot_pos,3), + q_rows=dplyr::n(), + perc_rows=round(dplyr::n()/nrow(data), 3), + .groups = "drop" + ) %>% dplyr::arrange(-mean_target) #colnames(grp)[colnames(grp)=='sum_target']=paste("sum", target, sep="_") #colnames(grp)[colnames(grp)=='perc_target']=paste("perc", target, sep="_")