From a1ee6a828137e06e24bd8ecc81e856470dcb69b3 Mon Sep 17 00:00:00 2001 From: Garrick Aden-Buie Date: Thu, 31 Jan 2019 12:15:43 -0500 Subject: [PATCH] Add pre_sort() and sorting_hat() --- DESCRIPTION | 11 ++++++++--- NAMESPACE | 8 ++++++-- R/sorting_hat.R | 32 ++++++++++++++++++++++++++++++++ man/pipe.Rd | 12 ++++++++++++ man/pre_sort.Rd | 14 ++++++++++++++ man/sorting_hat.Rd | 14 ++++++++++++++ 6 files changed, 86 insertions(+), 5 deletions(-) create mode 100644 R/sorting_hat.R create mode 100644 man/pipe.Rd create mode 100644 man/pre_sort.Rd create mode 100644 man/sorting_hat.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 6dcf326..88a5661 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,10 +2,13 @@ Package: tricks Title: Performs statistical tricks Version: 0.0.0.9000 Authors@R: - person(given = "Travis", + c(person(given = "Travis", family = "Gerke", role = c("aut", "cre"), - email = "travis.gerke@moffitt.org") + email = "travis.gerke@moffitt.org"), + person("Garrick", + "Aden-Buie", + email = "garrick.aden-buie@moffitt.org")) Description: What the package does (one paragraph) License: What license it uses Encoding: UTF-8 @@ -18,6 +21,8 @@ Suggests: Language: en-US Imports: magrittr, - broom + broom, + purrr, + dplyr URL: https://github.com/GerkeLab/tricks BugReports: https://github.com/GerkeLab/tricks/issues diff --git a/NAMESPACE b/NAMESPACE index 884a631..2b50e3c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,2 +1,6 @@ -# Generated by roxygen2: fake comment so roxygen2 overwrites silently. 
-exportPattern("^[^\\.]")
+# Generated by roxygen2: do not edit by hand
+
+export("%>%")
+export(pre_sort)
+export(sorting_hat)
+importFrom(magrittr,"%>%")
diff --git a/R/sorting_hat.R b/R/sorting_hat.R
new file mode 100644
index 0000000..c47a14e
--- /dev/null
+++ b/R/sorting_hat.R
@@ -0,0 +1,55 @@
+#' Gather information about the data variables
+#'
+#' Builds a summary with one row per column of \code{df}.
+#'
+#' @param df The input data frame
+#' @return A data frame with one row per column of \code{df} and the columns
+#'   \code{col_name}, \code{type} (first element of the column's class
+#'   vector), \code{n_unique} (number of distinct values, with \code{NA}
+#'   counted as a value), \code{n_complete} (number of non-missing values)
+#'   and \code{cv} (coefficient of variation; \code{NA} for non-numeric
+#'   columns).
+#' @export
+sorting_hat <- function(df) {
+  # Keep only the first class of each column: a multi-class column (ordered
+  # factor, POSIXct, ...) would otherwise expand to several rows and the
+  # per-column summaries below would fail on a length mismatch.
+  x <- purrr::map(df, ~ class(.x)[[1]]) %>%
+    purrr::map_dfr(
+      ~ data.frame(type = .x, stringsAsFactors = FALSE),
+      .id = "col_name"
+    ) %>%
+    dplyr::mutate(
+      n_unique = purrr::map_int(df, ~ length(unique(.x))),
+      n_complete = purrr::map_int(df, ~ sum(!is.na(.x)))
+    )
+
+  # Map over the columns directly so duplicated column names cannot make
+  # a by-name lookup return the wrong column.
+  x$cv <- unname(purrr::map_dbl(df, cov))
+
+  x
+}
+
+# Coefficient of variation (standard deviation / mean), ignoring NAs.
+# Returns NA_real_ for anything that is not numeric. is.numeric() is used so
+# that integer columns are summarised too; inherits(x, "numeric") is FALSE
+# for integers. This internal helper masks stats::cov() inside the package.
+cov <- function(x) {
+  if (!is.numeric(x)) {
+    return(NA_real_)
+  }
+  sd(x, na.rm = TRUE) / mean(x, na.rm = TRUE)
+}
+
+#' Pre-sorting processing of data
+#'
+#' Cast character columns to factors.
+#'
+#' @param df The input data frame
+#' @return \code{df} with every character column converted to a factor;
+#'   other columns are left unchanged.
+#' @export
+pre_sort <- function(df) {
+  dplyr::mutate_if(df, is.character, as.factor)
+}
diff --git a/man/pipe.Rd b/man/pipe.Rd
new file mode 100644
index 0000000..b7daf6a
--- /dev/null
+++ b/man/pipe.Rd
@@ -0,0 +1,12 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils-pipe.R
+\name{\%>\%}
+\alias{\%>\%}
+\title{Pipe operator}
+\usage{
+lhs \%>\% rhs
+}
+\description{
+See \code{magrittr::\link[magrittr]{\%>\%}} for details.
+}
+\keyword{internal}
diff --git a/man/pre_sort.Rd b/man/pre_sort.Rd
new file mode 100644
index 0000000..42b2106
--- /dev/null
+++ b/man/pre_sort.Rd
@@ -0,0 +1,14 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/sorting_hat.R
+\name{pre_sort}
+\alias{pre_sort}
+\title{Pre-sorting processing of data}
+\usage{
+pre_sort(df)
+}
+\arguments{
+\item{df}{The input data frame}
+}
+\description{
+Cast character columns to factors.
+} diff --git a/man/sorting_hat.Rd b/man/sorting_hat.Rd new file mode 100644 index 0000000..c9579f5 --- /dev/null +++ b/man/sorting_hat.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sorting_hat.R +\name{sorting_hat} +\alias{sorting_hat} +\title{Gather information about the data variables} +\usage{ +sorting_hat(df) +} +\arguments{ +\item{df}{The input data frame} +} +\description{ +Gather information about the data variables +}