Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 53 additions & 21 deletions R/ggslopegraph2.R
Original file line number Diff line number Diff line change
@@ -1,22 +1,43 @@
#' @title Plot a Slopegraph a la Tufte using dplyr and ggplot2
#'
#' @description Takes a dataframe as input, with three named columns being used to plot. Makes the required adjustments to the ggplot2 parameters and returns the plot.
#' @description Creates a "slopegraph" as conceptualized by Edward Tufte. Slopegraphs are minimalist
#' and efficient presentations of your data that can simultaneously convey the relative rankings,
#' the actual numeric values, and the changes and directionality of the data over time.
#' Takes a dataframe as input, with three named columns being used to draw the plot.
#' Makes the required adjustments to the ggplot2 parameters and returns the plot.
#'
#' @param dataframe a dataframe or an object that can be coerced to a dataframe. Basic error checking is performed.
#' @param times a column inside the dataframe that will be plotted on the x axis. Traditionally this is some measure of time. The function accepts a column of class ordered, factor or character.
#' @param measurement a column inside the dataframe that will be plotted on the y axis. Traditionally this is some measure such as a percentage. Currently the function accepts a column of type integer or numeric.
#' @param dataframe a dataframe or an object that can be coerced to a dataframe.
#' Basic error checking is performed, to include ensuring that the named columns
#' exist in the dataframe. See the \code{\link{cancer2}} dataset for an example of
#' how the dataframe should be organized.
#' @param times a column inside the dataframe that will be plotted on the x axis.
#' Traditionally this is some measure of time. The function accepts a column of class
#' ordered, factor or character. NOTE: if your variable is currently of class "Date",
#' you must convert it with \code{as.character(variablename)} before using the function.
#' @param measurement a column inside the dataframe that will be plotted on the y axis.
#' Traditionally this is some measure such as a percentage. Currently the function
#' accepts a column of type integer or numeric. The slopegraph will be most effective
#' when the measurements are not too disparate.
#' @param grouping a column inside the dataframe that will be used to group and distinguish measurements.
#' @param title Optionally the title to be displayed. title = NULL will remove it entirely. title = "" will provide and empty title but retain the spacing.
#' @param subtitle Optionally the sub-title to be displayed. subtitle = NULL will remove it entirely. subtitle = "" will provide and empty title but retain the sapcing.
#' @param caption Optionally the caption to be displayed. caption = NULL will remove it entirely. caption = "" will provide and empty title but retain the sapcing.
#' @param title Optionally the title to be displayed. title = NULL will remove it entirely. title = "" will provide an empty title but retain the spacing.
#' @param subtitle Optionally the sub-title to be displayed. subtitle = NULL will remove it entirely. subtitle = "" will provide an empty subtitle but retain the spacing.
#' @param caption Optionally the caption to be displayed. caption = NULL will remove it entirely. caption = "" will provide an empty caption but retain the spacing.
#' @param xtextsize Optionally the font size for the X axis labels to be displayed. xtextsize = 12 is the default; must be numeric. Note that X & Y axis text are on different scales.
#' @param ytextsize Optionally the font size for the Y axis labels to be displayed. ytextsize = 3 is the default; must be numeric. Note that X & Y axis text are on different scales.
#' @param titletextsize Optionally the font size for the title to be displayed. titletextsize = 14 is the default; must be numeric.
#' @param subtitletextsize Optionally the font size for the subtitle to be displayed. subtitletextsize = 10 is the default; must be numeric.
#' @param captiontextsize Optionally the font size for the caption to be displayed. captiontextsize = 8 is the default; must be numeric.
#' @param linethickness Optionally the thickness of the plotted lines. linethickness = 1 is the default; must be numeric.
#' @param datatextsize Optionally the font size of the plotted data points. datatextsize = 2.5 is the default; must be numeric.
#' @param linecolor Optionally the color of the plotted lines. By default it will use the ggplot2 color palette for coloring by group. The user may override with one valid color of their choice e.g. "black" must be character.
#' @param linecolor Optionally the color of the plotted lines. By default it will use
#' the ggplot2 color palette for coloring by \code{grouping}. The user may override
#' with \bold{one} valid color of their choice e.g. "black" (see \code{colors()} for choices)
#' \bold{OR}
#' they may provide a vector of colors such as c("gray", "red", "green", "gray", "blue")
#' \bold{OR} a named vector like c("Green" = "gray", "Liberal" = "red", "NDP" = "green",
#' "Others" = "gray", "PC" = "blue"). Any input must be character, and the length
#' of a vector \bold{should} equal the number of levels in \code{grouping}. If the
#' user does not provide enough colors they will be recycled.
#'
#' @return A \code{\link[ggplot2]{ggplot}} object.
#' @author Chuck Powell
Expand All @@ -33,11 +54,20 @@
#' linethickness = 1,
#' subtitle = NULL,
#' caption = NULL)
#' # demonstrating linecolor recycling
#' ggslopegraph2(cancer2, Year, Survival, Type,
#' title = "Estimates of Percent Survival Rates",
#' subtitle = "Based on: Edward Tufte, Beautiful Evidence, 174, 176.",
#' caption = "https://www.edwardtufte.com/bboard/q-and-a-fetch-msg?msg_id=0003nk",
#' linecolor = c("black", "red", "grey"),
#' linethickness = .5)
#'
#'
#' @seealso For an observation-by-time data frame interface, see \code{\link{ggslopegraph}}.
#' @import ggplot2
#' @importFrom dplyr filter mutate group_by summarise %>% n
#' @importFrom dplyr filter mutate group_by summarise %>% n enquo
#' @importFrom ggrepel geom_text_repel
#' @importFrom methods is
#' @export
ggslopegraph2 <-
function(
Expand Down Expand Up @@ -100,15 +130,15 @@ function(
stop(paste0("'", Nmeasurement, "' is not the name of a variable in '", Ndataframe, "'"), call. = FALSE)
}
if (!deparse(substitute(grouping)) %in% names(dataframe)) {
stop(paste0("'", deparse(substitute(grouping)), "' is not the name of a variable in '", Ndataframe, "'"), call. = FALSE)
stop(paste0("'", Ngrouping, "' is not the name of a variable in '", Ndataframe, "'"), call. = FALSE)
}
if (!class(dataframe[[Nmeasurement]]) %in% c("integer","numeric")) {
stop(paste0("Sorry I need the measured variable '", Nmeasurement, "' to be a number"), call. = FALSE)
}
if (!"ordered" %in% class(dataframe[[Ntimes]])) { # keep checking
if (!"character" %in% class(dataframe[[Ntimes]])) { # keep checking
if ("factor" %in% class(dataframe[[Ntimes]])) { # impose order
warning("Converting to an ordered factor", call. = FALSE)
message(paste0("\nConverting '", Ntimes, "' to an ordered factor\n"))
dataframe[[Ntimes]] <- factor(dataframe[[Ntimes]], ordered = TRUE)
} else {
stop(paste0("Sorry I need the variable '", Ntimes, "' to be of class character, factor or ordered"), call. = FALSE)
Expand All @@ -120,19 +150,24 @@ function(
measurement <- enquo(measurement)
grouping <- enquo(grouping)

if (linecolor != "ByGroup" ) {
line_geom <- list(geom_line(aes_(), size = linethickness, color = linecolor))
if (length(linecolor) > 1) {
if (length(linecolor) < length(unique(dataframe[[Ngrouping]]))) {
message(paste0("\nYou gave me ", length(linecolor), " colors I'm recycling because you have ", length(unique(dataframe[[Ngrouping]])), " ", Ngrouping, "\n"))
linecolor <- rep(linecolor, length.out = length(unique(dataframe[[Ngrouping]])))
}
line_geom <- list(geom_line(aes_(color = grouping), size = linethickness), scale_color_manual(values = linecolor))
} else {
line_geom <- list(geom_line(aes_(color = grouping, alpha = 1), size = linethickness))
if (linecolor == "ByGroup") {
line_geom <- list(geom_line(aes_(color = grouping, alpha = 1), size = linethickness))
} else {
line_geom <- list(geom_line(aes_(), size = linethickness, color = linecolor))
}
}

dataframe %>%
filter(!is.na(!! times), !is.na(!! measurement), !is.na(!! grouping)) %>%
# mutate(!!quo_name(times) := factor(!!times), !!quo_name(measurement) := factor(!!measurement)) %>%
ggplot(aes_(group=grouping, y=measurement, x=times)) +
line_geom +
# geom_line(aes_(), size = linethickness, color = "black") +
# geom_line(aes_(color = grouping, alpha = 1), size = linethickness) +
geom_text_repel(data = dataframe %>% filter(!! times == min(!! times)),
aes_(label = grouping) ,
hjust = "left",
Expand All @@ -156,6 +191,3 @@ function(
)
}

# title = "Estimates of Percent Survival Rates"
# subtitle = "Based on: Edward Tufte, Beautiful Evidence, 174, 176."
# caption = "https://www.edwardtufte.com/bboard/q-and-a-fetch-msg?msg_id=0003nk"