-
Notifications
You must be signed in to change notification settings - Fork 117
Description
When using scales::label_number_auto() on a named vector, very large (>1e6) or small (<1e-3) values can cause the labeller to return names(x) instead of formatted numbers:
small <- c(0.0000142, 0.0000012, 0.0000850) |> setNames(c("A", "B", "C"))
medium <- c(0.5043490, 0.4010748, 0.4772737) |> setNames(c("A", "B", "C"))
large <- c(620389.30, 1116839.1, 519534.12) |> setNames(c("A", "B", "C"))
labeller <- scales::label_number_auto()
# `small` and `large` print using their names:
labeller(small)
#> [1] "A" "B" "C"
labeller(medium)
#> A B C
#> "0.5043490" "0.4010748" "0.4772737"
labeller(large)
#> [1] "A" "B" "C"
# Works fine when they don't have names:
labeller(unname(small))
#> [1] "1.42e-05" "1.20e-06" "8.50e-05"
labeller(unname(medium))
#> [1] "0.5043490" "0.4010748" "0.4772737"
labeller(unname(large))
#> [1] "620 389" "1 116 839" "519 534"
# Or when their names are longer than the values they would replace:
small |> setNames(c("some really long name", "foo", "medium name")) |> labeller()
#> [1] "some really long name" "foo" "medium name"
large |> setNames(c("some really long name", "foo", "medium name")) |> labeller()
#> [1] "620 389" "foo" "519 534"
Created on 2025-10-26 with reprex v2.1.1
My real-world example
You can obviously avoid this issue by un-naming the vectors or by not giving them names in the first place. In my case, however, I wasn't even aware the vector had names. Both rstatix::cor_test() and stats::cor.test() attach a name to the statistic they return, and that name carries through to the statistic column:
library(tidyverse)
# Subset of my dataset:
Gambling <- tibble(
Age = c( 43,42,37,71,59,48,70,25,34,34,68,38,55,21,44,45,59,50,61,26,55,60,20,38,53,70,56,52,29,78,40,59,59,32,24,28,32,45,21,24,49,74,35,46,58,46,56,64,50,26,45,38,51,50,45,52,27,37,45,33,43,47,32,41,52,36,41,60,26,50,26,35,23,54,23,37,21,61,57,61,49,60,30,41,32,38,60,44,65,64,42,38,21,78,43,70,41,41,64,71,25,47,36,49,29,31,60,32,53,26,37,49,37,62,53,73,36,59,36,55,62,37,53,38,36,54,24,44,47,53,62,58,55,46,68,60,36,46,64,25,51,41,57,50,58,26,45,55,49,29,67,42,64,67,40,30,75,45,28,57,38,42,40,40,67,55,47,57,75,43,18,65,46,56,26,19,40,36,44,59,31,50,53,32,51,46,24,32,55,58,50,24,45,40,30,21,54,43,34,56 ),
LTW = c( -5000,-3000,-100,-10000,0,-2000,-1500,300,-1000,500,-5000,-1000,-4,10,15000,-40,0,-1000,-100,-100,-1000,-500,-20,-10,-200,-100,30000,-100,-1000,7100,-25,-100,200,-100,-500,375,500,-1000,-1000,-500,-125,-200,-5000,-500,-300,-5000,-10000,1000,500,-500,-2000,0,-5000,-3000,-1500,-500,-1000,-200,-2000,-500,-1000,-50,-200,-50,-3000,-1000,3000,-3000,-50,-1000,-200,-400,3500,-200,20,-5000,-40,-300,50000,3000,400,-2000,-100,126000,-100,4000,-250,-1000,-100,-5000,-2000,-5000,NA,0,-300,-200,1200,10000,-10000,-20,-500,-2500,-300,-5000,-400,0,-5000,2,3000,-1000,2000,30000,250,-15000,-100,-100,-5000,750,-1000,1000,-1000,-60,-5000,-3500,-5000,-10000,-1000,-1500,-1000,-100,-3000,150000,-900,-5000,-1000,-10000,-200,-200,-100,-250,15000,-400,2000,0,-100,-2000,50,NA,5000,-150,-1000,350,-1000,-400,-100,10000,15000,-10000,0,10000,-5,-500,-100,2000,300,-2000,400,-4000,-7000,0,50,-5000,-80,-10000,5000,-6500,-200,-5000,40,-1000,-200,15000,5000,100,-2500,-500,3000,-300,1000,-4000,500,-200,-100,-4500,-500,NA,-2500,-200,-200,20000 ),
Gambled = c( 120,50,0,200,0,30,100,20,50,25,1000,200,0,10,200,20,10,4,60,60,0,100,20,0,50,10,0,0,400,40,10,0,40,50,5,200,50,0,50,100,20,0,40,0,20,100,500,0,0,10,3,120,20,0,10,0,250,200,80,20,20,20,8,0,300,4,60,300,0,20,0,6,0,0,0,150,20,3,300,10,30,75,100,60,0,0,30,20,0,0,10,200,0,20,0,200,200,1000,10,0,10,400,200,60,20,50,150,0,20,10,80,0,0,200,0,0,0,0,5,0,12,20,100,5,40,1000,0,0,10,0,100,300,200,100,60,200,7,0,0,10,0,0,100,0,0,60,0,0,0,20,60,20,0,0,2,100,100,0,100,100,0,60,40,100,50,6,10,20,12,0,30,30,0,60,200,1000,0,25,0,0,0,20,1000,0,200,2,400,20,25,30,0,3,0,20,20,50,40,20,5,300 ),
AG = c( 43,42,37,71,59,48,70,25,34,34,68,38,55,21,44,45,59,50,61,26,55,60,20,38,53,70,56,52,29,78,40,59,59,32,24,28,32,45,21,24,49,74,35,46,58,46,56,64,50,26,45,38,51,50,45,52,27,37,45,33,43,47,32,41,52,36,41,60,26,50,26,35,23,54,23,37,21,61,57,61,49,60,30,41,32,38,60,44,65,64,42,38,21,78,43,70,41,41,64,71,25,47,36,49,29,31,60,32,53,26,37,49,37,62,53,73,36,59,36,55,62,37,53,38,36,54,24,44,47,53,62,58,55,46,68,60,36,46,64,25,51,41,57,50,58,26,45,55,49,29,67,42,64,67,40,30,75,45,28,57,38,42,40,40,67,55,47,57,75,43,18,65,46,56,26,19,40,36,44,59,31,50,53,32,51,46,24,32,55,58,50,24,45,40,30,21,54,43,34,56 ),
)
# Run some correlation tests based on pairs of them:
cor_pairs <- lst(c("Age", "LTW"), c("Age", "Gambled"), c("AG", "LTW"))
cor_tests <- cor_pairs |>
map(\(pair) rstatix::cor_test(Gambling, vars = all_of(pair), method = "spearman")) |>
bind_rows()
# Output is a normal tibble:
cor_tests
#> # A tibble: 3 × 6
#> var1 var2 cor statistic p method
#> <chr> <chr> <dbl> <dbl> <dbl> <chr>
#> 1 Age LTW -0.091 1390229. 0.203 Spearman
#> 2 Age Gambled -0.0029 1337115. 0.968 Spearman
#> 3 AG LTW -0.091 1390229. 0.203 Spearman
# ...except for the fact that `statistic` has names on it!
cor_tests |> pull(statistic) |> str()
#> Named num [1:3] 1390229 1337115 1390229
#> - attr(*, "names")= chr [1:3] "S" "S" "S"
# Try and print them as siunitx \num{...} inside a kable for Pandoc to handle:
labeller <- scales::label_number_auto()
cor_tests |>
mutate(across(c(cor, statistic, p), \(x) str_c("\\num{", labeller(x), "}"))) |>
knitr::kable() |>
print()
#>
#>
#> |var1 |var2 |cor |statistic |p |method |
#> |:----|:-------|:-------------|:---------|:-----------|:--------|
#> |Age |LTW |\num{-0.0910} |\num{S} |\num{0.203} |Spearman |
#> |Age |Gambled |\num{-0.0029} |\num{S} |\num{0.968} |Spearman |
#> |AG |LTW |\num{-0.0910} |\num{S} |\num{0.203} |Spearman |
# ^
# Not a valid number, LaTeX crashes!
Created on 2025-10-26 with reprex v2.1.1
Looking at the code, this appears to happen because label_number_auto() falls back to the deprecated format_format() when max_magnitude > 1e6 or min_magnitude < 1e-3, and format_format() defaults to using the vector's names as labels. The final labels are then selected from the candidates based on which is shorter.
Based on the documentation of label_number_auto(), it looks like the correct behaviour here should be for it to select between format_number and format_scientific, not format_format. That would definitely be a breaking change, though, so I'm not sure if it's the best idea to simply swap it out (I'm not sure why anybody would ever want this to be the behaviour of a function meant to give numeric labels, but that doesn't make the change any less breaking...).
Depending on what the policy here is on making those sorts of changes, I'm happy to submit a PR for it, if desired. Let me know. 🙂