From 4073bcb4f9abd8a547804ff97b3722256d0ed8bd Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Tue, 15 Oct 2024 13:57:55 +0200 Subject: [PATCH 1/4] add `keep.zeroes` option --- R/stat-bin.R | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/R/stat-bin.R b/R/stat-bin.R index c085f818a2..65ea8b73b8 100644 --- a/R/stat-bin.R +++ b/R/stat-bin.R @@ -55,6 +55,7 @@ stat_bin <- function(mapping = NULL, data = NULL, closed = c("right", "left"), pad = FALSE, na.rm = FALSE, + keep.zeroes = "all", orientation = NA, show.legend = NA, inherit.aes = TRUE) { @@ -77,6 +78,7 @@ stat_bin <- function(mapping = NULL, data = NULL, pad = pad, na.rm = na.rm, orientation = orientation, + keep.zeroes = keep.zeroes, ... ) ) @@ -89,6 +91,10 @@ stat_bin <- function(mapping = NULL, data = NULL, StatBin <- ggproto("StatBin", Stat, setup_params = function(self, data, params) { params$flipped_aes <- has_flipped_aes(data, params, main_is_orthogonal = FALSE) + params$keep.zeroes <- arg_match0( + params$keep.zeroes %||% "all", + c("all", "none", "inner"), arg_nm = "keep.zeroes" + ) has_x <- !(is.null(data$x) && is.null(params$x)) has_y <- !(is.null(data$y) && is.null(params$y)) @@ -139,7 +145,7 @@ StatBin <- ggproto("StatBin", Stat, compute_group = function(data, scales, binwidth = NULL, bins = NULL, center = NULL, boundary = NULL, closed = c("right", "left"), pad = FALSE, - breaks = NULL, flipped_aes = FALSE, + breaks = NULL, flipped_aes = FALSE, keep.zeroes = "all", # The following arguments are not used, but must # be listed so parameters are computed correctly origin = NULL, right = NULL, drop = NULL) { @@ -163,6 +169,14 @@ StatBin <- ggproto("StatBin", Stat, boundary = boundary, closed = closed) } bins <- bin_vector(data[[x]], bins, weight = data$weight, pad = pad) + + keep <- switch( + keep.zeroes, + none = bins$count != 0, + inner = inner_runs(bins$count != 0), + TRUE + ) + bins <- vec_slice(bins, keep) bins$flipped_aes <- flipped_aes flip_data(bins, flipped_aes) }, @@ -174,3 +188,12 @@ StatBin <- ggproto("StatBin", Stat, dropped_aes = "weight" # after statistical transformation, weights are no longer available ) +inner_runs <- function(x) { + rle <- vec_unrep(x) + nruns <- nrow(rle) + inner <- rep(TRUE, nruns) + i <- unique(c(1, nruns)) + inner[i] <- inner[i] & rle$key[i] + rep(inner, rle$times) +} + From 053f2a53b91f941a74d38a03b442897865ecc87a Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Tue, 15 Oct 2024 14:05:05 +0200 Subject: [PATCH 2/4] add test --- tests/testthat/test-stat-bin.R | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/testthat/test-stat-bin.R b/tests/testthat/test-stat-bin.R index 9b55054604..aaae00871a 100644 --- a/tests/testthat/test-stat-bin.R +++ b/tests/testthat/test-stat-bin.R @@ -118,6 +118,20 @@ test_that("stat_bin() provides width (#3522)", { expect_equal(out$xmax - out$xmin, rep(binwidth, 10)) }) +test_that("stat_bin(keep.zeroes) options work as intended", { + p <- ggplot(data.frame(x = c(1, 2, 2, 3, 5, 6, 6, 7)), aes(x)) + + scale_x_continuous(limits = c(-1, 9)) + + ld <- layer_data(p + geom_histogram(binwidth = 1, keep.zeroes = "all")) + expect_equal(ld$x, -1:9) + + ld <- layer_data(p + geom_histogram(binwidth = 1, keep.zeroes = "inner")) + expect_equal(ld$x, c(1:7)) + + ld <- layer_data(p + geom_histogram(binwidth = 1, keep.zeroes = "none")) + expect_equal(ld$x, c(1:3, 5:7)) +}) + # Underlying binning algorithm -------------------------------------------- test_that("bins() computes fuzz with non-finite breaks", { From d6fbb67fba866bbff196bc76303ca991aedf435e Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Tue, 15 Oct 2024 14:11:00 +0200 Subject: [PATCH 3/4] document --- R/stat-bin.R | 4 ++++ man/geom_histogram.Rd | 6 ++++++ 2 files changed, 10 insertions(+) diff --git a/R/stat-bin.R b/R/stat-bin.R index 65ea8b73b8..9c571ae519 100644 --- a/R/stat-bin.R +++ b/R/stat-bin.R @@ -26,6 +26,10 @@ #' or left edges of bins are included in the bin. #' @param pad If `TRUE`, adds empty bins at either end of x. This ensures #' frequency polygons touch 0. Defaults to `FALSE`. +#' @param keep.zeroes Treatment of zero count bins. If `"all"` (default), such +#' bins are kept as-is. If `"none"`, all zero count bins are filtered out. +#' If `"inner"` only zero count bins at the flanks are filtered out, but not +#' in the middle. #' @eval rd_computed_vars( #' count = "number of points in bin.", #' density = "density of points in bin, scaled to integrate to 1.", diff --git a/man/geom_histogram.Rd b/man/geom_histogram.Rd index 1f290dbcdc..500ae4853c 100644 --- a/man/geom_histogram.Rd +++ b/man/geom_histogram.Rd @@ -46,6 +46,7 @@ stat_bin( closed = c("right", "left"), pad = FALSE, na.rm = FALSE, + keep.zeroes = "all", orientation = NA, show.legend = NA, inherit.aes = TRUE @@ -172,6 +173,11 @@ or left edges of bins are included in the bin.} \item{pad}{If \code{TRUE}, adds empty bins at either end of x. This ensures frequency polygons touch 0. Defaults to \code{FALSE}.} + +\item{keep.zeroes}{Treatment of zero count bins. If \code{"all"} (default), such +bins are kept as-is. If \code{"none"}, all zero count bins are filtered out. +If \code{"inner"} only zero count bins at the flanks are filtered out, but not +in the middle.} } \description{ Visualise the distribution of a single continuous variable by dividing From e2c058718bada18c06154a199c234e14a1e4e1ad Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Tue, 15 Oct 2024 14:11:42 +0200 Subject: [PATCH 4/4] add news bullet --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index f369879868..41fd59e717 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,6 @@ # ggplot2 (development version) +* Added `keep.zeroes` argument to `stat_bin()` (@teunbrand, #3449) * Fixed bug where the `ggplot2::`-prefix did not work with `stage()` (@teunbrand, #6104). * New `get_labs()` function for retrieving completed plot labels