diff --git a/NEWS.md b/NEWS.md index c1d127b555..83e411a4dd 100644 --- a/NEWS.md +++ b/NEWS.md @@ -188,6 +188,7 @@ * The ellipsis argument is now checked in `fortify()`, `get_alt_text()`, `labs()` and several guides (@teunbrand, #3196). * `stat_summary_bin()` no longer ignores `width` parameter (@teunbrand, #4647). +* Added `keep.zeroes` argument to `stat_bin()` (@teunbrand, #3449) # ggplot2 3.5.1 diff --git a/R/stat-bin.R b/R/stat-bin.R index c085f818a2..9c571ae519 100644 --- a/R/stat-bin.R +++ b/R/stat-bin.R @@ -26,6 +26,10 @@ #' or left edges of bins are included in the bin. #' @param pad If `TRUE`, adds empty bins at either end of x. This ensures #' frequency polygons touch 0. Defaults to `FALSE`. +#' @param keep.zeroes Treatment of zero count bins. If `"all"` (default), such +#' bins are kept as-is. If `"none"`, all zero count bins are filtered out. +#' If `"inner"` only zero count bins at the flanks are filtered out, but not +#' in the middle. #' @eval rd_computed_vars( #' count = "number of points in bin.", #' density = "density of points in bin, scaled to integrate to 1.", @@ -55,6 +59,7 @@ stat_bin <- function(mapping = NULL, data = NULL, closed = c("right", "left"), pad = FALSE, na.rm = FALSE, + keep.zeroes = "all", orientation = NA, show.legend = NA, inherit.aes = TRUE) { @@ -77,6 +82,7 @@ stat_bin <- function(mapping = NULL, data = NULL, pad = pad, na.rm = na.rm, orientation = orientation, + keep.zeroes = keep.zeroes, ... ) ) @@ -89,6 +95,10 @@ stat_bin <- function(mapping = NULL, data = NULL, StatBin <- ggproto("StatBin", Stat, setup_params = function(self, data, params) { params$flipped_aes <- has_flipped_aes(data, params, main_is_orthogonal = FALSE) + params$keep.zeroes <- arg_match0( + params$keep.zeroes %||% "all", + c("all", "none", "inner"), arg_nm = "keep.zeroes" + ) has_x <- !(is.null(data$x) && is.null(params$x)) has_y <- !(is.null(data$y) && is.null(params$y)) @@ -139,7 +149,7 @@ StatBin <- ggproto("StatBin", Stat, compute_group = function(data, scales, binwidth = NULL, bins = NULL, center = NULL, boundary = NULL, closed = c("right", "left"), pad = FALSE, - breaks = NULL, flipped_aes = FALSE, + breaks = NULL, flipped_aes = FALSE, keep.zeroes = "all", # The following arguments are not used, but must # be listed so parameters are computed correctly origin = NULL, right = NULL, drop = NULL) { @@ -163,6 +173,14 @@ StatBin <- ggproto("StatBin", Stat, boundary = boundary, closed = closed) } bins <- bin_vector(data[[x]], bins, weight = data$weight, pad = pad) + + keep <- switch( + keep.zeroes, + none = bins$count != 0, + inner = inner_runs(bins$count != 0), + TRUE + ) + bins <- vec_slice(bins, keep) bins$flipped_aes <- flipped_aes flip_data(bins, flipped_aes) }, @@ -174,3 +192,12 @@ StatBin <- ggproto("StatBin", Stat, dropped_aes = "weight" # after statistical transformation, weights are no longer available ) +inner_runs <- function(x) { + rle <- vec_unrep(x) + nruns <- nrow(rle) + inner <- rep(TRUE, nruns) + i <- unique(c(1, nruns)) + inner[i] <- inner[i] & rle$key[i] + rep(inner, rle$times) +} + diff --git a/man/geom_histogram.Rd b/man/geom_histogram.Rd index a241aa2ba4..32f9c39610 100644 --- a/man/geom_histogram.Rd +++ b/man/geom_histogram.Rd @@ -46,6 +46,7 @@ stat_bin( closed = c("right", "left"), pad = FALSE, na.rm = FALSE, + keep.zeroes = "all", orientation = NA, show.legend = NA, inherit.aes = TRUE @@ -172,6 +173,11 @@ or left edges of bins are included in the bin.} \item{pad}{If \code{TRUE}, adds empty bins at either end of x. This ensures frequency polygons touch 0. Defaults to \code{FALSE}.} + +\item{keep.zeroes}{Treatment of zero count bins. If \code{"all"} (default), such +bins are kept as-is. If \code{"none"}, all zero count bins are filtered out. +If \code{"inner"} only zero count bins at the flanks are filtered out, but not +in the middle.} } \description{ Visualise the distribution of a single continuous variable by dividing diff --git a/tests/testthat/test-stat-bin.R b/tests/testthat/test-stat-bin.R index 5baedf9223..a114748daf 100644 --- a/tests/testthat/test-stat-bin.R +++ b/tests/testthat/test-stat-bin.R @@ -118,6 +118,20 @@ test_that("stat_bin() provides width (#3522)", { expect_equal(out$xmax - out$xmin, rep(binwidth, 10)) }) +test_that("stat_bin(keep.zeroes) options work as intended", { + p <- ggplot(data.frame(x = c(1, 2, 2, 3, 5, 6, 6, 7)), aes(x)) + + scale_x_continuous(limits = c(-1, 9)) + + ld <- layer_data(p + geom_histogram(binwidth = 1, keep.zeroes = "all")) + expect_equal(ld$x, -1:9) + + ld <- layer_data(p + geom_histogram(binwidth = 1, keep.zeroes = "inner")) + expect_equal(ld$x, c(1:7)) + + ld <- layer_data(p + geom_histogram(binwidth = 1, keep.zeroes = "none")) + expect_equal(ld$x, c(1:3, 5:7)) +}) + # Underlying binning algorithm -------------------------------------------- test_that("bins() computes fuzz with non-finite breaks", {