diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 4ff3cc728f7f7..d36ddc075b91e 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1420,6 +1420,7 @@ Groupby/Resample/Rolling - Bug in :meth:`DataFrame.expanding` in which the ``axis`` argument was not being respected during aggregations (:issue:`23372`) - Bug in :meth:`pandas.core.groupby.DataFrameGroupBy.transform` which caused missing values when the input function can accept a :class:`DataFrame` but renames it (:issue:`23455`). - Bug in :func:`pandas.core.groupby.GroupBy.nth` where column order was not always preserved (:issue:`20760`) +- Bug in :meth:`pandas.core.groupby.DataFrameGroupBy.rank` with ``method='dense'`` and ``pct=True`` which raised a ``ZeroDivisionError`` when a group had only one member (:issue:`23666`). Reshaping ^^^^^^^^^ diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in index 523d43f893aad..abac9f147848e 100644 --- a/pandas/_libs/groupby_helper.pxi.in +++ b/pandas/_libs/groupby_helper.pxi.in @@ -587,7 +587,7 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out, # rankings, so we assign them percentages of NaN. 
if out[i, 0] != out[i, 0] or out[i, 0] == NAN: out[i, 0] = NAN - else: + elif grp_sizes[i, 0] != 0: out[i, 0] = out[i, 0] / grp_sizes[i, 0] {{endif}} {{endfor}} diff --git a/pandas/tests/groupby/test_rank.py b/pandas/tests/groupby/test_rank.py index e7e91572c56d1..aaac614761083 100644 --- a/pandas/tests/groupby/test_rank.py +++ b/pandas/tests/groupby/test_rank.py @@ -290,3 +290,18 @@ def test_rank_empty_group(): result = df.groupby(column).rank(pct=True) expected = DataFrame({"B": [0.5, np.nan, 1.0]}) tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("input_key,input_value,output_value", [ + ([1, 2], [1, 1], [1.0, 1.0]), + ([1, 1, 2, 2], [1, 2, 1, 2], [0.5, 1.0, 0.5, 1.0]), + ([1, 1, 2, 2], [1, 2, 1, np.nan], [0.5, 1.0, 1.0, np.nan]), + ([1, 1, 2], [1, 2, np.nan], [0.5, 1.0, np.nan]) +]) +def test_rank_zero_div(input_key, input_value, output_value): + # GH 23666 + df = DataFrame({"A": input_key, "B": input_value}) + + result = df.groupby("A").rank(method="dense", pct=True) + expected = DataFrame({"B": output_value}) + tm.assert_frame_equal(result, expected)