diff --git a/doc/source/whatsnew/v1.3.3.rst b/doc/source/whatsnew/v1.3.3.rst index 47ed073d12f1c..ddd3a8cf1ecb7 100644 --- a/doc/source/whatsnew/v1.3.3.rst +++ b/doc/source/whatsnew/v1.3.3.rst @@ -17,6 +17,7 @@ Fixed regressions - Fixed regression in :class:`DataFrame` constructor failing to broadcast for defined :class:`Index` and len one list of :class:`Timestamp` (:issue:`42810`) - Performance regression in :meth:`core.window.ewm.ExponentialMovingWindow.mean` (:issue:`42333`) - Fixed regression in :meth:`.GroupBy.agg` incorrectly raising in some cases (:issue:`42390`) +- Fixed regression in :meth:`.GroupBy.apply` where ``nan`` values were dropped even with ``dropna=False`` (:issue:`43205`) - Fixed regression in :meth:`merge` where ``on`` columns with ``ExtensionDtype`` or ``bool`` data types were cast to ``object`` in ``right`` and ``outer`` merge (:issue:`40073`) - Fixed regression in :meth:`RangeIndex.where` and :meth:`RangeIndex.putmask` raising ``AssertionError`` when result did not represent a :class:`RangeIndex` (:issue:`43240`) - Fixed regression in :meth:`read_parquet` where the ``fastparquet`` engine would not work properly with fastparquet 0.7.0 (:issue:`43075`) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 5a70db517ad12..6e8fbae77b4f2 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1012,7 +1012,11 @@ def reset_identity(values): if not not_indexed_same: result = concat(values, axis=self.axis) - ax = self.filter(lambda x: True).axes[self.axis] + ax = ( + self.filter(lambda x: True).axes[self.axis] + if self.dropna + else self._selected_obj._get_axis(self.axis) + ) # this is a very unfortunate situation # we can't use reindex to restore the original order diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 2007e60dbc5d0..fcb5e5a1ee9f5 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -1102,9 +1102,10 @@ def test_apply_by_cols_equals_apply_by_rows_transposed(): tm.assert_frame_equal(by_cols, df) -def test_apply_dropna_with_indexed_same(): +@pytest.mark.parametrize("dropna", [True, False]) +def test_apply_dropna_with_indexed_same(dropna): # GH 38227 - + # GH#43205 df = DataFrame( { "col": [1, 2, 3, 4, 5], @@ -1112,15 +1113,8 @@ def test_apply_dropna_with_indexed_same(): }, index=list("xxyxz"), ) - result = df.groupby("group").apply(lambda x: x) - expected = DataFrame( - { - "col": [1, 4, 5], - "group": ["a", "b", "b"], - }, - index=list("xxz"), - ) - + result = df.groupby("group", dropna=dropna).apply(lambda x: x) + expected = df.dropna() if dropna else df.iloc[[0, 3, 1, 2, 4]] tm.assert_frame_equal(result, expected)