From 93b0dc4dc498282476f8c2f20be015aa334e902d Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 12 Feb 2022 14:21:03 -0800 Subject: [PATCH 1/3] BUG: Series[dt64].__setitem__ with all-false mask incorrectly upcasting --- pandas/core/internals/blocks.py | 44 +++++++++++++++----- pandas/tests/frame/indexing/test_where.py | 12 +++++- pandas/tests/series/indexing/test_setitem.py | 15 +++++++ 3 files changed, 58 insertions(+), 13 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index d5ea257af353e..03f4fda01327e 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1477,26 +1477,48 @@ def putmask(self, mask, new) -> list[Block]: new = self._maybe_squeeze_arg(new) mask = self._maybe_squeeze_arg(mask) + if not mask.any(): + return [self] + try: # Caller is responsible for ensuring matching lengths values._putmask(mask, new) except (TypeError, ValueError) as err: _catch_deprecated_value_error(err) - if is_interval_dtype(self.dtype): - # Discussion about what we want to support in the general - # case GH#39584 - blk = self.coerce_to_target_dtype(orig_new) - return blk.putmask(orig_mask, orig_new) + if self.ndim == 1 or self.shape[0] == 1: - elif isinstance(self, NDArrayBackedExtensionBlock): - # NB: not (yet) the same as - # isinstance(values, NDArrayBackedExtensionArray) - blk = self.coerce_to_target_dtype(orig_new) - return blk.putmask(orig_mask, orig_new) + if is_interval_dtype(self.dtype): + # Discussion about what we want to support in the general + # case GH#39584 + blk = self.coerce_to_target_dtype(orig_new) + return blk.putmask(orig_mask, orig_new) + + elif isinstance(self, NDArrayBackedExtensionBlock): + # NB: not (yet) the same as + # isinstance(values, NDArrayBackedExtensionArray) + blk = self.coerce_to_target_dtype(orig_new) + return blk.putmask(orig_mask, orig_new) + + else: + raise else: - raise + # Same pattern we use in Block.putmask + is_array = isinstance(orig_new, (np.ndarray, 
ExtensionArray)) + + res_blocks = [] + nbs = self._split() + for i, nb in enumerate(nbs): + n = orig_new + if is_array: + # we have a different value per-column + n = orig_new[:, i : i + 1] + + submask = orig_mask[:, i : i + 1] + rbs = nb.putmask(submask, n) + res_blocks.extend(rbs) + return res_blocks return [self] diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py index 8214d8781a05a..ca050a7d7db4a 100644 --- a/pandas/tests/frame/indexing/test_where.py +++ b/pandas/tests/frame/indexing/test_where.py @@ -988,8 +988,16 @@ def _check_where_equivalences(df, mask, other, expected): res = df.mask(~mask, other) tm.assert_frame_equal(res, expected) - # Note: we cannot do the same with frame.mask(~mask, other, inplace=True) - # bc that goes through Block.putmask which does *not* downcast. + # Note: frame.mask(~mask, other, inplace=True) takes some more work bc + # Block.putmask does *not* downcast. The change to 'expected' here + # is specific to the cases in test_where_dt64_2d. 
+ df = df.copy() + df.mask(~mask, other, inplace=True) + if not mask.all(): + # with mask.all(), Block.putmask is a no-op, so does not downcast + expected = expected.copy() + expected["A"] = expected["A"].astype(object) + tm.assert_frame_equal(df, expected) def test_where_dt64_2d(): diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index c270584342491..dd83ac49eda86 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -1625,3 +1625,18 @@ def test_setitem_bool_indexer_dont_broadcast_length1_values(size, mask, item, bo expected = Series(np.arange(size, dtype=float)) expected[selection] = item tm.assert_series_equal(ser, expected) + + +def test_setitem_empty_mask_dont_upcast_dt64(): + dti = date_range("2016-01-01", periods=3) + ser = Series(dti) + orig = ser.copy() + mask = np.zeros(3, dtype=bool) + + ser[mask] = "foo" + assert ser.dtype == dti.dtype # no-op -> dont upcast + tm.assert_series_equal(ser, orig) + + ser.mask(mask, "foo", inplace=True) + assert ser.dtype == dti.dtype # no-op -> dont upcast + tm.assert_series_equal(ser, orig) From 6b401550c3d422c86ebeeae1adaa9591b411097e Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 13 Feb 2022 10:27:50 -0800 Subject: [PATCH 2/3] whatsnew --- doc/source/whatsnew/v1.5.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index a35ca589065d8..6ab7b6b0f03f5 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -324,6 +324,7 @@ Indexing - Bug in :meth:`loc.__setitem__` treating ``range`` keys as positional instead of label-based (:issue:`45479`) - Bug in :meth:`Series.__setitem__` when setting ``boolean`` dtype values containing ``NA`` incorrectly raising instead of casting to ``boolean`` dtype (:issue:`45462`) - Bug in :meth:`Series.__setitem__` where setting :attr:`NA` into a numeric-dtpye :class:`Series` would 
incorrectly upcast to object-dtype rather than treating the value as ``np.nan`` (:issue:`44199`) +- Bug in :meth:`Series.__setitem__` with ``datetime64[ns]`` dtype, an all-``False`` boolean mask, and an incompatible value incorrectly casting to ``object`` instead of retaining ``datetime64[ns]`` dtype (:issue:`45967`) - Bug in :meth:`Series.mask` with ``inplace=True`` or setting values with a boolean mask with small integer dtypes incorrectly raising (:issue:`45750`) - Bug in :meth:`DataFrame.mask` with ``inplace=True`` and ``ExtensionDtype`` columns incorrectly raising (:issue:`45577`) - Bug in getting a column from a DataFrame with an object-dtype row index with datetime-like values: the resulting Series now preserves the exact object-dtype Index from the parent DataFrame (:issue:`42950`) From faf22ed1e1f0326c58b45ebe87429596f289be58 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 14 Feb 2022 18:28:09 -0800 Subject: [PATCH 3/3] compat for old numpy --- pandas/core/array_algos/putmask.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/core/array_algos/putmask.py b/pandas/core/array_algos/putmask.py index 03fb2d78cb5d4..d0779b48ae094 100644 --- a/pandas/core/array_algos/putmask.py +++ b/pandas/core/array_algos/putmask.py @@ -81,7 +81,11 @@ def putmask_without_repeat( # TODO: this prob needs some better checking for 2D cases nlocs = mask.sum() if nlocs > 0 and is_list_like(new) and getattr(new, "ndim", 1) == 1: - if nlocs == len(new): + shape = np.shape(new) + # use np.shape for compat in case setitem_datetimelike_compat + # changed arraylike to list, e.g. test_where_dt64_2d + + if nlocs == shape[-1]: # GH#30567 # If length of ``new`` is less than the length of ``values``, # `np.putmask` would first repeat the ``new`` array and then @@ -90,7 +94,7 @@ def putmask_without_repeat( # to place in the masked locations of ``values`` np.place(values, mask, new) # i.e. 
values[mask] = new - elif mask.shape[-1] == len(new) or len(new) == 1: + elif mask.shape[-1] == shape[-1] or shape[-1] == 1: np.putmask(values, mask, new) else: raise ValueError("cannot assign mismatch length to masked array")