From ba94748ede7bdd721e24900122e23bca50aa1471 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 12 Jun 2020 11:29:56 -0500 Subject: [PATCH 1/5] BUG: Fixed Series.replace for EA with casting Closes https://github.com/pandas-dev/pandas/issues/34530 --- doc/source/whatsnew/v1.1.0.rst | 1 + pandas/core/internals/blocks.py | 6 +++++- pandas/tests/extension/base/methods.py | 6 ++++++ pandas/tests/series/methods/test_replace.py | 5 +++++ 4 files changed, 17 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 92f7c0f6b59a3..17ba643f18ed2 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -1003,6 +1003,7 @@ Reshaping - Bug where :meth:`Index.astype` would lose the name attribute when converting from ``Float64Index`` to ``Int64Index``, or when casting to an ``ExtensionArray`` dtype (:issue:`32013`) - :meth:`Series.append` will now raise a ``TypeError`` when passed a DataFrame or a sequence containing Dataframe (:issue:`31413`) - :meth:`DataFrame.replace` and :meth:`Series.replace` will raise a ``TypeError`` if ``to_replace`` is not an expected type. Previously the ``replace`` would fail silently (:issue:`18634`) +- Bug in :meth:`~DataFrame.replace` raising an ``AssertionError`` when replacing values in an extension dtype with values of a different dtype (:issue:`34530`) - Bug on inplace operation of a Series that was adding a column to the DataFrame from where it was originally dropped from (using inplace=True) (:issue:`30484`) - Bug in :meth:`DataFrame.apply` where callback was called with :class:`Series` parameter even though ``raw=True`` requested. (:issue:`32423`) - Bug in :meth:`DataFrame.pivot_table` losing timezone information when creating a :class:`MultiIndex` level from a column with timezone-aware dtype (:issue:`32558`) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index e2a778f729470..8e7cf105ef487 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -745,7 +745,11 @@ def replace( if is_object_dtype(self): raise - assert not self._can_hold_element(value), value + if not self.is_extension: + # TODO: https://github.com/pandas-dev/pandas/issues/32586 + # Need an ExtensionArray._can_hold_element to indicate whether + # a scalar value can be placed in the array. + assert not self._can_hold_element(value), value # try again with a compatible block block = self.astype(object) diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index 874a8dfd4253f..1282e30b83dd4 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -452,3 +452,9 @@ def test_equals(self, data, na_value, as_series, box): # other types assert data.equals(None) is False assert data[[0]].equals(data[0]) is False + + def test_replace_nonsense(self, data): + # https://github.com/pandas-dev/pandas/issues/34530 + ser = pd.Series(data) + ser.replace("", "") # no exception + ser.to_frame().replace("", "") # no exception diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py index 330c682216f53..8f57cf3191d5d 100644 --- a/pandas/tests/series/methods/test_replace.py +++ b/pandas/tests/series/methods/test_replace.py @@ -402,3 +402,8 @@ def test_replace_only_one_dictlike_arg(self): msg = "Series.replace cannot use dict-value and non-None to_replace" with pytest.raises(ValueError, match=msg): ser.replace(to_replace, value) + + def test_replace_extension_other(self): + # https://github.com/pandas-dev/pandas/issues/34530 + ser = pd.Series(pd.array([1, 2, 3], dtype="Int64")) + ser.replace("", "") # no exception From 6d08740c541a165bcca72ee0edd17ea26a36318d Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 12 Jun 2020 11:56:59 -0500 Subject: [PATCH 2/5] PERF: Fixed perf regression in TimedeltaIndex.get_loc Closes https://github.com/pandas-dev/pandas/issues/34510 --- doc/source/whatsnew/v1.1.0.rst | 1 + pandas/core/arrays/datetimelike.py | 10 ++++++++-- pandas/core/indexes/timedeltas.py | 3 +-- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 17ba643f18ed2..1173370e0d02d 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -760,6 +760,7 @@ Deprecations Performance improvements ~~~~~~~~~~~~~~~~~~~~~~~~ +- Fixed performance regression in :meth:`TimedeltaIndex.get_loc` (:issue:`34510`) - Performance improvement in :class:`Timedelta` constructor (:issue:`30543`) - Performance improvement in :class:`Timestamp` constructor (:issue:`30543`) - Performance improvement in flex arithmetic ops between :class:`DataFrame` and :class:`Series` with ``axis=0`` (:issue:`31296`) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 8af23815b54ef..1fea6ca1b8a3d 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -776,15 +776,19 @@ def _validate_shift_value(self, fill_value): return self._unbox(fill_value) - def _validate_scalar(self, value, msg: str, cast_str: bool = False): + def _validate_scalar( + self, value, msg: Optional[str] = None, cast_str: bool = False + ): """ Validate that the input value can be cast to our scalar_type. Parameters ---------- value : object - msg : str + msg : str, optional. Message to raise in TypeError on invalid input. + If not provided, `value` is cast to a str and used + as the message. cast_str : bool, default False Whether to try to parse string input to scalar_type. @@ -807,6 +811,8 @@ def _validate_scalar(self, value, msg: str, cast_str: bool = False): value = self._scalar_type(value) # type: ignore else: + if msg is None: + msg = str(value) raise TypeError(msg) return value diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index ce3ff17814a25..c5ad0bf0d5ddf 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -212,9 +212,8 @@ def get_loc(self, key, method=None, tolerance=None): if not is_scalar(key): raise InvalidIndexError(key) - msg = str(key) try: - key = self._data._validate_scalar(key, msg, cast_str=True) + key = self._data._validate_scalar(key, cast_str=True) except TypeError as err: raise KeyError(key) from err From 1eb3469b70d523fed5779170b3f773c04375105e Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 12 Jun 2020 11:58:37 -0500 Subject: [PATCH 3/5] Revert "PERF: Fixed perf regression in TimedeltaIndex.get_loc" This reverts commit 6d08740c541a165bcca72ee0edd17ea26a36318d. --- doc/source/whatsnew/v1.1.0.rst | 1 - pandas/core/arrays/datetimelike.py | 10 ++-------- pandas/core/indexes/timedeltas.py | 3 ++- 3 files changed, 4 insertions(+), 10 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 1173370e0d02d..17ba643f18ed2 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -760,7 +760,6 @@ Deprecations Performance improvements ~~~~~~~~~~~~~~~~~~~~~~~~ -- Fixed performance regression in :meth:`TimedeltaIndex.get_loc` (:issue:`34510`) - Performance improvement in :class:`Timedelta` constructor (:issue:`30543`) - Performance improvement in :class:`Timestamp` constructor (:issue:`30543`) - Performance improvement in flex arithmetic ops between :class:`DataFrame` and :class:`Series` with ``axis=0`` (:issue:`31296`) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 1fea6ca1b8a3d..8af23815b54ef 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -776,19 +776,15 @@ def _validate_shift_value(self, fill_value): return self._unbox(fill_value) - def _validate_scalar( - self, value, msg: Optional[str] = None, cast_str: bool = False - ): + def _validate_scalar(self, value, msg: str, cast_str: bool = False): """ Validate that the input value can be cast to our scalar_type. Parameters ---------- value : object - msg : str, optional. + msg : str Message to raise in TypeError on invalid input. - If not provided, `value` is cast to a str and used - as the message. cast_str : bool, default False Whether to try to parse string input to scalar_type. @@ -811,8 +807,6 @@ def _validate_scalar( value = self._scalar_type(value) # type: ignore else: - if msg is None: - msg = str(value) raise TypeError(msg) return value diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index c5ad0bf0d5ddf..ce3ff17814a25 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -212,8 +212,9 @@ def get_loc(self, key, method=None, tolerance=None): if not is_scalar(key): raise InvalidIndexError(key) + msg = str(key) try: - key = self._data._validate_scalar(key, cast_str=True) + key = self._data._validate_scalar(key, msg, cast_str=True) except TypeError as err: raise KeyError(key) from err From 10087282bace7dab1525c75e6a5e2c8c1f71957e Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 12 Jun 2020 12:42:00 -0500 Subject: [PATCH 4/5] revert base --- pandas/tests/extension/base/methods.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index 1282e30b83dd4..874a8dfd4253f 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -452,9 +452,3 @@ def test_equals(self, data, na_value, as_series, box): # other types assert data.equals(None) is False assert data[[0]].equals(data[0]) is False - - def test_replace_nonsense(self, data): - # https://github.com/pandas-dev/pandas/issues/34530 - ser = pd.Series(data) - ser.replace("", "") # no exception - ser.to_frame().replace("", "") # no exception From 7ad9c1e60ec74ead03c867369ac55c8ac78d4f01 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 15 Jun 2020 13:05:29 -0500 Subject: [PATCH 5/5] move note --- doc/source/whatsnew/v1.0.5.rst | 2 ++ doc/source/whatsnew/v1.1.0.rst | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.5.rst b/doc/source/whatsnew/v1.0.5.rst index 7dfac54279e6f..fdf08dd381050 100644 --- a/doc/source/whatsnew/v1.0.5.rst +++ b/doc/source/whatsnew/v1.0.5.rst @@ -24,6 +24,8 @@ Note this disables the ability to read Parquet files from directories on S3 again (:issue:`26388`, :issue:`34632`), which was added in the 1.0.4 release, but is now targeted for pandas 1.1.0. +- Fixed regression in :meth:`~DataFrame.replace` raising an ``AssertionError`` when replacing values in an extension dtype with values of a different dtype (:issue:`34530`) + .. _whatsnew_105.bug_fixes: Bug fixes diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 9dcf754a5d7b0..0c746b197c5b8 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -1030,7 +1030,6 @@ Reshaping - Bug where :meth:`Index.astype` would lose the name attribute when converting from ``Float64Index`` to ``Int64Index``, or when casting to an ``ExtensionArray`` dtype (:issue:`32013`) - :meth:`Series.append` will now raise a ``TypeError`` when passed a DataFrame or a sequence containing Dataframe (:issue:`31413`) - :meth:`DataFrame.replace` and :meth:`Series.replace` will raise a ``TypeError`` if ``to_replace`` is not an expected type. Previously the ``replace`` would fail silently (:issue:`18634`) -- Bug in :meth:`~DataFrame.replace` raising an ``AssertionError`` when replacing values in an extension dtype with values of a different dtype (:issue:`34530`) - Bug on inplace operation of a Series that was adding a column to the DataFrame from where it was originally dropped from (using inplace=True) (:issue:`30484`) - Bug in :meth:`DataFrame.apply` where callback was called with :class:`Series` parameter even though ``raw=True`` requested. (:issue:`32423`) - Bug in :meth:`DataFrame.pivot_table` losing timezone information when creating a :class:`MultiIndex` level from a column with timezone-aware dtype (:issue:`32558`)