From 948bf6d0693bfecbaf1a4aa60bf539d84380b36a Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Thu, 2 Feb 2023 18:59:42 +0100 Subject: [PATCH 1/5] DOC: remove inplace usage from docstring examples --- pandas/core/arrays/categorical.py | 7 ----- pandas/core/frame.py | 18 ------------ pandas/core/indexes/base.py | 7 ----- pandas/core/series.py | 46 ++----------------------------- 4 files changed, 2 insertions(+), 76 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index f2e54185c11ff..7197994bfbcb1 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1673,13 +1673,6 @@ def sort_values( [5, 2, 2, 1, 1] Categories (3, int64): [1, 2, 5] - Inplace sorting can be done as well: - - >>> c.sort_values(inplace=True) - >>> c - [1, 1, 2, 2, 5] - Categories (3, int64): [1, 2, 5] - >>> >>> c = pd.Categorical([1, 2, 2, 1, 5]) 'sort_values' behaviour with NaNs. Note that 'na_position' diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 36a7ef7cd6d9e..44550169f1f20 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4512,17 +4512,6 @@ def eval(self, expr: str, *, inplace: bool = False, **kwargs) -> Any | None: 3 4 4 4 5 2 - Use ``inplace=True`` to modify the original DataFrame. - - >>> df.eval('C = A + B', inplace=True) - >>> df - A B C - 0 1 10 11 - 1 2 8 10 - 2 3 6 9 - 3 4 4 8 - 4 5 2 7 - Multiple columns can be assigned to using multi-line expressions: >>> df.eval( @@ -6372,13 +6361,6 @@ def dropna( name toy born 1 Batman Batmobile 1940-04-25 2 Catwoman Bullwhip NaT - - Keep the DataFrame with valid entries in the same variable. - - >>> df.dropna(inplace=True) - >>> df - name toy born - 1 Batman Batmobile 1940-04-25 """ if (how is not no_default) and (thresh is not no_default): raise TypeError( diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 881e83313ced5..40e6a593678da 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1745,13 +1745,6 @@ def set_names( ( 'cobra', 2018), ( 'cobra', 2019)], ) - >>> idx.set_names(['kind', 'year'], inplace=True) - >>> idx - MultiIndex([('python', 2018), - ('python', 2019), - ( 'cobra', 2018), - ( 'cobra', 2019)], - names=['kind', 'year']) >>> idx.set_names('species', level=0) MultiIndex([('python', 2018), ('python', 2019), diff --git a/pandas/core/series.py b/pandas/core/series.py index 63420309f33fc..05c5f18aa1faa 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1494,17 +1494,6 @@ def reset_index( 3 4 Name: foo, dtype: int64 - To update the Series in place, without generating a new one - set `inplace` to True. Note that it also requires ``drop=True``. - - >>> s.reset_index(inplace=True, drop=True) - >>> s - 0 1 - 1 2 - 2 3 - 3 4 - Name: foo, dtype: int64 - The `level` parameter is interesting for Series with a multi-level index. @@ -2242,11 +2231,9 @@ def drop_duplicates( Name: animal, dtype: object The value ``False`` for parameter 'keep' discards all sets of - duplicated entries. Setting the value of 'inplace' to ``True`` performs - the operation inplace and returns ``None``. + duplicated entries. - >>> s.drop_duplicates(keep=False, inplace=True) - >>> s + >>> s.drop_duplicates(keep=False) 1 cow 3 beetle 5 hippo @@ -3490,17 +3477,6 @@ def sort_values( 0 NaN dtype: float64 - Sort values inplace - - >>> s.sort_values(ascending=False, inplace=True) - >>> s - 3 10.0 - 4 5.0 - 2 3.0 - 1 1.0 - 0 NaN - dtype: float64 - Sort values putting NAs first >>> s.sort_values(na_position='first') @@ -3750,16 +3726,6 @@ def sort_index( 1 c dtype: object - Sort Inplace - - >>> s.sort_index(inplace=True) - >>> s - 1 c - 2 b - 3 a - 4 d - dtype: object - By default NaNs are put at the end, but use `na_position` to place them at the beginning @@ -5601,14 +5567,6 @@ def dropna( 1 2.0 dtype: float64 - Keep the Series with valid entries in the same variable. - - >>> ser.dropna(inplace=True) - >>> ser - 0 1.0 - 1 2.0 - dtype: float64 - Empty strings are not considered NA values. ``None`` is considered an NA value. From 4e5fdb7aabf6fd961565bb1df5b8bb2a00462725 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Thu, 2 Feb 2023 21:25:02 +0100 Subject: [PATCH 2/5] Remove copy examples --- pandas/core/frame.py | 7 ------- pandas/core/generic.py | 11 ----------- 2 files changed, 18 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 44550169f1f20..496a69bc1eeba 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4988,13 +4988,6 @@ def align( 1 2 5 2 3 6 - Now, update the labels without copying the underlying data. - - >>> df.set_axis(['i', 'ii'], axis='columns', copy=False) - i ii - 0 1 4 - 1 2 5 - 2 3 6 """ ) @Substitution( diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 8fa86e80e1a44..6211248129ee6 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6216,17 +6216,6 @@ def astype( dtype: category Categories (2, int64): [2 < 1] - Note that using ``copy=False`` and changing data on a new - pandas object may propagate changes: - - >>> s1 = pd.Series([1, 2]) - >>> s2 = s1.astype('int64', copy=False) - >>> s2[0] = 10 - >>> s1 # note that s1[0] has changed too - 0 10 - 1 2 - dtype: int64 - Create a series of dates: >>> ser_date = pd.Series(pd.date_range('20200101', periods=3)) From 201f87795bfc281fa5b0663c9c82ef6afffea01e Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Thu, 2 Feb 2023 23:14:01 +0100 Subject: [PATCH 3/5] Fix some warnings --- pandas/compat/numpy/__init__.py | 1 + pandas/core/arrays/masked.py | 13 ++++++++++--- pandas/core/dtypes/astype.py | 5 ++++- pandas/core/dtypes/cast.py | 4 +++- pandas/core/indexes/base.py | 12 +++++++++--- pandas/tests/io/parser/test_c_parser_only.py | 17 ++++++++++------- pandas/tests/series/test_constructors.py | 4 ++-- 7 files changed, 39 insertions(+), 17 deletions(-) diff --git a/pandas/compat/numpy/__init__.py b/pandas/compat/numpy/__init__.py index 60ec74553a207..6f31358dabe86 100644 --- a/pandas/compat/numpy/__init__.py +++ b/pandas/compat/numpy/__init__.py @@ -9,6 +9,7 @@ np_version_under1p21 = _nlv < Version("1.21") np_version_under1p22 = _nlv < Version("1.22") np_version_gte1p22 = _nlv >= Version("1.22") +np_version_gte1p24 = _nlv >= Version("1.24") is_numpy_dev = _nlv.dev is not None _min_numpy_ver = "1.20.3" diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 8324d4b2618f1..e38886ebfa16b 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -428,10 +428,14 @@ def to_numpy( "for this dtype." ) # don't pass copy to astype -> always need a copy since we are mutating - data = self._data.astype(dtype) + with warnings.catch_warnings(): + warnings.filterwarnings("ignore") + data = self._data.astype(dtype) data[self._mask] = na_value else: - data = self._data.astype(dtype, copy=copy) + with warnings.catch_warnings(): + warnings.filterwarnings("ignore") + data = self._data.astype(dtype, copy=copy) return data @doc(ExtensionArray.tolist) @@ -464,7 +468,10 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike: # if we are astyping to another nullable masked dtype, we can fastpath if isinstance(dtype, BaseMaskedDtype): # TODO deal with NaNs for FloatingArray case - data = self._data.astype(dtype.numpy_dtype, copy=copy) + with warnings.catch_warnings(): + warnings.filterwarnings("ignore") + # TODO: Is rounding what we want long term? + data = self._data.astype(dtype.numpy_dtype, copy=copy) # mask is copied depending on whether the data was copied, and # not directly depending on the `copy` keyword mask = self._mask if data is self._data else self._mask.copy() diff --git a/pandas/core/dtypes/astype.py b/pandas/core/dtypes/astype.py index e5b0b5658534f..59828ea9bb823 100644 --- a/pandas/core/dtypes/astype.py +++ b/pandas/core/dtypes/astype.py @@ -9,6 +9,7 @@ TYPE_CHECKING, overload, ) +import warnings import numpy as np @@ -153,7 +154,9 @@ def _astype_float_to_int_nansafe( # GH#45151 if not (values >= 0).all(): raise ValueError(f"Cannot losslessly cast from {values.dtype} to {dtype}") - return values.astype(dtype, copy=copy) + with warnings.catch_warnings(): + warnings.filterwarnings("ignore") + return values.astype(dtype, copy=copy) def astype_array(values: ArrayLike, dtype: DtypeObj, copy: bool = False) -> ArrayLike: diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 3be89f6da2bd8..14b57a5f4642c 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1606,7 +1606,9 @@ def maybe_cast_to_integer_array(arr: list | np.ndarray, dtype: np.dtype) -> np.n ) casted = np.array(arr, dtype=dtype, copy=False) else: - casted = arr.astype(dtype, copy=False) + with warnings.catch_warnings(): + warnings.filterwarnings("ignore") + casted = arr.astype(dtype, copy=False) except OverflowError as err: raise OverflowError( "The elements provided in the data cannot all be " diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 13afd6e5be664..efc35d2175b4f 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3874,9 +3874,15 @@ def _get_fill_indexer( # but that doesn't appear to be enforced # error: "IndexEngine" has no attribute "get_indexer_with_fill" engine = self._engine - return engine.get_indexer_with_fill( # type: ignore[union-attr] - target=target._values, values=self._values, method=method, limit=limit - ) + with warnings.catch_warnings(): + # TODO: We need to fix this. Casting to int64 in cython + warnings.filterwarnings("ignore") + return engine.get_indexer_with_fill( # type: ignore[union-attr] + target=target._values, + values=self._values, + method=method, + limit=limit, + ) if self.is_monotonic_increasing and target.is_monotonic_increasing: target_values = target._get_engine_target() diff --git a/pandas/tests/io/parser/test_c_parser_only.py b/pandas/tests/io/parser/test_c_parser_only.py index d5a7610ecb8a9..4c6bc3583676a 100644 --- a/pandas/tests/io/parser/test_c_parser_only.py +++ b/pandas/tests/io/parser/test_c_parser_only.py @@ -21,6 +21,7 @@ IS64, is_ci_environment, ) +from pandas.compat.numpy import np_version_gte1p24 from pandas.errors import ParserError import pandas.util._test_decorators as td @@ -114,14 +115,16 @@ def test_dtype_and_names_error(c_parser_only): 3.0 3 """ # fallback casting, but not castable + warning = RuntimeWarning if np_version_gte1p24 else None with pytest.raises(ValueError, match="cannot safely convert"): - parser.read_csv( - StringIO(data), - sep=r"\s+", - header=None, - names=["a", "b"], - dtype={"a": np.int32}, - ) + with tm.assert_produces_warning(warning, check_stacklevel=False): + parser.read_csv( + StringIO(data), + sep=r"\s+", + header=None, + names=["a", "b"], + dtype={"a": np.int32}, + ) @pytest.mark.parametrize( diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 7f65bce873126..99f7f06b2fc89 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -798,7 +798,7 @@ def test_constructor_floating_data_int_dtype(self, frame_or_series): # Long-standing behavior (for Series, new in 2.0 for DataFrame) # has been to ignore the dtype on these; # not clear if this is what we want long-term - expected = frame_or_series(arr) + # expected = frame_or_series(arr) # GH#49599 as of 2.0 we raise instead of silently retaining float dtype msg = "Trying to coerce float values to integer" @@ -810,7 +810,7 @@ def test_constructor_floating_data_int_dtype(self, frame_or_series): # pre-2.0, when we had NaNs, we silently ignored the integer dtype arr[0] = np.nan - expected = frame_or_series(arr) + # expected = frame_or_series(arr) msg = r"Cannot convert non-finite values \(NA or inf\) to integer" with pytest.raises(IntCastingNaNError, match=msg): From ce3c59010c292a851fb343b767ba2595e31a9266 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Fri, 3 Feb 2023 10:26:16 +0100 Subject: [PATCH 4/5] Fix docstring --- pandas/core/frame.py | 1 - pandas/core/indexes/base.py | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 496a69bc1eeba..4f13ead4005e7 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4987,7 +4987,6 @@ def align( 0 1 4 1 2 5 2 3 6 - """ ) @Substitution( diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index efc35d2175b4f..92217b940c11c 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1745,6 +1745,7 @@ def set_names( ( 'cobra', 2018), ( 'cobra', 2019)], ) + >>> idx = idx.set_names(['kind', 'year']) >>> idx.set_names('species', level=0) MultiIndex([('python', 2018), ('python', 2019), From dc7b0280bbe867563536ed2a03be5545159a52fd Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Fri, 3 Feb 2023 10:26:58 +0100 Subject: [PATCH 5/5] Revert "Fix some warnings" This reverts commit 201f87795bfc281fa5b0663c9c82ef6afffea01e. --- pandas/compat/numpy/__init__.py | 1 - pandas/core/arrays/masked.py | 13 +++---------- pandas/core/dtypes/astype.py | 5 +---- pandas/core/dtypes/cast.py | 4 +--- pandas/core/indexes/base.py | 12 +++--------- pandas/tests/io/parser/test_c_parser_only.py | 17 +++++++---------- pandas/tests/series/test_constructors.py | 4 ++-- 7 files changed, 17 insertions(+), 39 deletions(-) diff --git a/pandas/compat/numpy/__init__.py b/pandas/compat/numpy/__init__.py index 6f31358dabe86..60ec74553a207 100644 --- a/pandas/compat/numpy/__init__.py +++ b/pandas/compat/numpy/__init__.py @@ -9,7 +9,6 @@ np_version_under1p21 = _nlv < Version("1.21") np_version_under1p22 = _nlv < Version("1.22") np_version_gte1p22 = _nlv >= Version("1.22") -np_version_gte1p24 = _nlv >= Version("1.24") is_numpy_dev = _nlv.dev is not None _min_numpy_ver = "1.20.3" diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index e38886ebfa16b..8324d4b2618f1 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -428,14 +428,10 @@ def to_numpy( "for this dtype." ) # don't pass copy to astype -> always need a copy since we are mutating - with warnings.catch_warnings(): - warnings.filterwarnings("ignore") - data = self._data.astype(dtype) + data = self._data.astype(dtype) data[self._mask] = na_value else: - with warnings.catch_warnings(): - warnings.filterwarnings("ignore") - data = self._data.astype(dtype, copy=copy) + data = self._data.astype(dtype, copy=copy) return data @doc(ExtensionArray.tolist) @@ -468,10 +464,7 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike: # if we are astyping to another nullable masked dtype, we can fastpath if isinstance(dtype, BaseMaskedDtype): # TODO deal with NaNs for FloatingArray case - with warnings.catch_warnings(): - warnings.filterwarnings("ignore") - # TODO: Is rounding what we want long term? - data = self._data.astype(dtype.numpy_dtype, copy=copy) + data = self._data.astype(dtype.numpy_dtype, copy=copy) # mask is copied depending on whether the data was copied, and # not directly depending on the `copy` keyword mask = self._mask if data is self._data else self._mask.copy() diff --git a/pandas/core/dtypes/astype.py b/pandas/core/dtypes/astype.py index 59828ea9bb823..e5b0b5658534f 100644 --- a/pandas/core/dtypes/astype.py +++ b/pandas/core/dtypes/astype.py @@ -9,7 +9,6 @@ TYPE_CHECKING, overload, ) -import warnings import numpy as np @@ -154,9 +153,7 @@ def _astype_float_to_int_nansafe( # GH#45151 if not (values >= 0).all(): raise ValueError(f"Cannot losslessly cast from {values.dtype} to {dtype}") - with warnings.catch_warnings(): - warnings.filterwarnings("ignore") - return values.astype(dtype, copy=copy) + return values.astype(dtype, copy=copy) def astype_array(values: ArrayLike, dtype: DtypeObj, copy: bool = False) -> ArrayLike: diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 14b57a5f4642c..3be89f6da2bd8 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1606,9 +1606,7 @@ def maybe_cast_to_integer_array(arr: list | np.ndarray, dtype: np.dtype) -> np.n ) casted = np.array(arr, dtype=dtype, copy=False) else: - with warnings.catch_warnings(): - warnings.filterwarnings("ignore") - casted = arr.astype(dtype, copy=False) + casted = arr.astype(dtype, copy=False) except OverflowError as err: raise OverflowError( "The elements provided in the data cannot all be " diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 92217b940c11c..3eef90343bdf6 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3875,15 +3875,9 @@ def _get_fill_indexer( # but that doesn't appear to be enforced # error: "IndexEngine" has no attribute "get_indexer_with_fill" engine = self._engine - with warnings.catch_warnings(): - # TODO: We need to fix this. Casting to int64 in cython - warnings.filterwarnings("ignore") - return engine.get_indexer_with_fill( # type: ignore[union-attr] - target=target._values, - values=self._values, - method=method, - limit=limit, - ) + return engine.get_indexer_with_fill( # type: ignore[union-attr] + target=target._values, values=self._values, method=method, limit=limit + ) if self.is_monotonic_increasing and target.is_monotonic_increasing: target_values = target._get_engine_target() diff --git a/pandas/tests/io/parser/test_c_parser_only.py b/pandas/tests/io/parser/test_c_parser_only.py index 4c6bc3583676a..d5a7610ecb8a9 100644 --- a/pandas/tests/io/parser/test_c_parser_only.py +++ b/pandas/tests/io/parser/test_c_parser_only.py @@ -21,7 +21,6 @@ IS64, is_ci_environment, ) -from pandas.compat.numpy import np_version_gte1p24 from pandas.errors import ParserError import pandas.util._test_decorators as td @@ -115,16 +114,14 @@ def test_dtype_and_names_error(c_parser_only): 3.0 3 """ # fallback casting, but not castable - warning = RuntimeWarning if np_version_gte1p24 else None with pytest.raises(ValueError, match="cannot safely convert"): - with tm.assert_produces_warning(warning, check_stacklevel=False): - parser.read_csv( - StringIO(data), - sep=r"\s+", - header=None, - names=["a", "b"], - dtype={"a": np.int32}, - ) + parser.read_csv( + StringIO(data), + sep=r"\s+", + header=None, + names=["a", "b"], + dtype={"a": np.int32}, + ) @pytest.mark.parametrize( diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 99f7f06b2fc89..7f65bce873126 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -798,7 +798,7 @@ def test_constructor_floating_data_int_dtype(self, frame_or_series): # Long-standing behavior (for Series, new in 2.0 for DataFrame) # has been to ignore the dtype on these; # not clear if this is what we want long-term - # expected = frame_or_series(arr) + expected = frame_or_series(arr) # GH#49599 as of 2.0 we raise instead of silently retaining float dtype msg = "Trying to coerce float values to integer" @@ -810,7 +810,7 @@ def test_constructor_floating_data_int_dtype(self, frame_or_series): # pre-2.0, when we had NaNs, we silently ignored the integer dtype arr[0] = np.nan - # expected = frame_or_series(arr) + expected = frame_or_series(arr) msg = r"Cannot convert non-finite values \(NA or inf\) to integer" with pytest.raises(IntCastingNaNError, match=msg):