Skip to content

Commit 7ab6598

Browse files
committed
Merge branch 'master' of https://github.com/pandas-dev/pandas into sty-private
2 parents 049b551 + 70c056b commit 7ab6598

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

57 files changed

+375
-360
lines changed

ci/code_checks.sh

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,10 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
187187
invgrep -R --include="*.py" -E "super\(\w*, (self|cls)\)" pandas
188188
RET=$(($RET + $?)) ; echo $MSG "DONE"
189189

190+
MSG='Check for use of builtin filter function' ; echo $MSG
191+
invgrep -R --include="*.py" -P '(?<!def)[\(\s]filter\(' pandas
192+
RET=$(($RET + $?)) ; echo $MSG "DONE"
193+
190194
# Check for the following code in testing: `np.testing` and `np.array_equal`
191195
MSG='Check for invalid testing' ; echo $MSG
192196
invgrep -r -E --include '*.py' --exclude testing.py '(numpy|np)(\.testing|\.array_equal)' pandas/tests/
@@ -238,10 +242,9 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
238242
invgrep -R --include="*.py" -P '# type: (?!ignore)' pandas
239243
RET=$(($RET + $?)) ; echo $MSG "DONE"
240244

241-
# https://github.com/python/mypy/issues/7384
242-
# MSG='Check for missing error codes with # type: ignore' ; echo $MSG
243-
# invgrep -R --include="*.py" -P '# type: ignore(?!\[)' pandas
244-
# RET=$(($RET + $?)) ; echo $MSG "DONE"
245+
MSG='Check for missing error codes with # type: ignore' ; echo $MSG
246+
invgrep -R --include="*.py" -P '# type:\s?ignore(?!\[)' pandas
247+
RET=$(($RET + $?)) ; echo $MSG "DONE"
245248

246249
MSG='Check for use of foo.__class__ instead of type(foo)' ; echo $MSG
247250
invgrep -R --include=*.{py,pyx} '\.__class__' pandas

doc/source/user_guide/missing_data.rst

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -689,32 +689,6 @@ You can also operate on the DataFrame in place:
689689
690690
df.replace(1.5, np.nan, inplace=True)
691691
692-
.. warning::
693-
694-
When replacing multiple ``bool`` or ``datetime64`` objects, the first
695-
argument to ``replace`` (``to_replace``) must match the type of the value
696-
being replaced. For example,
697-
698-
.. code-block:: python
699-
700-
>>> s = pd.Series([True, False, True])
701-
>>> s.replace({'a string': 'new value', True: False}) # raises
702-
TypeError: Cannot compare types 'ndarray(dtype=bool)' and 'str'
703-
704-
will raise a ``TypeError`` because one of the ``dict`` keys is not of the
705-
correct type for replacement.
706-
707-
However, when replacing a *single* object such as,
708-
709-
.. ipython:: python
710-
711-
s = pd.Series([True, False, True])
712-
s.replace('a string', 'another string')
713-
714-
the original ``NDFrame`` object will be returned untouched. We're working on
715-
unifying this API, but for backwards compatibility reasons we cannot break
716-
the latter behavior. See :issue:`6354` for more details.
717-
718692
Missing data casting rules and indexing
719693
---------------------------------------
720694

doc/source/whatsnew/v1.2.0.rst

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -214,7 +214,8 @@ Performance improvements
214214

215215
Bug fixes
216216
~~~~~~~~~
217-
217+
- Bug in :meth:`DataFrameGroupBy.apply` raising an error with ``np.nan`` group(s) when ``dropna=False`` (:issue:`35889`)
218+
-
218219

219220
Categorical
220221
^^^^^^^^^^^
@@ -311,6 +312,7 @@ Groupby/resample/rolling
311312
- Bug in :meth:`DataFrameGroupBy.apply` would drop a :class:`CategoricalIndex` when grouped on. (:issue:`35792`)
312313
- Bug when subsetting columns on a :class:`~pandas.core.groupby.DataFrameGroupBy` (e.g. ``df.groupby('a')[['b']]``) would reset the attributes ``axis``, ``dropna``, ``group_keys``, ``level``, ``mutated``, ``sort``, and ``squeeze`` to their default values. (:issue:`9959`)
313314
- Bug in :meth:`DataFrameGroupBy.tshift` failing to raise ``ValueError`` when a frequency cannot be inferred for the index of a group (:issue:`35937`)
315+
- Bug in :meth:`DataFrame.groupby` where the column index name was not always maintained for ``any``, ``all``, ``bfill``, ``ffill``, and ``shift`` (:issue:`29764`)
314316
-
315317

316318
Reshaping
@@ -337,6 +339,7 @@ ExtensionArray
337339
Other
338340
^^^^^
339341
- Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` incorrectly raising ``AssertionError`` instead of ``ValueError`` when invalid parameter combinations are passed (:issue:`36045`)
342+
- Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` with numeric values and string ``to_replace`` (:issue:`34789`)
340343
-
341344

342345
.. ---------------------------------------------------------------------------

pandas/_typing.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@
6262
# other
6363

6464
Dtype = Union[
65-
"ExtensionDtype", str, np.dtype, Type[Union[str, float, int, complex, bool]]
65+
"ExtensionDtype", str, np.dtype, Type[Union[str, float, int, complex, bool, object]]
6666
]
6767
DtypeObj = Union[np.dtype, "ExtensionDtype"]
6868
FilePathOrBuffer = Union[str, Path, IO[AnyStr], IOBase]

pandas/core/algorithms.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
import operator
88
from textwrap import dedent
9-
from typing import TYPE_CHECKING, Dict, Optional, Tuple, Union
9+
from typing import TYPE_CHECKING, Dict, Optional, Tuple, Union, cast
1010
from warnings import catch_warnings, simplefilter, warn
1111

1212
import numpy as np
@@ -60,7 +60,7 @@
6060
from pandas.core.indexers import validate_indices
6161

6262
if TYPE_CHECKING:
63-
from pandas import DataFrame, Series
63+
from pandas import Categorical, DataFrame, Series
6464

6565
_shared_docs: Dict[str, str] = {}
6666

@@ -429,8 +429,7 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> np.ndarray:
429429
if is_categorical_dtype(comps):
430430
# TODO(extension)
431431
# handle categoricals
432-
# error: "ExtensionArray" has no attribute "isin" [attr-defined]
433-
return comps.isin(values) # type: ignore[attr-defined]
432+
return cast("Categorical", comps).isin(values)
434433

435434
comps, dtype = _ensure_data(comps)
436435
values, _ = _ensure_data(values, dtype=dtype)

pandas/core/array_algos/replace.py

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
"""
2+
Methods used by Block.replace and related methods.
3+
"""
4+
import operator
5+
import re
6+
from typing import Optional, Pattern, Union
7+
8+
import numpy as np
9+
10+
from pandas._typing import ArrayLike, Scalar
11+
12+
from pandas.core.dtypes.common import (
13+
is_datetimelike_v_numeric,
14+
is_numeric_v_string_like,
15+
is_scalar,
16+
)
17+
from pandas.core.dtypes.missing import isna
18+
19+
20+
def compare_or_regex_search(
    a: ArrayLike,
    b: Union[Scalar, Pattern],
    regex: bool = False,
    mask: Optional[ArrayLike] = None,
) -> Union[ArrayLike, bool]:
    """
    Compare two array_like inputs of the same shape or two scalar values

    Calls operator.eq or re.search, depending on regex argument. If regex is
    True, perform an element-wise regex matching.

    Parameters
    ----------
    a : array_like
    b : scalar or regex pattern
    regex : bool, default False
    mask : array_like or None (default)
        Boolean selector for the elements of ``a`` that are compared. If None,
        ``a`` is an ndarray and ``b`` is not, the non-NA positions of ``a``
        are used.

    Returns
    -------
    mask : array_like of bool
        When a mask is in effect, an ndarray result has the shape of that mask
        and is False at every position the mask excluded. A plain ``False``
        is returned for a datetimelike-vs-numeric comparison when ``a`` is not
        an ndarray.

    Raises
    ------
    TypeError
        If ``a`` is an ndarray but the comparison produced a scalar.
    """

    def _check_comparison_types(
        result: Union[ArrayLike, bool], a: ArrayLike, b: Union[Scalar, Pattern]
    ):
        """
        Raise TypeError if comparing ndarray ``a`` with ``b`` gave a scalar.

        Does nothing (returns None) when the result is usable.
        """
        if is_scalar(result) and isinstance(a, np.ndarray):
            # A scalar result for an ndarray input means no element-wise
            # comparison took place.
            a_name = f"ndarray(dtype={a.dtype})"
            b_name = type(b).__name__
            raise TypeError(f"Cannot compare types {repr(a_name)} and {repr(b_name)}")

    if not regex:
        op = lambda x: operator.eq(x, b)
    else:
        # otypes is given explicitly so that a size-0 input (every element
        # masked out) yields an empty bool array instead of a ValueError.
        op = np.vectorize(
            lambda x: bool(re.search(b, x))
            if isinstance(x, str) and isinstance(b, (str, Pattern))
            else False,
            otypes=[bool],
        )

    # GH#32621 use mask to avoid comparing to NAs
    if mask is None and isinstance(a, np.ndarray) and not isinstance(b, np.ndarray):
        mask = np.reshape(~(isna(a)), a.shape)
    if isinstance(a, np.ndarray) and mask is not None:
        # Indexing with a None mask would act as np.newaxis and add a
        # dimension, so only filter when there is a real mask.
        a = a[mask]

    if is_numeric_v_string_like(a, b):
        # GH#29553 avoid deprecation warnings from numpy
        # ``a`` may already be filtered by the mask; the all-False result must
        # have the shape of the unfiltered input.
        shape = a.shape if mask is None else mask.shape
        return np.zeros(shape, dtype=bool)

    elif is_datetimelike_v_numeric(a, b):
        # GH#29553 avoid deprecation warnings from numpy
        _check_comparison_types(False, a, b)
        return False

    result = op(a)

    if isinstance(result, np.ndarray) and mask is not None:
        # The shape of the mask can differ from that of the result
        # since we may compare only a subset of a's or b's elements
        tmp = np.zeros(mask.shape, dtype=np.bool_)
        tmp[mask] = result
        result = tmp

    _check_comparison_types(result, a, b)
    return result

pandas/core/arrays/categorical.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,19 @@ class Categorical(NDArrayBackedExtensionArray, PandasObject):
280280
['a', 'b', 'c', 'a', 'b', 'c']
281281
Categories (3, object): ['a', 'b', 'c']
282282
283+
Missing values are not included as a category.
284+
285+
>>> c = pd.Categorical([1, 2, 3, 1, 2, 3, np.nan])
286+
>>> c
287+
[1, 2, 3, 1, 2, 3, NaN]
288+
Categories (3, int64): [1, 2, 3]
289+
290+
However, their presence is indicated in the `codes` attribute
291+
by code `-1`.
292+
293+
>>> c.codes
294+
array([ 0, 1, 2, 0, 1, 2, -1], dtype=int8)
295+
283296
Ordered `Categoricals` can be sorted according to the custom order
284297
of the categories and can have a min and max value.
285298
@@ -2316,7 +2329,7 @@ def _concat_same_type(self, to_concat):
23162329

23172330
return union_categoricals(to_concat)
23182331

2319-
def isin(self, values):
2332+
def isin(self, values) -> np.ndarray:
23202333
"""
23212334
Check whether `values` are contained in Categorical.
23222335

pandas/core/arrays/datetimelike.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -468,10 +468,9 @@ def _ndarray(self) -> np.ndarray:
468468

469469
def _from_backing_data(self: _T, arr: np.ndarray) -> _T:
470470
# Note: we do not retain `freq`
471+
# error: Too many arguments for "NDArrayBackedExtensionArray"
471472
# error: Unexpected keyword argument "dtype" for "NDArrayBackedExtensionArray"
472-
# TODO: add my error code
473-
# https://github.com/python/mypy/issues/7384
474-
return type(self)(arr, dtype=self.dtype) # type: ignore
473+
return type(self)(arr, dtype=self.dtype) # type: ignore[call-arg]
475474

476475
# ------------------------------------------------------------------
477476

pandas/core/construction.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -335,7 +335,7 @@ def array(
335335
return result
336336

337337

338-
def extract_array(obj, extract_numpy: bool = False):
338+
def extract_array(obj: AnyArrayLike, extract_numpy: bool = False) -> ArrayLike:
339339
"""
340340
Extract the ndarray or ExtensionArray from a Series or Index.
341341
@@ -383,7 +383,9 @@ def extract_array(obj, extract_numpy: bool = False):
383383
if extract_numpy and isinstance(obj, ABCPandasArray):
384384
obj = obj.to_numpy()
385385

386-
return obj
386+
# error: Incompatible return value type (got "Index", expected "ExtensionArray")
387+
# error: Incompatible return value type (got "Series", expected "ExtensionArray")
388+
return obj # type: ignore[return-value]
387389

388390

389391
def sanitize_array(

pandas/core/dtypes/cast.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1488,7 +1488,7 @@ def find_common_type(types: List[DtypeObj]) -> DtypeObj:
14881488
if has_bools:
14891489
for t in types:
14901490
if is_integer_dtype(t) or is_float_dtype(t) or is_complex_dtype(t):
1491-
return object
1491+
return np.dtype("object")
14921492

14931493
return np.find_common_type(types, [])
14941494

@@ -1550,7 +1550,7 @@ def construct_1d_arraylike_from_scalar(
15501550
elif isinstance(dtype, np.dtype) and dtype.kind in ("U", "S"):
15511551
# we need to coerce to object dtype so that
15521552
# numpy takes our string as a scalar value
1553-
dtype = object
1553+
dtype = np.dtype("object")
15541554
if not isna(value):
15551555
value = ensure_str(value)
15561556

0 commit comments

Comments
 (0)