Skip to content

Commit 8e01a8e

Browse files
jbrockmendel authored and feefladder committed
DEPR: Index inferring numeric dtype from ndarray[object] (pandas-dev#42870)
1 parent b8e572f commit 8e01a8e

File tree

21 files changed

+121
-41
lines changed

21 files changed

+121
-41
lines changed

pandas/_testing/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -219,7 +219,7 @@ def box_expected(expected, box_cls, transpose=True):
219219
else:
220220
expected = pd.array(expected)
221221
elif box_cls is Index:
222-
expected = Index(expected)
222+
expected = Index._with_infer(expected)
223223
elif box_cls is Series:
224224
expected = Series(expected)
225225
elif box_cls is DataFrame:

pandas/core/arrays/categorical.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2031,7 +2031,9 @@ def _validate_listlike(self, value):
20312031
from pandas import Index
20322032

20332033
# tupleize_cols=False for e.g. test_fillna_iterable_category GH#41914
2034-
to_add = Index(value, tupleize_cols=False).difference(self.categories)
2034+
to_add = Index._with_infer(value, tupleize_cols=False).difference(
2035+
self.categories
2036+
)
20352037

20362038
# no assignments of values not in categories, but it's always ok to set
20372039
# something to np.nan
@@ -2741,6 +2743,7 @@ def factorize_from_iterable(values) -> tuple[np.ndarray, Index]:
27412743
# as values but its codes are by def [0, ..., len(n_categories) - 1]
27422744
cat_codes = np.arange(len(values.categories), dtype=values.codes.dtype)
27432745
cat = Categorical.from_codes(cat_codes, dtype=values.dtype)
2746+
27442747
categories = CategoricalIndex(cat)
27452748
codes = values.codes
27462749
else:

pandas/core/arrays/interval.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,10 @@
1616

1717
from pandas._config import get_option
1818

19-
from pandas._libs import NaT
19+
from pandas._libs import (
20+
NaT,
21+
lib,
22+
)
2023
from pandas._libs.interval import (
2124
VALID_CLOSED,
2225
Interval,
@@ -225,6 +228,9 @@ def __new__(
225228
left, right, infer_closed = intervals_to_interval_bounds(
226229
data, validate_closed=closed is None
227230
)
231+
if left.dtype == object:
232+
left = lib.maybe_convert_objects(left)
233+
right = lib.maybe_convert_objects(right)
228234
closed = closed or infer_closed
229235

230236
return cls._simple_new(

pandas/core/dtypes/dtypes.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -529,7 +529,7 @@ def validate_categories(categories, fastpath: bool = False) -> Index:
529529
f"Parameter 'categories' must be list-like, was {repr(categories)}"
530530
)
531531
elif not isinstance(categories, ABCIndex):
532-
categories = Index(categories, tupleize_cols=False)
532+
categories = Index._with_infer(categories, tupleize_cols=False)
533533

534534
if not fastpath:
535535

pandas/core/groupby/generic.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -455,7 +455,7 @@ def _get_index() -> Index:
455455
if self.grouper.nkeys > 1:
456456
index = MultiIndex.from_tuples(keys, names=self.grouper.names)
457457
else:
458-
index = Index(keys, name=self.grouper.names[0])
458+
index = Index._with_infer(keys, name=self.grouper.names[0])
459459
return index
460460

461461
if isinstance(values[0], dict):

pandas/core/groupby/grouper.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -646,7 +646,7 @@ def group_index(self) -> Index:
646646
return self._group_index
647647

648648
uniques = self._codes_and_uniques[1]
649-
return Index(uniques, name=self.name)
649+
return Index._with_infer(uniques, name=self.name)
650650

651651
@cache_readonly
652652
def _codes_and_uniques(self) -> tuple[np.ndarray, ArrayLike]:

pandas/core/indexes/base.py

Lines changed: 70 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -471,7 +471,9 @@ def __new__(
471471
arr = com.asarray_tuplesafe(data, dtype=np.dtype("object"))
472472

473473
if dtype is None:
474-
arr = _maybe_cast_data_without_dtype(arr)
474+
arr = _maybe_cast_data_without_dtype(
475+
arr, cast_numeric_deprecated=True
476+
)
475477
dtype = arr.dtype
476478

477479
if kwargs:
@@ -504,6 +506,15 @@ def __new__(
504506
# other iterable of some kind
505507

506508
subarr = com.asarray_tuplesafe(data, dtype=np.dtype("object"))
509+
if dtype is None:
510+
# with e.g. a list [1, 2, 3] casting to numeric is _not_ deprecated
511+
# error: Incompatible types in assignment (expression has type
512+
# "Union[ExtensionArray, ndarray[Any, Any]]", variable has type
513+
# "ndarray[Any, Any]")
514+
subarr = _maybe_cast_data_without_dtype( # type: ignore[assignment]
515+
subarr, cast_numeric_deprecated=False
516+
)
517+
dtype = subarr.dtype
507518
return Index(subarr, dtype=dtype, copy=copy, name=name, **kwargs)
508519

509520
@classmethod
@@ -637,6 +648,26 @@ def _simple_new(cls: type[_IndexT], values, name: Hashable = None) -> _IndexT:
637648

638649
return result
639650

651+
@classmethod
652+
def _with_infer(cls, *args, **kwargs):
653+
"""
654+
Constructor that uses the 1.0.x behavior inferring numeric dtypes
655+
for ndarray[object] inputs.
656+
"""
657+
with warnings.catch_warnings():
658+
warnings.filterwarnings("ignore", ".*the Index constructor", FutureWarning)
659+
result = cls(*args, **kwargs)
660+
661+
if result.dtype == object and not result._is_multi:
662+
# error: Argument 1 to "maybe_convert_objects" has incompatible type
663+
# "Union[ExtensionArray, ndarray[Any, Any]]"; expected
664+
# "ndarray[Any, Any]"
665+
values = lib.maybe_convert_objects(result._values) # type: ignore[arg-type]
666+
if values.dtype.kind in ["i", "u", "f"]:
667+
return Index(values, name=result.name)
668+
669+
return result
670+
640671
@cache_readonly
641672
def _constructor(self: _IndexT) -> type[_IndexT]:
642673
return type(self)
@@ -2609,7 +2640,7 @@ def fillna(self, value=None, downcast=None):
26092640
if downcast is None:
26102641
# no need to care about metadata other than name
26112642
# because it can't have freq if it has NaTs
2612-
return Index(result, name=self.name)
2643+
return Index._with_infer(result, name=self.name)
26132644
return self._view()
26142645

26152646
def dropna(self: _IndexT, how: str_t = "any") -> _IndexT:
@@ -4009,7 +4040,7 @@ def _reindex_non_unique(
40094040
if isinstance(self, ABCMultiIndex):
40104041
new_index = type(self).from_tuples(new_labels, names=self.names)
40114042
else:
4012-
new_index = Index(new_labels, name=self.name)
4043+
new_index = Index._with_infer(new_labels, name=self.name)
40134044
return new_index, indexer, new_indexer
40144045

40154046
# --------------------------------------------------------------------
@@ -4450,9 +4481,12 @@ def _wrap_joined_index(self: _IndexT, joined: ArrayLike, other: _IndexT) -> _Ind
44504481

44514482
if isinstance(self, ABCMultiIndex):
44524483
name = self.names if self.names == other.names else None
4484+
# error: Incompatible return value type (got "MultiIndex",
4485+
# expected "_IndexT")
4486+
return self._constructor(joined, name=name) # type: ignore[return-value]
44534487
else:
44544488
name = get_op_result_name(self, other)
4455-
return self._constructor(joined, name=name)
4489+
return self._constructor._with_infer(joined, name=name)
44564490

44574491
# --------------------------------------------------------------------
44584492
# Uncategorized Methods
@@ -4805,7 +4839,7 @@ def _concat(self, to_concat: list[Index], name: Hashable) -> Index:
48054839
to_concat_vals = [x._values for x in to_concat]
48064840

48074841
result = concat_compat(to_concat_vals)
4808-
return Index(result, name=name)
4842+
return Index._with_infer(result, name=name)
48094843

48104844
def putmask(self, mask, value) -> Index:
48114845
"""
@@ -5752,7 +5786,7 @@ def map(self, mapper, na_action=None):
57525786
):
57535787
return self._constructor(new_values, **attributes)
57545788

5755-
return Index(new_values, **attributes)
5789+
return Index._with_infer(new_values, **attributes)
57565790

57575791
# TODO: De-duplicate with map, xref GH#32349
57585792
@final
@@ -6228,7 +6262,7 @@ def insert(self, loc: int, item) -> Index:
62286262
# Use Index constructor to ensure we get tuples cast correctly.
62296263
item = Index([item], dtype=self.dtype)._values
62306264
idx = np.concatenate((arr[:loc], item, arr[loc:]))
6231-
return Index(idx, name=self.name)
6265+
return Index._with_infer(idx, name=self.name)
62326266

62336267
def drop(self, labels, errors: str_t = "raise") -> Index:
62346268
"""
@@ -6313,8 +6347,8 @@ def _arith_method(self, other, op):
63136347

63146348
result = op(Series(self), other)
63156349
if isinstance(result, tuple):
6316-
return (Index(result[0]), Index(result[1]))
6317-
return Index(result)
6350+
return (Index._with_infer(result[0]), Index(result[1]))
6351+
return Index._with_infer(result)
63186352

63196353
@final
63206354
def _unary_method(self, op):
@@ -6637,7 +6671,7 @@ def ensure_index(index_like: AnyArrayLike | Sequence, copy: bool = False) -> Ind
66376671

66386672
if isinstance(index_like, ABCSeries):
66396673
name = index_like.name
6640-
return Index(index_like, name=name, copy=copy)
6674+
return Index._with_infer(index_like, name=name, copy=copy)
66416675

66426676
if is_iterator(index_like):
66436677
index_like = list(index_like)
@@ -6653,10 +6687,9 @@ def ensure_index(index_like: AnyArrayLike | Sequence, copy: bool = False) -> Ind
66536687

66546688
return MultiIndex.from_arrays(index_like)
66556689
else:
6656-
return Index(index_like, copy=copy, tupleize_cols=False)
6690+
return Index._with_infer(index_like, copy=copy, tupleize_cols=False)
66576691
else:
6658-
6659-
return Index(index_like, copy=copy)
6692+
return Index._with_infer(index_like, copy=copy)
66606693

66616694

66626695
def ensure_has_len(seq):
@@ -6717,14 +6750,26 @@ def maybe_extract_name(name, obj, cls) -> Hashable:
67176750
return name
67186751

67196752

6720-
def _maybe_cast_data_without_dtype(subarr: np.ndarray) -> ArrayLike:
6753+
_cast_depr_msg = (
6754+
"In a future version, passing an object-dtype arraylike to pd.Index will "
6755+
"not infer numeric values to numeric dtype (matching the Series behavior). "
6756+
"To retain the old behavior, explicitly pass the desired dtype or use the "
6757+
"desired Index subclass"
6758+
)
6759+
6760+
6761+
def _maybe_cast_data_without_dtype(
6762+
subarr: np.ndarray, cast_numeric_deprecated: bool = True
6763+
) -> ArrayLike:
67216764
"""
67226765
If we have an arraylike input but no passed dtype, try to infer
67236766
a supported dtype.
67246767
67256768
Parameters
67266769
----------
67276770
subarr : np.ndarray[object]
6771+
cast_numeric_deprecated : bool, default True
6772+
Whether to issue a FutureWarning when inferring numeric dtypes.
67286773
67296774
Returns
67306775
-------
@@ -6739,6 +6784,17 @@ def _maybe_cast_data_without_dtype(subarr: np.ndarray) -> ArrayLike:
67396784
convert_interval=True,
67406785
dtype_if_all_nat=np.dtype("datetime64[ns]"),
67416786
)
6787+
if result.dtype.kind in ["i", "u", "f"]:
6788+
if not cast_numeric_deprecated:
6789+
# i.e. we started with a list, not an ndarray[object]
6790+
return result
6791+
6792+
warnings.warn(
6793+
"In a future version, the Index constructor will not infer numeric "
6794+
"dtypes when passed object-dtype sequences (matching Series behavior)",
6795+
FutureWarning,
6796+
stacklevel=3,
6797+
)
67426798
if result.dtype.kind in ["b", "c"]:
67436799
return subarr
67446800
result = ensure_wrapped_if_datetimelike(result)

pandas/core/indexes/multi.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2150,7 +2150,7 @@ def append(self, other):
21502150
try:
21512151
return MultiIndex.from_tuples(new_tuples, names=self.names)
21522152
except (TypeError, IndexError):
2153-
return Index(new_tuples)
2153+
return Index._with_infer(new_tuples)
21542154

21552155
def argsort(self, *args, **kwargs) -> np.ndarray:
21562156
return self._values.argsort(*args, **kwargs)

pandas/core/strings/accessor.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -322,7 +322,7 @@ def cons_row(x):
322322
out = out.get_level_values(0)
323323
return out
324324
else:
325-
return Index(result, name=name)
325+
return Index._with_infer(result, name=name)
326326
else:
327327
index = self._orig.index
328328
# This is a mess.

pandas/core/tools/datetimes.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -226,7 +226,7 @@ def _box_as_indexlike(
226226
if is_datetime64_dtype(dt_array):
227227
tz = "utc" if utc else None
228228
return DatetimeIndex(dt_array, tz=tz, name=name)
229-
return Index(dt_array, name=name)
229+
return Index(dt_array, name=name, dtype=dt_array.dtype)
230230

231231

232232
def _convert_and_box_cache(
@@ -517,7 +517,7 @@ def _to_datetime_with_unit(arg, unit, name, tz, errors: str) -> Index:
517517
"""
518518
to_datetime specialized to the case where a 'unit' is passed.
519519
"""
520-
arg = getattr(arg, "_values", arg)
520+
arg = getattr(arg, "_values", arg) # TODO: extract_array
521521

522522
# GH#30050 pass an ndarray to tslib.array_with_unit_to_datetime
523523
# because it expects an ndarray argument
@@ -529,7 +529,7 @@ def _to_datetime_with_unit(arg, unit, name, tz, errors: str) -> Index:
529529

530530
if errors == "ignore":
531531
# Index constructor _may_ infer to DatetimeIndex
532-
result = Index(arr, name=name)
532+
result = Index._with_infer(arr, name=name)
533533
else:
534534
result = DatetimeIndex(arr, name=name)
535535

0 commit comments

Comments
 (0)