Skip to content

Commit 51675d0

Browse files
authored
DEPR: inconsistent series[i:j] slicing with Int64Index GH#45162 (#45324)
1 parent f2a0125 commit 51675d0

File tree

22 files changed

+145
-37
lines changed

22 files changed

+145
-37
lines changed

doc/source/whatsnew/v1.5.0.rst

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,50 @@ Other API changes
9494

9595
Deprecations
9696
~~~~~~~~~~~~
97+
98+
.. _whatsnew_150.deprecations.int_slicing_series:
99+
100+
In a future version, integer slicing on a :class:`Series` with an :class:`Int64Index` or :class:`RangeIndex` will be treated as *label-based*, not positional. This will make the behavior consistent with other :meth:`Series.__getitem__` and :meth:`Series.__setitem__` behaviors (:issue:`45162`).
101+
102+
For example:
103+
104+
.. ipython:: python
105+
106+
ser = pd.Series([1, 2, 3, 4, 5], index=[2, 3, 5, 7, 11])
107+
108+
In the old behavior, ``ser[2:4]`` treats the slice as positional:
109+
110+
*Old behavior*:
111+
112+
.. code-block:: ipython
113+
114+
In [3]: ser[2:4]
115+
Out[3]:
116+
5 3
117+
7 4
118+
dtype: int64
119+
120+
In a future version, ``ser[2:4]`` will be treated as label-based, i.e. equivalent to ``ser.loc[2:4]``:
121+
122+
*Future behavior*:
123+
124+
.. code-block:: ipython
125+
126+
In [4]: ser.loc[2:4]
127+
Out[4]:
128+
2 1
129+
3 2
130+
dtype: int64
131+
132+
To retain the old behavior, use ``series.iloc[i:j]``. To get the future behavior,
133+
use ``series.loc[i:j]``.
134+
135+
Slicing on a :class:`DataFrame` will not be affected.
136+
137+
.. _whatsnew_150.deprecations.other:
138+
139+
Other Deprecations
140+
^^^^^^^^^^^^^^^^^^
97141
- Deprecated the keyword ``line_terminator`` in :meth:`DataFrame.to_csv` and :meth:`Series.to_csv`, use ``lineterminator`` instead; this is for consistency with :func:`read_csv` and the standard library 'csv' module (:issue:`9568`)
98142
- Deprecated :meth:`DataFrame.iteritems`, :meth:`Series.iteritems`, :meth:`HDFStore.iteritems` in favor of :meth:`DataFrame.items`, :meth:`Series.items`, :meth:`HDFStore.items` (:issue:`45321`)
99143
-

pandas/core/indexes/base.py

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@
110110
ABCDatetimeIndex,
111111
ABCMultiIndex,
112112
ABCPeriodIndex,
113+
ABCRangeIndex,
113114
ABCSeries,
114115
ABCTimedeltaIndex,
115116
)
@@ -3989,7 +3990,7 @@ def _validate_positional_slice(self, key: slice) -> None:
39893990
self._validate_indexer("positional", key.stop, "iloc")
39903991
self._validate_indexer("positional", key.step, "iloc")
39913992

3992-
def _convert_slice_indexer(self, key: slice, kind: str_t):
3993+
def _convert_slice_indexer(self, key: slice, kind: str_t, is_frame: bool = False):
39933994
"""
39943995
Convert a slice indexer.
39953996
@@ -4000,6 +4001,9 @@ def _convert_slice_indexer(self, key: slice, kind: str_t):
40004001
----------
40014002
key : slice whose bounds are to be converted
40024003
kind : {'loc', 'getitem'}
4004+
is_frame : bool, default False
4005+
Whether this is a slice called on DataFrame.__getitem__
4006+
as opposed to Series.__getitem__
40034007
"""
40044008
assert kind in ["loc", "getitem"], kind
40054009

@@ -4020,7 +4024,44 @@ def is_int(v):
40204024
called from the getitem slicers, validate that we are in fact
40214025
integers
40224026
"""
4027+
if self.is_integer():
4028+
if is_frame:
4029+
# unambiguously positional, no deprecation
4030+
pass
4031+
elif start is None and stop is None:
4032+
# label-based vs positional is irrelevant
4033+
pass
4034+
elif isinstance(self, ABCRangeIndex) and self._range == range(
4035+
len(self)
4036+
):
4037+
# In this case there is no difference between label-based
4038+
# and positional, so nothing will change.
4039+
pass
4040+
elif (
4041+
self.dtype.kind in ["i", "u"]
4042+
and self._is_strictly_monotonic_increasing
4043+
and len(self) > 0
4044+
and self[0] == 0
4045+
and self[-1] == len(self) - 1
4046+
):
4047+
# We are range-like, e.g. created with Index(np.arange(N))
4048+
pass
4049+
elif not is_index_slice:
4050+
# we're going to raise, so don't bother warning, e.g.
4051+
# test_integer_positional_indexing
4052+
pass
4053+
else:
4054+
warnings.warn(
4055+
"The behavior of `series[i:j]` with an integer-dtype index "
4056+
"is deprecated. In a future version, this will be treated "
4057+
"as *label-based* indexing, consistent with e.g. `series[i]` "
4058+
"lookups. To retain the old behavior, use `series.iloc[i:j]`. "
4059+
"To get the future behavior, use `series.loc[i:j]`.",
4060+
FutureWarning,
4061+
stacklevel=find_stack_level(),
4062+
)
40234063
if self.is_integer() or is_index_slice:
4064+
# Note: these checks are redundant if we know is_index_slice
40244065
self._validate_indexer("slice", key.start, "getitem")
40254066
self._validate_indexer("slice", key.stop, "getitem")
40264067
self._validate_indexer("slice", key.step, "getitem")

pandas/core/indexes/interval.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -754,7 +754,7 @@ def _index_as_unique(self) -> bool:
754754
"cannot handle overlapping indices; use IntervalIndex.get_indexer_non_unique"
755755
)
756756

757-
def _convert_slice_indexer(self, key: slice, kind: str):
757+
def _convert_slice_indexer(self, key: slice, kind: str, is_frame: bool = False):
758758
if not (key.step is None or key.step == 1):
759759
# GH#31658 if label-based, we require step == 1,
760760
# if positional, we disallow float start/stop
@@ -766,7 +766,7 @@ def _convert_slice_indexer(self, key: slice, kind: str):
766766
# i.e. this cannot be interpreted as a positional slice
767767
raise ValueError(msg)
768768

769-
return super()._convert_slice_indexer(key, kind)
769+
return super()._convert_slice_indexer(key, kind, is_frame=is_frame)
770770

771771
@cache_readonly
772772
def _should_fallback_to_positional(self) -> bool:

pandas/core/indexes/numeric.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -268,15 +268,15 @@ def _should_fallback_to_positional(self) -> bool:
268268
return False
269269

270270
@doc(Index._convert_slice_indexer)
271-
def _convert_slice_indexer(self, key: slice, kind: str):
271+
def _convert_slice_indexer(self, key: slice, kind: str, is_frame: bool = False):
272272
if is_float_dtype(self.dtype):
273273
assert kind in ["loc", "getitem"]
274274

275275
# We always treat __getitem__ slicing as label-based
276276
# translate to locations
277277
return self.slice_indexer(key.start, key.stop, key.step)
278278

279-
return super()._convert_slice_indexer(key, kind=kind)
279+
return super()._convert_slice_indexer(key, kind=kind, is_frame=is_frame)
280280

281281
@doc(Index._maybe_cast_slice_bound)
282282
def _maybe_cast_slice_bound(self, label, side: str, kind=lib.no_default):

pandas/core/indexing.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2313,7 +2313,7 @@ def convert_to_index_sliceable(obj: DataFrame, key):
23132313
"""
23142314
idx = obj.index
23152315
if isinstance(key, slice):
2316-
return idx._convert_slice_indexer(key, kind="getitem")
2316+
return idx._convert_slice_indexer(key, kind="getitem", is_frame=True)
23172317

23182318
elif isinstance(key, str):
23192319

pandas/tests/extension/base/getitem.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -313,7 +313,8 @@ def test_get(self, data):
313313
expected = s.iloc[[2, 3]]
314314
self.assert_series_equal(result, expected)
315315

316-
result = s.get(slice(2))
316+
with tm.assert_produces_warning(FutureWarning, match="label-based"):
317+
result = s.get(slice(2))
317318
expected = s.iloc[[0, 1]]
318319
self.assert_series_equal(result, expected)
319320

@@ -336,7 +337,9 @@ def test_get(self, data):
336337

337338
# GH 21257
338339
s = pd.Series(data)
339-
s2 = s[::2]
340+
with tm.assert_produces_warning(None):
341+
# GH#45324 make sure we aren't giving a spurious FutureWarning
342+
s2 = s[::2]
340343
assert s2.get(1) is None
341344

342345
def test_take_sequence(self, data):

pandas/tests/frame/indexing/test_indexing.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1009,7 +1009,7 @@ def test_iloc_row_slice_view(self, using_array_manager):
10091009
exp_col = original[2].copy()
10101010
# TODO(ArrayManager) verify it is expected that the original didn't change
10111011
if not using_array_manager:
1012-
exp_col[4:8] = 0.0
1012+
exp_col._values[4:8] = 0.0
10131013
tm.assert_series_equal(df[2], exp_col)
10141014

10151015
def test_iloc_col(self):

pandas/tests/groupby/test_apply.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -367,11 +367,11 @@ def test_apply_frame_not_as_index_column_name(df):
367367

368368
def test_apply_frame_concat_series():
369369
def trans(group):
370-
return group.groupby("B")["C"].sum().sort_values()[:2]
370+
return group.groupby("B")["C"].sum().sort_values().iloc[:2]
371371

372372
def trans2(group):
373373
grouped = group.groupby(df.reindex(group.index)["B"])
374-
return grouped.sum().sort_values()[:2]
374+
return grouped.sum().sort_values().iloc[:2]
375375

376376
df = DataFrame(
377377
{
@@ -409,7 +409,7 @@ def test_apply_chunk_view():
409409
# Low level tinkering could be unsafe, make sure not
410410
df = DataFrame({"key": [1, 1, 1, 2, 2, 2, 3, 3, 3], "value": range(9)})
411411

412-
result = df.groupby("key", group_keys=False).apply(lambda x: x[:2])
412+
result = df.groupby("key", group_keys=False).apply(lambda x: x.iloc[:2])
413413
expected = df.take([0, 1, 3, 4, 6, 7])
414414
tm.assert_frame_equal(result, expected)
415415

pandas/tests/indexes/numeric/test_numeric.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -657,7 +657,7 @@ def test_uint_index_does_not_convert_to_float64(box):
657657
)
658658
tm.assert_index_equal(result.index, expected)
659659

660-
tm.assert_equal(result, series[:3])
660+
tm.assert_equal(result, series.iloc[:3])
661661

662662

663663
def test_float64_index_equals():

pandas/tests/indexing/test_floats.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -343,7 +343,8 @@ def test_integer_positional_indexing(self, idx):
343343
"""
344344
s = Series(range(2, 6), index=range(2, 6))
345345

346-
result = s[2:4]
346+
with tm.assert_produces_warning(FutureWarning, match="label-based"):
347+
result = s[2:4]
347348
expected = s.iloc[2:4]
348349
tm.assert_series_equal(result, expected)
349350

0 commit comments

Comments
 (0)