Skip to content

Commit e647fac

Browse files
[backport 2.3.x] BUG: Fix copy semantics in __array__ (#60046) (#60189)
(cherry picked from commit eacf032) Co-authored-by: Joris Van den Bossche <[email protected]> Co-authored-by: Sebastian Berg <[email protected]>
1 parent ce56f2e commit e647fac

File tree

20 files changed

+269
-34
lines changed

20 files changed

+269
-34
lines changed

doc/source/whatsnew/v2.3.0.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,9 @@ enhancement1
3232
Other enhancements
3333
^^^^^^^^^^^^^^^^^^
3434

35+
- The semantics for the ``copy`` keyword in ``__array__`` methods (i.e. called
36+
when using ``np.array()`` or ``np.asarray()`` on pandas objects) has been
37+
updated to work correctly with NumPy >= 2 (:issue:`57739`)
3538
- The :meth:`~Series.sum` reduction is now implemented for ``StringDtype`` columns (:issue:`59853`)
3639
-
3740

pandas/core/arrays/arrow/array.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -662,7 +662,16 @@ def __array__(
662662
self, dtype: NpDtype | None = None, copy: bool | None = None
663663
) -> np.ndarray:
664664
"""Correctly construct numpy arrays when passed to `np.asarray()`."""
665-
return self.to_numpy(dtype=dtype)
665+
if copy is False:
666+
# TODO: By using `zero_copy_only` it may be possible to implement this
667+
raise ValueError(
668+
"Unable to avoid copy while creating an array as requested."
669+
)
670+
elif copy is None:
671+
# `to_numpy(copy=False)` has the meaning of NumPy `copy=None`.
672+
copy = False
673+
674+
return self.to_numpy(dtype=dtype, copy=copy)
666675

667676
def __invert__(self) -> Self:
668677
# This is a bitwise op for integer types

pandas/core/arrays/categorical.py

Lines changed: 25 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -577,11 +577,12 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
577577
raise ValueError("Cannot convert float NaN to integer")
578578

579579
elif len(self.codes) == 0 or len(self.categories) == 0:
580-
result = np.array(
581-
self,
582-
dtype=dtype,
583-
copy=copy,
584-
)
580+
# For NumPy 1.x compatibility we cannot use copy=None. And
581+
# `copy=False` has the meaning of `copy=None` here:
582+
if not copy:
583+
result = np.asarray(self, dtype=dtype)
584+
else:
585+
result = np.array(self, dtype=dtype)
585586

586587
else:
587588
# GH8628 (PERF): astype category codes instead of astyping array
@@ -1642,6 +1643,17 @@ def __array__(
16421643
"""
16431644
The numpy array interface.
16441645
1646+
Users should not call this directly. Rather, it is invoked by
1647+
:func:`numpy.array` and :func:`numpy.asarray`.
1648+
1649+
Parameters
1650+
----------
1651+
dtype : np.dtype or None
1652+
Specifies the dtype for the array.
1653+
1654+
copy : bool or None, optional
1655+
See :func:`numpy.asarray`.
1656+
16451657
Returns
16461658
-------
16471659
numpy.array
@@ -1659,13 +1671,18 @@ def __array__(
16591671
>>> np.asarray(cat)
16601672
array(['a', 'b'], dtype=object)
16611673
"""
1674+
if copy is False:
1675+
raise ValueError(
1676+
"Unable to avoid copy while creating an array as requested."
1677+
)
1678+
16621679
ret = take_nd(self.categories._values, self._codes)
1663-
if dtype and np.dtype(dtype) != self.categories.dtype:
1664-
return np.asarray(ret, dtype)
16651680
# When we're a Categorical[ExtensionArray], like Interval,
16661681
# we need to ensure __array__ gets all the way to an
16671682
# ndarray.
1668-
return np.asarray(ret)
1683+
1684+
# `take_nd` should already make a copy, so don't force again.
1685+
return np.asarray(ret, dtype=dtype)
16691686

16701687
def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
16711688
# for binary ops, use our custom dunder methods

pandas/core/arrays/datetimelike.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -358,7 +358,14 @@ def __array__(
358358
) -> np.ndarray:
359359
# used for Timedelta/DatetimeArray, overridden by PeriodArray
360360
if is_object_dtype(dtype):
361+
if copy is False:
362+
raise ValueError(
363+
"Unable to avoid copy while creating an array as requested."
364+
)
361365
return np.array(list(self), dtype=object)
366+
367+
if copy is True:
368+
return np.array(self._ndarray, dtype=dtype)
362369
return self._ndarray
363370

364371
@overload

pandas/core/arrays/interval.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1574,6 +1574,11 @@ def __array__(
15741574
Return the IntervalArray's data as a numpy array of Interval
15751575
objects (with dtype='object')
15761576
"""
1577+
if copy is False:
1578+
raise ValueError(
1579+
"Unable to avoid copy while creating an array as requested."
1580+
)
1581+
15771582
left = self._left
15781583
right = self._right
15791584
mask = self.isna()

pandas/core/arrays/masked.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -600,7 +600,17 @@ def __array__(
600600
the array interface, return my values
601601
We return an object array here to preserve our scalar values
602602
"""
603-
return self.to_numpy(dtype=dtype)
603+
if copy is False:
604+
if not self._hasna:
605+
# special case, here we can simply return the underlying data
606+
return np.array(self._data, dtype=dtype, copy=copy)
607+
raise ValueError(
608+
"Unable to avoid copy while creating an array as requested."
609+
)
610+
611+
if copy is None:
612+
copy = False # The NumPy copy=False meaning is different here.
613+
return self.to_numpy(dtype=dtype, copy=copy)
604614

605615
_HANDLED_TYPES: tuple[type, ...]
606616

pandas/core/arrays/numpy_.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,9 @@ def dtype(self) -> NumpyEADtype:
150150
def __array__(
151151
self, dtype: NpDtype | None = None, copy: bool | None = None
152152
) -> np.ndarray:
153+
if copy is not None:
154+
# Note: branch avoids `copy=None` for NumPy 1.x support
155+
return np.array(self._ndarray, dtype=dtype, copy=copy)
153156
return np.asarray(self._ndarray, dtype=dtype)
154157

155158
def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):

pandas/core/arrays/period.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -407,8 +407,19 @@ def __array__(
407407
self, dtype: NpDtype | None = None, copy: bool | None = None
408408
) -> np.ndarray:
409409
if dtype == "i8":
410-
return self.asi8
411-
elif dtype == bool:
410+
# For NumPy 1.x compatibility we cannot use copy=None. And
411+
# `copy=False` has the meaning of `copy=None` here:
412+
if not copy:
413+
return np.asarray(self.asi8, dtype=dtype)
414+
else:
415+
return np.array(self.asi8, dtype=dtype)
416+
417+
if copy is False:
418+
raise ValueError(
419+
"Unable to avoid copy while creating an array as requested."
420+
)
421+
422+
if dtype == bool:
412423
return ~self._isnan
413424

414425
# This will raise TypeError for non-object dtypes

pandas/core/arrays/sparse/array.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -554,11 +554,20 @@ def from_spmatrix(cls, data: spmatrix) -> Self:
554554
def __array__(
555555
self, dtype: NpDtype | None = None, copy: bool | None = None
556556
) -> np.ndarray:
557-
fill_value = self.fill_value
558-
559557
if self.sp_index.ngaps == 0:
560558
# Compat for na dtype and int values.
561-
return self.sp_values
559+
if copy is True:
560+
return np.array(self.sp_values)
561+
else:
562+
return self.sp_values
563+
564+
if copy is False:
565+
raise ValueError(
566+
"Unable to avoid copy while creating an array as requested."
567+
)
568+
569+
fill_value = self.fill_value
570+
562571
if dtype is None:
563572
# Can NumPy represent this type?
564573
# If not, `np.result_type` will raise. We catch that

pandas/core/generic.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2150,9 +2150,15 @@ def __array__(
21502150
self, dtype: npt.DTypeLike | None = None, copy: bool_t | None = None
21512151
) -> np.ndarray:
21522152
values = self._values
2153-
arr = np.asarray(values, dtype=dtype)
2153+
if copy is None:
2154+
# Note: branch avoids `copy=None` for NumPy 1.x support
2155+
arr = np.asarray(values, dtype=dtype)
2156+
else:
2157+
arr = np.array(values, dtype=dtype, copy=copy)
2158+
21542159
if (
2155-
astype_is_view(values.dtype, arr.dtype)
2160+
copy is not True
2161+
and astype_is_view(values.dtype, arr.dtype)
21562162
and using_copy_on_write()
21572163
and self._mgr.is_single_block
21582164
):

0 commit comments

Comments
 (0)