Skip to content

Commit eacf032

Browse files
BUG: Fix copy semantics in __array__ (#60046)
Co-authored-by: Joris Van den Bossche <[email protected]>
1 parent cbf6e42 commit eacf032

File tree

20 files changed

+255
-30
lines changed

20 files changed

+255
-30
lines changed

doc/source/whatsnew/v2.3.0.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,9 @@ enhancement1
3232
Other enhancements
3333
^^^^^^^^^^^^^^^^^^
3434

35+
- The semantics for the ``copy`` keyword in ``__array__`` methods (i.e. called
36+
when using ``np.array()`` or ``np.asarray()`` on pandas objects) has been
37+
updated to work correctly with NumPy >= 2 (:issue:`57739`)
3538
- The :meth:`~Series.sum` reduction is now implemented for ``StringDtype`` columns (:issue:`59853`)
3639
-
3740

pandas/core/arrays/arrow/array.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -668,7 +668,16 @@ def __array__(
668668
self, dtype: NpDtype | None = None, copy: bool | None = None
669669
) -> np.ndarray:
670670
"""Correctly construct numpy arrays when passed to `np.asarray()`."""
671-
return self.to_numpy(dtype=dtype)
671+
if copy is False:
672+
# TODO: By using `zero_copy_only` it may be possible to implement this
673+
raise ValueError(
674+
"Unable to avoid copy while creating an array as requested."
675+
)
676+
elif copy is None:
677+
# `to_numpy(copy=False)` has the meaning of NumPy `copy=None`.
678+
copy = False
679+
680+
return self.to_numpy(dtype=dtype, copy=copy)
672681

673682
def __invert__(self) -> Self:
674683
# This is a bitwise op for integer types

pandas/core/arrays/categorical.py

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -579,11 +579,12 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
579579
raise ValueError("Cannot convert float NaN to integer")
580580

581581
elif len(self.codes) == 0 or len(self.categories) == 0:
582-
result = np.array(
583-
self,
584-
dtype=dtype,
585-
copy=copy,
586-
)
582+
# For NumPy 1.x compatibility we cannot use copy=None. And
583+
# `copy=False` has the meaning of `copy=None` here:
584+
if not copy:
585+
result = np.asarray(self, dtype=dtype)
586+
else:
587+
result = np.array(self, dtype=dtype)
587588

588589
else:
589590
# GH8628 (PERF): astype category codes instead of astyping array
@@ -1663,7 +1664,7 @@ def __array__(
16631664
Specifies the dtype for the array.
16641665
16651666
copy : bool or None, optional
1666-
Unused.
1667+
See :func:`numpy.asarray`.
16671668
16681669
Returns
16691670
-------
@@ -1686,13 +1687,18 @@ def __array__(
16861687
>>> np.asarray(cat)
16871688
array(['a', 'b'], dtype=object)
16881689
"""
1690+
if copy is False:
1691+
raise ValueError(
1692+
"Unable to avoid copy while creating an array as requested."
1693+
)
1694+
16891695
ret = take_nd(self.categories._values, self._codes)
1690-
if dtype and np.dtype(dtype) != self.categories.dtype:
1691-
return np.asarray(ret, dtype)
16921696
# When we're a Categorical[ExtensionArray], like Interval,
16931697
# we need to ensure __array__ gets all the way to an
16941698
# ndarray.
1695-
return np.asarray(ret)
1699+
1700+
# `take_nd` should already make a copy, so don't force again.
1701+
return np.asarray(ret, dtype=dtype)
16961702

16971703
def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
16981704
# for binary ops, use our custom dunder methods

pandas/core/arrays/datetimelike.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -359,7 +359,14 @@ def __array__(
359359
) -> np.ndarray:
360360
# used for Timedelta/DatetimeArray, overridden by PeriodArray
361361
if is_object_dtype(dtype):
362+
if copy is False:
363+
raise ValueError(
364+
"Unable to avoid copy while creating an array as requested."
365+
)
362366
return np.array(list(self), dtype=object)
367+
368+
if copy is True:
369+
return np.array(self._ndarray, dtype=dtype)
363370
return self._ndarray
364371

365372
@overload

pandas/core/arrays/interval.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1622,6 +1622,11 @@ def __array__(
16221622
Return the IntervalArray's data as a numpy array of Interval
16231623
objects (with dtype='object')
16241624
"""
1625+
if copy is False:
1626+
raise ValueError(
1627+
"Unable to avoid copy while creating an array as requested."
1628+
)
1629+
16251630
left = self._left
16261631
right = self._right
16271632
mask = self.isna()

pandas/core/arrays/masked.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -581,7 +581,17 @@ def __array__(
581581
the array interface, return my values
582582
We return an object array here to preserve our scalar values
583583
"""
584-
return self.to_numpy(dtype=dtype)
584+
if copy is False:
585+
if not self._hasna:
586+
# special case, here we can simply return the underlying data
587+
return np.array(self._data, dtype=dtype, copy=copy)
588+
raise ValueError(
589+
"Unable to avoid copy while creating an array as requested."
590+
)
591+
592+
if copy is None:
593+
copy = False # The NumPy copy=False meaning is different here.
594+
return self.to_numpy(dtype=dtype, copy=copy)
585595

586596
_HANDLED_TYPES: tuple[type, ...]
587597

pandas/core/arrays/numpy_.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,9 @@ def dtype(self) -> NumpyEADtype:
150150
def __array__(
151151
self, dtype: NpDtype | None = None, copy: bool | None = None
152152
) -> np.ndarray:
153+
if copy is not None:
154+
# Note: branch avoids `copy=None` for NumPy 1.x support
155+
return np.array(self._ndarray, dtype=dtype, copy=copy)
153156
return np.asarray(self._ndarray, dtype=dtype)
154157

155158
def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):

pandas/core/arrays/period.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -390,8 +390,19 @@ def __array__(
390390
self, dtype: NpDtype | None = None, copy: bool | None = None
391391
) -> np.ndarray:
392392
if dtype == "i8":
393-
return self.asi8
394-
elif dtype == bool:
393+
# For NumPy 1.x compatibility we cannot use copy=None. And
394+
# `copy=False` has the meaning of `copy=None` here:
395+
if not copy:
396+
return np.asarray(self.asi8, dtype=dtype)
397+
else:
398+
return np.array(self.asi8, dtype=dtype)
399+
400+
if copy is False:
401+
raise ValueError(
402+
"Unable to avoid copy while creating an array as requested."
403+
)
404+
405+
if dtype == bool:
395406
return ~self._isnan
396407

397408
# This will raise TypeError for non-object dtypes

pandas/core/arrays/sparse/array.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -547,11 +547,20 @@ def from_spmatrix(cls, data: spmatrix) -> Self:
547547
def __array__(
548548
self, dtype: NpDtype | None = None, copy: bool | None = None
549549
) -> np.ndarray:
550-
fill_value = self.fill_value
551-
552550
if self.sp_index.ngaps == 0:
553551
# Compat for na dtype and int values.
554-
return self.sp_values
552+
if copy is True:
553+
return np.array(self.sp_values)
554+
else:
555+
return self.sp_values
556+
557+
if copy is False:
558+
raise ValueError(
559+
"Unable to avoid copy while creating an array as requested."
560+
)
561+
562+
fill_value = self.fill_value
563+
555564
if dtype is None:
556565
# Can NumPy represent this type?
557566
# If not, `np.result_type` will raise. We catch that

pandas/core/generic.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2015,8 +2015,17 @@ def __array__(
20152015
self, dtype: npt.DTypeLike | None = None, copy: bool | None = None
20162016
) -> np.ndarray:
20172017
values = self._values
2018-
arr = np.asarray(values, dtype=dtype)
2019-
if astype_is_view(values.dtype, arr.dtype) and self._mgr.is_single_block:
2018+
if copy is None:
2019+
# Note: branch avoids `copy=None` for NumPy 1.x support
2020+
arr = np.asarray(values, dtype=dtype)
2021+
else:
2022+
arr = np.array(values, dtype=dtype, copy=copy)
2023+
2024+
if (
2025+
copy is not True
2026+
and astype_is_view(values.dtype, arr.dtype)
2027+
and self._mgr.is_single_block
2028+
):
20202029
# Check if both conversions can be done without a copy
20212030
if astype_is_view(self.dtypes.iloc[0], values.dtype) and astype_is_view(
20222031
values.dtype, arr.dtype

0 commit comments

Comments
 (0)