Skip to content

Commit 1a5ec4e

Browse files
committed
Merge branch 'main' into np-array-copy-keyword
2 parents f66cd05 + 1bf86a3 commit 1a5ec4e

39 files changed

+273
-51
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,7 @@ Removal of prior version deprecations/changes
244244

245245
Performance improvements
246246
~~~~~~~~~~~~~~~~~~~~~~~~
247+
- :meth:`Series.str.extract` returns :class:`RangeIndex` columns instead of :class:`Index` columns when possible (:issue:`57542`)
247248
- Performance improvement in :class:`DataFrame` when ``data`` is a ``dict`` and ``columns`` is specified (:issue:`24368`)
248249
- Performance improvement in :meth:`DataFrame.join` for sorted but non-unique indexes (:issue:`56941`)
249250
- Performance improvement in :meth:`DataFrame.join` when left and/or right are non-unique and ``how`` is ``"left"``, ``"right"``, or ``"inner"`` (:issue:`56817`)
@@ -252,11 +253,11 @@ Performance improvements
252253
- Performance improvement in :meth:`Index.join` by propagating cached attributes in cases where the result matches one of the inputs (:issue:`57023`)
253254
- Performance improvement in :meth:`Index.take` when ``indices`` is a full range indexer from zero to length of index (:issue:`56806`)
254255
- Performance improvement in :meth:`MultiIndex.equals` for equal length indexes (:issue:`56990`)
256+
- Performance improvement in :meth:`RangeIndex.__getitem__` with a boolean mask returning a :class:`RangeIndex` instead of an :class:`Index` when possible. (:issue:`57588`)
255257
- Performance improvement in :meth:`RangeIndex.append` when appending the same index (:issue:`57252`)
256258
- Performance improvement in :meth:`RangeIndex.take` returning a :class:`RangeIndex` instead of an :class:`Index` when possible. (:issue:`57445`)
257-
- Performance improvement in indexing operations for string dtypes (:issue:`56997`)
258-
- :meth:`Series.str.extract` returns a :class:`RangeIndex` columns instead of an :class:`Index` column when possible (:issue:`?``)
259259
- Performance improvement in ``DataFrameGroupBy.__len__`` and ``SeriesGroupBy.__len__`` (:issue:`57595`)
260+
- Performance improvement in indexing operations for string dtypes (:issue:`56997`)
260261

261262
.. ---------------------------------------------------------------------------
262263
.. _whatsnew_300.bug_fixes:
@@ -265,6 +266,7 @@ Bug fixes
265266
~~~~~~~~~
266267
- Fixed bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`)
267268
- Fixed bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`)
269+
- Fixed bug in :meth:`DataFrame.update` where bool dtype was being converted to object (:issue:`55509`)
268270
- Fixed bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`)
269271

270272
Categorical
@@ -324,6 +326,7 @@ MultiIndex
324326

325327
I/O
326328
^^^
329+
- Bug in :meth:`DataFrame.to_excel` when writing an empty :class:`DataFrame` with :class:`MultiIndex` on both axes (:issue:`57696`)
327330
-
328331
-
329332

pandas/_libs/src/vendored/ujson/python/objToJSON.c

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,6 @@ typedef struct __NpyArrContext {
7474
npy_intp ndim;
7575
npy_intp index[NPY_MAXDIMS];
7676
int type_num;
77-
PyArray_GetItemFunc *getitem;
7877

7978
char **rowLabels;
8079
char **columnLabels;
@@ -405,7 +404,6 @@ static void NpyArr_iterBegin(JSOBJ _obj, JSONTypeContext *tc) {
405404
}
406405

407406
npyarr->array = (PyObject *)obj;
408-
npyarr->getitem = (PyArray_GetItemFunc *)PyArray_DESCR(obj)->f->getitem;
409407
npyarr->dataptr = PyArray_DATA(obj);
410408
npyarr->ndim = PyArray_NDIM(obj) - 1;
411409
npyarr->curdim = 0;
@@ -492,7 +490,7 @@ static int NpyArr_iterNextItem(JSOBJ obj, JSONTypeContext *tc) {
492490
((PyObjectEncoder *)tc->encoder)->npyValue = npyarr->dataptr;
493491
((PyObjectEncoder *)tc->encoder)->npyCtxtPassthru = npyarr;
494492
} else {
495-
GET_TC(tc)->itemValue = npyarr->getitem(npyarr->dataptr, npyarr->array);
493+
GET_TC(tc)->itemValue = PyArray_GETITEM(arrayobj, npyarr->dataptr);
496494
}
497495

498496
npyarr->dataptr += npyarr->stride;

pandas/core/array_algos/quantile.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44

55
import numpy as np
66

7+
from pandas.compat.numpy import np_version_gt2
8+
79
from pandas.core.dtypes.missing import (
810
isna,
911
na_value_for_dtype,
@@ -102,7 +104,8 @@ def quantile_with_mask(
102104
interpolation=interpolation,
103105
)
104106

105-
result = np.array(result, copy=False)
107+
copy_false = None if np_version_gt2 else False
108+
result = np.array(result, copy=copy_false)
106109
result = result.T
107110

108111
return result
@@ -199,11 +202,12 @@ def _nanpercentile(
199202
_nanpercentile_1d(val, m, qs, na_value, interpolation=interpolation)
200203
for (val, m) in zip(list(values), list(mask))
201204
]
205+
copy_false = None if np_version_gt2 else False
202206
if values.dtype.kind == "f":
203207
# preserve itemsize
204-
result = np.array(result, dtype=values.dtype, copy=False).T
208+
result = np.array(result, dtype=values.dtype, copy=copy_false).T
205209
else:
206-
result = np.array(result, copy=False).T
210+
result = np.array(result, copy=copy_false).T
207211
if (
208212
result.dtype != values.dtype
209213
and not mask.all()

pandas/core/arrays/arrow/array.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -659,7 +659,9 @@ def __arrow_array__(self, type=None):
659659
"""Convert myself to a pyarrow ChunkedArray."""
660660
return self._pa_array
661661

662-
def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
662+
def __array__(
663+
self, dtype: NpDtype | None = None, copy: bool | None = None
664+
) -> np.ndarray:
663665
"""Correctly construct numpy arrays when passed to `np.asarray()`."""
664666
return self.to_numpy(dtype=dtype)
665667

pandas/core/arrays/base.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,10 @@
2727
lib,
2828
)
2929
from pandas.compat import set_function_name
30-
from pandas.compat.numpy import function as nv
30+
from pandas.compat.numpy import (
31+
function as nv,
32+
np_version_gt2,
33+
)
3134
from pandas.errors import AbstractMethodError
3235
from pandas.util._decorators import (
3336
Appender,
@@ -710,6 +713,8 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
710713
return self
711714
else:
712715
return self.copy()
716+
if np_version_gt2 and not copy:
717+
copy = None
713718

714719
if isinstance(dtype, ExtensionDtype):
715720
cls = dtype.construct_array_type()

pandas/core/arrays/categorical.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1659,7 +1659,9 @@ def _validate_codes_for_dtype(cls, codes, *, dtype: CategoricalDtype) -> np.ndar
16591659
# -------------------------------------------------------------
16601660

16611661
@ravel_compat
1662-
def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
1662+
def __array__(
1663+
self, dtype: NpDtype | None = None, copy: bool | None = None
1664+
) -> np.ndarray:
16631665
"""
16641666
The numpy array interface.
16651667

pandas/core/arrays/datetimelike.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -353,7 +353,9 @@ def _formatter(self, boxed: bool = False) -> Callable[[object], str]:
353353
# ----------------------------------------------------------------
354354
# Array-Like / EA-Interface Methods
355355

356-
def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
356+
def __array__(
357+
self, dtype: NpDtype | None = None, copy: bool | None = None
358+
) -> np.ndarray:
357359
# used for Timedelta/DatetimeArray, overwritten by PeriodArray
358360
if is_object_dtype(dtype):
359361
return np.array(list(self), dtype=object)

pandas/core/arrays/datetimes.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
tzconversion,
4444
)
4545
from pandas._libs.tslibs.dtypes import abbrev_to_npy_unit
46+
from pandas.compat.numpy import np_version_gt2
4647
from pandas.errors import PerformanceWarning
4748
from pandas.util._exceptions import find_stack_level
4849
from pandas.util._validators import validate_inclusive
@@ -649,12 +650,12 @@ def _resolution_obj(self) -> Resolution:
649650
# ----------------------------------------------------------------
650651
# Array-Like / EA-Interface Methods
651652

652-
def __array__(self, dtype=None) -> np.ndarray:
653+
def __array__(self, dtype=None, copy=None) -> np.ndarray:
653654
if dtype is None and self.tz:
654655
# The default for tz-aware is object, to preserve tz info
655656
dtype = object
656657

657-
return super().__array__(dtype=dtype)
658+
return super().__array__(dtype=dtype, copy=copy)
658659

659660
def __iter__(self) -> Iterator:
660661
"""
@@ -2421,7 +2422,8 @@ def objects_to_datetime64(
24212422
assert errors in ["raise", "coerce"]
24222423

24232424
# if str-dtype, convert
2424-
data = np.array(data, copy=False, dtype=np.object_)
2425+
copy_false = None if np_version_gt2 else False
2426+
data = np.array(data, dtype=np.object_, copy=copy_false)
24252427

24262428
result, tz_parsed = tslib.array_to_datetime(
24272429
data,

pandas/core/arrays/interval.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1564,7 +1564,9 @@ def is_non_overlapping_monotonic(self) -> bool:
15641564
# ---------------------------------------------------------------------
15651565
# Conversion
15661566

1567-
def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
1567+
def __array__(
1568+
self, dtype: NpDtype | None = None, copy: bool | None = None
1569+
) -> np.ndarray:
15681570
"""
15691571
Return the IntervalArray's data as a numpy array of Interval
15701572
objects (with dtype='object')

pandas/core/arrays/masked.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -594,7 +594,9 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
594594

595595
__array_priority__ = 1000 # higher than ndarray so ops dispatch to us
596596

597-
def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
597+
def __array__(
598+
self, dtype: NpDtype | None = None, copy: bool | None = None
599+
) -> np.ndarray:
598600
"""
599601
the array interface, return my values
600602
We return an object array here to preserve our scalar values

0 commit comments

Comments
 (0)