Skip to content

Commit 1d082fd

Browse files
authored
Merge branch 'main' into ref-split-pattern
2 parents faf22ed + cf2dfa7 commit 1d082fd

File tree

93 files changed

+1442
-1150
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

93 files changed

+1442
-1150
lines changed

.github/workflows/posix.yml

Lines changed: 38 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ on:
1313
- "doc/**"
1414

1515
env:
16-
PYTEST_WORKERS: "auto"
1716
PANDAS_CI: 1
1817

1918
jobs:
@@ -25,33 +24,48 @@ jobs:
2524
timeout-minutes: 120
2625
strategy:
2726
matrix:
28-
settings: [
29-
[actions-38-downstream_compat.yaml, "not slow and not network and not single_cpu", "", "", "", "", ""],
30-
[actions-38-minimum_versions.yaml, "not single_cpu", "", "", "", "", ""],
31-
[actions-38.yaml, "not slow and not network and not single_cpu", "language-pack-it", "it_IT.utf8", "it_IT.utf8", "", ""],
32-
[actions-38.yaml, "not slow and not network and not single_cpu", "language-pack-zh-hans", "zh_CN.utf8", "zh_CN.utf8", "", ""],
33-
[actions-38.yaml, "not single_cpu", "", "", "", "", ""],
34-
[actions-pypy-38.yaml, "not slow and not single_cpu", "", "", "", "", "--max-worker-restart 0"],
35-
[actions-39.yaml, "not single_cpu", "", "", "", "", ""],
36-
[actions-310-numpydev.yaml, "not slow and not network and not single_cpu", "", "", "", "deprecate", "-W error"],
37-
[actions-310.yaml, "not single_cpu", "", "", "", "", ""],
38-
]
27+
env_file: [actions-38.yaml, actions-39.yaml, actions-310.yaml]
28+
pattern: ["not single_cpu", "single_cpu"]
29+
include:
30+
- env_file: actions-38-downstream_compat.yaml
31+
pattern: "not slow and not network and not single_cpu"
32+
pytest_target: "pandas/tests/test_downstream.py"
33+
- env_file: actions-38-minimum_versions.yaml
34+
pattern: "not slow and not network and not single_cpu"
35+
- env_file: actions-38.yaml
36+
pattern: "not slow and not network and not single_cpu"
37+
extra_apt: "language-pack-it"
38+
lang: "it_IT.utf8"
39+
lc_all: "it_IT.utf8"
40+
- env_file: actions-38.yaml
41+
pattern: "not slow and not network and not single_cpu"
42+
extra_apt: "language-pack-zh-hans"
43+
lang: "zh_CN.utf8"
44+
lc_all: "zh_CN.utf8"
45+
- env_file: actions-pypy-38.yaml
46+
pattern: "not slow and not network and not single_cpu"
47+
test_args: "--max-worker-restart 0"
48+
- env_file: actions-310-numpydev.yaml
49+
pattern: "not slow and not network and not single_cpu"
50+
pandas_testing_mode: "deprecate"
51+
test_args: "-W error"
3952
fail-fast: false
4053
env:
41-
ENV_FILE: ci/deps/${{ matrix.settings[0] }}
42-
PATTERN: ${{ matrix.settings[1] }}
43-
EXTRA_APT: ${{ matrix.settings[2] }}
44-
LANG: ${{ matrix.settings[3] }}
45-
LC_ALL: ${{ matrix.settings[4] }}
46-
PANDAS_TESTING_MODE: ${{ matrix.settings[5] }}
47-
TEST_ARGS: ${{ matrix.settings[6] }}
48-
PYTEST_TARGET: pandas
49-
IS_PYPY: ${{ contains(matrix.settings[0], 'pypy') }}
54+
ENV_FILE: ci/deps/${{ matrix.env_file }}
55+
PATTERN: ${{ matrix.pattern }}
56+
EXTRA_APT: ${{ matrix.extra_apt || '' }}
57+
LANG: ${{ matrix.lang || '' }}
58+
LC_ALL: ${{ matrix.lc_all || '' }}
59+
PANDAS_TESTING_MODE: ${{ matrix.pandas_testing_mode || '' }}
60+
TEST_ARGS: ${{ matrix.test_args || '' }}
61+
PYTEST_WORKERS: ${{ contains(matrix.pattern, 'not single_cpu') && 'auto' || '1' }}
62+
PYTEST_TARGET: ${{ matrix.pytest_target || 'pandas' }}
63+
IS_PYPY: ${{ contains(matrix.env_file, 'pypy') }}
5064
# TODO: re-enable coverage on pypy, it's slow
51-
COVERAGE: ${{ !contains(matrix.settings[0], 'pypy') }}
65+
COVERAGE: ${{ !contains(matrix.env_file, 'pypy') }}
5266
concurrency:
5367
# https://github.community/t/concurrecy-not-work-for-push/183068/7
54-
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.settings[0] }}-${{ matrix.settings[1] }}-${{ matrix.settings[2] }}
68+
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.extra_apt || '' }}
5569
cancel-in-progress: true
5670

5771
services:
@@ -129,8 +143,7 @@ jobs:
129143
shell: bash
130144
run: |
131145
# TODO: re-enable cov, it's slowing the tests down though
132-
# TODO: Unpin Cython, the new Cython 0.29.26 is causing compilation errors
133-
pip install Cython==0.29.25 numpy python-dateutil pytz pytest>=6.0 pytest-xdist>=1.31.0 hypothesis>=5.5.3
146+
# Quote the version specifiers: unquoted, bash parses ">=6.0" as an output
# redirection to a file named "=6.0" and pip never sees the constraint.
pip install Cython numpy python-dateutil pytz "pytest>=6.0" "pytest-xdist>=1.31.0" "hypothesis>=5.5.3"
134147
if: ${{ env.IS_PYPY == 'true' }}
135148

136149
- name: Build Pandas

doc/source/user_guide/io.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3675,6 +3675,10 @@ should be passed to ``index_col`` and ``header``:
36753675
36763676
os.remove("path_to_file.xlsx")
36773677
3678+
Missing values in columns specified in ``index_col`` will be forward filled to
3679+
allow roundtripping with ``to_excel`` for ``merged_cells=True``. To avoid forward
3680+
filling the missing values use ``set_index`` after reading the data instead of
3681+
``index_col``.
36783682

36793683
Parsing specific columns
36803684
++++++++++++++++++++++++

doc/source/whatsnew/v1.4.2.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ including other versions of pandas.
1414

1515
Fixed regressions
1616
~~~~~~~~~~~~~~~~~
17-
-
17+
- Fixed regression in :meth:`DataFrame.drop` and :meth:`Series.drop` when :class:`Index` had extension dtype and duplicates (:issue:`45820`)
1818
-
1919

2020
.. ---------------------------------------------------------------------------

doc/source/whatsnew/v1.5.0.rst

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -304,7 +304,7 @@ Conversion
304304

305305
Strings
306306
^^^^^^^
307-
-
307+
- Bug in :meth:`str.startswith` and :meth:`str.endswith` when using another :class:`Series` as parameter ``pat``. Now raises ``TypeError`` (:issue:`3485`)
308308
-
309309

310310
Interval
@@ -316,6 +316,7 @@ Indexing
316316
^^^^^^^^
317317
- Bug in :meth:`loc.__getitem__` with a list of keys causing an internal inconsistency that could lead to a disconnect between ``frame.at[x, y]`` vs ``frame[y].loc[x]`` (:issue:`22372`)
318318
- Bug in :meth:`DataFrame.iloc` where indexing a single row on a :class:`DataFrame` with a single ExtensionDtype column gave a copy instead of a view on the underlying data (:issue:`45241`)
319+
- Bug in :meth:`Series.align` not creating a :class:`MultiIndex` with the union of levels when both MultiIndexes' intersections are identical (:issue:`45224`)
319320
- Bug in setting a NA value (``None`` or ``np.nan``) into a :class:`Series` with int-based :class:`IntervalDtype` incorrectly casting to object dtype instead of a float-based :class:`IntervalDtype` (:issue:`45568`)
320321
- Bug in :meth:`Series.__setitem__` with a non-integer :class:`Index` when using an integer key to set a value that cannot be set inplace where a ``ValueError`` was raised instead of casting to a common dtype (:issue:`45070`)
321322
- Bug in :meth:`Series.__setitem__` when setting incompatible values into a ``PeriodDtype`` or ``IntervalDtype`` :class:`Series` raising when indexing with a boolean mask but coercing when indexing with otherwise-equivalent indexers; these now consistently coerce, along with :meth:`Series.mask` and :meth:`Series.where` (:issue:`45768`)
@@ -326,6 +327,7 @@ Indexing
326327
- Bug in :meth:`Series.__setitem__` when setting ``boolean`` dtype values containing ``NA`` incorrectly raising instead of casting to ``boolean`` dtype (:issue:`45462`)
327328
- Bug in :meth:`Series.__setitem__` where setting :attr:`NA` into a numeric-dtype :class:`Series` would incorrectly upcast to object-dtype rather than treating the value as ``np.nan`` (:issue:`44199`)
328329
- Bug in :meth:`Series.__setitem__` with ``datetime64[ns]`` dtype, an all-``False`` boolean mask, and an incompatible value incorrectly casting to ``object`` instead of retaining ``datetime64[ns]`` dtype (:issue:`45967`)
330+
- Bug in :meth:`Index.__getitem__` raising ``ValueError`` when the indexer has boolean dtype with ``NA`` (:issue:`45806`)
329331
- Bug in :meth:`Series.mask` with ``inplace=True`` or setting values with a boolean mask with small integer dtypes incorrectly raising (:issue:`45750`)
330332
- Bug in :meth:`DataFrame.mask` with ``inplace=True`` and ``ExtensionDtype`` columns incorrectly raising (:issue:`45577`)
331333
- Bug in getting a column from a DataFrame with an object-dtype row index with datetime-like values: the resulting Series now preserves the exact object-dtype Index from the parent DataFrame (:issue:`42950`)
@@ -353,12 +355,13 @@ I/O
353355
- Bug in :func:`read_excel` results in an infinite loop with certain ``skiprows`` callables (:issue:`45585`)
354356
- Bug in :meth:`DataFrame.info` where a new line at the end of the output is omitted when called on an empty :class:`DataFrame` (:issue:`45494`)
355357
- Bug in :func:`read_csv` not recognizing line break for ``on_bad_lines="warn"`` for ``engine="c"`` (:issue:`41710`)
358+
- Bug in :meth:`DataFrame.to_csv` not respecting ``float_format`` for ``Float64`` dtype (:issue:`45991`)
356359
- Bug in :func:`read_parquet` when ``engine="pyarrow"`` which caused partial write to disk when column of unsupported datatype was passed (:issue:`44914`)
357360
- Bug in :func:`DataFrame.to_excel` and :class:`ExcelWriter` would raise when writing an empty DataFrame to a ``.ods`` file (:issue:`45793`)
358361

359362
Period
360363
^^^^^^
361-
-
364+
- Bug in subtraction of :class:`Period` from :class:`PeriodArray` returning wrong results (:issue:`45999`)
362365
-
363366

364367
Plotting
@@ -372,7 +375,8 @@ Plotting
372375
Groupby/resample/rolling
373376
^^^^^^^^^^^^^^^^^^^^^^^^
374377
- Bug in :meth:`DataFrame.resample` ignoring ``closed="right"`` on :class:`TimedeltaIndex` (:issue:`45414`)
375-
- Bug in :meth:`.DataFrameGroupBy.transform` fails when the input DataFrame has multiple columns (:issue:`27469`)
378+
- Bug in :meth:`.DataFrameGroupBy.transform` failing when ``func="size"`` and the input DataFrame has multiple columns (:issue:`27469`)
379+
- Bug in :meth:`.DataFrameGroupBy.size` and :meth:`.DataFrameGroupBy.transform` with ``func="size"`` producing incorrect results when ``axis=1`` (:issue:`45715`)
376380

377381
Reshaping
378382
^^^^^^^^^

pandas/_libs/tslibs/ctime.pyx

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
"""
2+
Cython implementation of (parts of) the standard library time module.
3+
"""
4+
5+
from cpython.exc cimport PyErr_SetFromErrno
6+
from libc.stdint cimport int64_t
7+
8+
9+
cdef extern from "Python.h":
10+
ctypedef int64_t _PyTime_t
11+
_PyTime_t _PyTime_GetSystemClock() nogil
12+
double _PyTime_AsSecondsDouble(_PyTime_t t) nogil
13+
14+
from libc.time cimport (
15+
localtime as libc_localtime,
16+
time_t,
17+
tm,
18+
)
19+
20+
21+
def pytime():
22+
"""
23+
python-exposed for testing
24+
"""
25+
return time()
26+
27+
28+
def pylocaltime():
29+
"""
30+
python-exposed for testing
31+
"""
32+
lt = localtime()
33+
# https://github.com/pandas-dev/pandas/pull/45864#issuecomment-1033021599
34+
return {
35+
"tm_year": lt.tm_year,
36+
"tm_mon": lt.tm_mon,
37+
"tm_mday": lt.tm_mday,
38+
"tm_hour": lt.tm_hour,
39+
"tm_min": lt.tm_min,
40+
"tm_sec": lt.tm_sec,
41+
"tm_wday": lt.tm_wday,
42+
"tm_yday": lt.tm_yday,
43+
"tm_isdst": lt.tm_isdst,
44+
}
45+
46+
47+
cdef inline double time() nogil:
48+
cdef:
49+
_PyTime_t tic
50+
51+
tic = _PyTime_GetSystemClock()
52+
return _PyTime_AsSecondsDouble(tic)
53+
54+
55+
cdef inline int _raise_from_errno() except -1 with gil:
56+
PyErr_SetFromErrno(RuntimeError)
57+
return <int>-1 # Let the C compiler know that this function always raises.
58+
59+
60+
cdef inline tm localtime() nogil except *:
61+
"""
62+
Analogue to the stdlib time.localtime. The returned struct
63+
has some entries that the stdlib version does not: tm_gmtoff, tm_zone
64+
"""
65+
cdef:
66+
time_t tic = <time_t>time()
67+
tm* result
68+
69+
result = libc_localtime(&tic)
70+
if result is NULL:
71+
_raise_from_errno()
72+
# Fix 0-based date values (and the 1900-based year).
73+
# See tmtotuple() in https://github.com/python/cpython/blob/master/Modules/timemodule.c
74+
result.tm_year += 1900
75+
result.tm_mon += 1
76+
result.tm_wday = (result.tm_wday + 6) % 7
77+
result.tm_yday += 1
78+
return result[0]

pandas/_testing/_io.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import bz2
44
from functools import wraps
55
import gzip
6+
import socket
67
from typing import (
78
TYPE_CHECKING,
89
Any,
@@ -73,7 +74,13 @@ def _get_default_network_errors():
7374
import http.client
7475
import urllib.error
7576

76-
return (OSError, http.client.HTTPException, TimeoutError, urllib.error.URLError)
77+
return (
78+
OSError,
79+
http.client.HTTPException,
80+
TimeoutError,
81+
urllib.error.URLError,
82+
socket.timeout,
83+
)
7784

7885

7986
def optional_args(decorator):
@@ -264,8 +271,10 @@ def can_connect(url, error_classes=None):
264271
error_classes = _get_default_network_errors()
265272

266273
try:
267-
with urlopen(url):
268-
pass
274+
with urlopen(url, timeout=20) as response:
275+
# Timeout just in case rate-limiting is applied
276+
if response.status != 200:
277+
return False
269278
except error_classes:
270279
return False
271280
else:

pandas/_testing/contexts.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,13 @@
33
from contextlib import contextmanager
44
import os
55
from pathlib import Path
6-
import random
76
from shutil import rmtree
8-
import string
97
import tempfile
108
from typing import (
119
IO,
1210
Any,
1311
)
12+
import uuid
1413

1514
import numpy as np
1615

@@ -107,9 +106,7 @@ def ensure_clean(filename=None, return_filelike: bool = False, **kwargs: Any):
107106

108107
if filename is None:
109108
filename = ""
110-
filename = (
111-
"".join(random.choices(string.ascii_letters + string.digits, k=30)) + filename
112-
)
109+
filename = str(uuid.uuid4()) + filename
113110
path = folder / filename
114111

115112
path.touch()

pandas/conftest.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,14 @@ def axis(request):
229229
axis_frame = axis
230230

231231

232+
@pytest.fixture(params=[1, "columns"], ids=lambda x: f"axis={repr(x)}")
233+
def axis_1(request):
234+
"""
235+
Fixture for returning aliases of axis 1 of a DataFrame.
236+
"""
237+
return request.param
238+
239+
232240
@pytest.fixture(params=[True, False, None])
233241
def observed(request):
234242
"""

pandas/core/algorithms.py

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@
6464
)
6565
from pandas.core.dtypes.concat import concat_compat
6666
from pandas.core.dtypes.dtypes import (
67+
BaseMaskedDtype,
6768
ExtensionDtype,
6869
PandasDtype,
6970
)
@@ -103,6 +104,7 @@
103104
Series,
104105
)
105106
from pandas.core.arrays import (
107+
BaseMaskedArray,
106108
DatetimeArray,
107109
ExtensionArray,
108110
TimedeltaArray,
@@ -142,6 +144,15 @@ def _ensure_data(values: ArrayLike) -> np.ndarray:
142144
if is_object_dtype(values.dtype):
143145
return ensure_object(np.asarray(values))
144146

147+
elif isinstance(values.dtype, BaseMaskedDtype):
148+
# i.e. BooleanArray, FloatingArray, IntegerArray
149+
values = cast("BaseMaskedArray", values)
150+
if not values._hasna:
151+
# No pd.NAs -> We can avoid an object-dtype cast (and copy) GH#41816
152+
# recurse to avoid re-implementing logic for eg bool->uint8
153+
return _ensure_data(values._data)
154+
return np.asarray(values)
155+
145156
elif is_bool_dtype(values.dtype):
146157
if isinstance(values, np.ndarray):
147158
# i.e. actually dtype == np.dtype("bool")
@@ -1188,18 +1199,6 @@ def compute(self, method: str) -> Series:
11881199
dropped = self.obj.dropna()
11891200
nan_index = self.obj.drop(dropped.index)
11901201

1191-
if is_extension_array_dtype(dropped.dtype):
1192-
# GH#41816 bc we have dropped NAs above, MaskedArrays can use the
1193-
# numpy logic.
1194-
from pandas.core.arrays import BaseMaskedArray
1195-
1196-
arr = dropped._values
1197-
if isinstance(arr, BaseMaskedArray):
1198-
ser = type(dropped)(arr._data, index=dropped.index, name=dropped.name)
1199-
1200-
result = type(self)(ser, n=self.n, keep=self.keep).compute(method)
1201-
return result.astype(arr.dtype)
1202-
12031202
# slow method
12041203
if n >= len(self.obj):
12051204
ascending = method == "nsmallest"

pandas/core/arrays/boolean.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -381,6 +381,3 @@ def _logical_method(self, other, op):
381381
# error: Argument 2 to "BooleanArray" has incompatible type "Optional[Any]";
382382
# expected "ndarray"
383383
return BooleanArray(result, mask) # type: ignore[arg-type]
384-
385-
def __abs__(self):
386-
return self.copy()

0 commit comments

Comments
 (0)