From 6f61d7b590ccef2b1933f65e885f95531eeb6f89 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Sat, 15 Mar 2025 13:34:22 +0700 Subject: [PATCH 1/9] ENH: Adding engine parameter to Series.map --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/series.py | 30 +++++++++++++ pandas/tests/apply/common.py | 59 +++++++++++++++++++++++++ pandas/tests/apply/test_frame_apply.py | 54 +--------------------- pandas/tests/apply/test_series_apply.py | 4 +- 5 files changed, 93 insertions(+), 55 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 873c1e7cd41cc..6610b7813ceea 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -71,6 +71,7 @@ Other enhancements - :meth:`DataFrameGroupBy.transform`, :meth:`SeriesGroupBy.transform`, :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, :meth:`RollingGroupby.apply`, :meth:`ExpandingGroupby.apply`, :meth:`Rolling.apply`, :meth:`Expanding.apply`, :meth:`DataFrame.apply` with ``engine="numba"`` now supports positional arguments passed as kwargs (:issue:`58995`) - :meth:`Rolling.agg`, :meth:`Expanding.agg` and :meth:`ExponentialMovingWindow.agg` now accept :class:`NamedAgg` aggregations through ``**kwargs`` (:issue:`28333`) - :meth:`Series.map` can now accept kwargs to pass on to func (:issue:`59814`) +- :meth:`Series.map` now accepts an ``engine`` parameter to allow execution with a third-party execution engine (:issue:`61125`) - :meth:`Series.str.get_dummies` now accepts a ``dtype`` parameter to specify the dtype of the resulting DataFrame (:issue:`47872`) - :meth:`pandas.concat` will raise a ``ValueError`` when ``ignore_index=True`` and ``keys`` is not ``None`` (:issue:`59274`) - :py:class:`frozenset` elements in pandas objects are now natively printed (:issue:`60690`) diff --git a/pandas/core/series.py b/pandas/core/series.py index da46f8ede3409..4f219cc6d5e2f 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4322,6 +4322,7 @@ def map( self, 
arg: Callable | Mapping | Series, na_action: Literal["ignore"] | None = None, + engine: Callable | None = None, **kwargs, ) -> Series: """ @@ -4338,6 +4339,23 @@ def map( na_action : {None, 'ignore'}, default None If 'ignore', propagate NaN values, without passing them to the mapping correspondence. + engine : decorator, optional + Choose the execution engine to use. If not provided the function + will be executed by the regular Python interpreter. + + Other options include JIT compilers such as Numba and Bodo, which in some + cases can speed up the execution. To use an executor you can provide + the decorators ``numba.jit``, ``numba.njit`` or ``bodo.jit``. You can + also provide the decorator with parameters, like ``numba.jit(nogit=True)``. + + Not all functions can be executed with all execution engines. In general, + JIT compilers will require type stability in the function (no variable + should change data type during the execution). And not all pandas and + NumPy APIs are supported. Check the engine documentation [1]_ and [2]_ + for limitations. + + .. versionadded:: 3.0.0 + **kwargs Additional keyword arguments to pass as keywords arguments to `arg`. 
@@ -4404,6 +4422,18 @@ def map( 3 I am a rabbit dtype: object """ + if engine is not None: + if not hasattr(engine, "__pandas_udf__"): + raise ValueError(f"Not a valid engine: {engine}") + return engine.__pandas_udf__.map( + data=self, + func=arg, + args=(), + kwargs=kwargs, + decorator=engine, + skip_na=na_action == "ignore", + ).__finalize__(self, method="map") + if callable(arg): arg = functools.partial(arg, **kwargs) new_values = self._map_values(arg, na_action=na_action) diff --git a/pandas/tests/apply/common.py b/pandas/tests/apply/common.py index b4d153df54059..4424ff0666700 100644 --- a/pandas/tests/apply/common.py +++ b/pandas/tests/apply/common.py @@ -1,3 +1,10 @@ +import numpy as np + +from pandas import ( + DataFrame, + Series, +) +from pandas.api.executors import BaseExecutionEngine from pandas.core.groupby.base import transformation_kernels # There is no Series.cumcount or DataFrame.cumcount @@ -5,3 +12,55 @@ x for x in sorted(transformation_kernels) if x != "cumcount" ] frame_transform_kernels = [x for x in sorted(transformation_kernels) if x != "cumcount"] + + +class MockExecutionEngine(BaseExecutionEngine): + """ + Execution Engine to test if the execution engine interface receives and + uses all parameters provided by the user. + + Making this engine work as the default Python engine by calling it, no extra + functionality is implemented here. + + When testing, this will be called when this engine is provided, and then the + same pandas.map and pandas.apply function will be called, but without engine, + executing the default behavior from the python engine. 
+ """ + + def map(data, func, args, kwargs, decorator, skip_na): + kwargs_to_pass = kwargs if isinstance(data, DataFrame) else {} + return data.map( + func, action_na="ignore" if skip_na else False, **kwargs_to_pass + ) + + def apply(data, func, args, kwargs, decorator, axis): + if isinstance(data, Series): + return data.apply(func, convert_dtype=True, args=args, by_row=False) + elif isinstance(data, DataFrame): + return data.apply( + func, + axis=axis, + raw=False, + result_type=None, + args=args, + by_row="compat", + **kwargs, + ) + else: + assert isinstance(data, np.ndarray) + + def wrap_function(func): + # https://github.com/numpy/numpy/issues/8352 + def wrapper(*args, **kwargs): + result = func(*args, **kwargs) + if isinstance(result, str): + result = np.array(result, dtype=object) + return result + + return wrapper + + return np.apply_along_axis(wrap_function(func), axis, data, *args, **kwargs) + + +class MockEngineDecorator: + __pandas_udf__ = MockExecutionEngine diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index 2d47cd851ad10..19c39d46a72a5 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -17,63 +17,11 @@ date_range, ) import pandas._testing as tm -from pandas.api.executors import BaseExecutionEngine +from pandas.tests.apply.common import MockEngineDecorator from pandas.tests.frame.common import zip_frames from pandas.util.version import Version -class MockExecutionEngine(BaseExecutionEngine): - """ - Execution Engine to test if the execution engine interface receives and - uses all parameters provided by the user. - - Making this engine work as the default Python engine by calling it, no extra - functionality is implemented here. - - When testing, this will be called when this engine is provided, and then the - same pandas.map and pandas.apply function will be called, but without engine, - executing the default behavior from the python engine. 
- """ - - def map(data, func, args, kwargs, decorator, skip_na): - kwargs_to_pass = kwargs if isinstance(data, DataFrame) else {} - return data.map( - func, action_na="ignore" if skip_na else False, **kwargs_to_pass - ) - - def apply(data, func, args, kwargs, decorator, axis): - if isinstance(data, Series): - return data.apply(func, convert_dtype=True, args=args, by_row=False) - elif isinstance(data, DataFrame): - return data.apply( - func, - axis=axis, - raw=False, - result_type=None, - args=args, - by_row="compat", - **kwargs, - ) - else: - assert isinstance(data, np.ndarray) - - def wrap_function(func): - # https://github.com/numpy/numpy/issues/8352 - def wrapper(*args, **kwargs): - result = func(*args, **kwargs) - if isinstance(result, str): - result = np.array(result, dtype=object) - return result - - return wrapper - - return np.apply_along_axis(wrap_function(func), axis, data, *args, **kwargs) - - -class MockEngineDecorator: - __pandas_udf__ = MockExecutionEngine - - @pytest.fixture def int_frame_const_col(): """ diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py index 9541b0b7495c7..896c5c5fca9f7 100644 --- a/pandas/tests/apply/test_series_apply.py +++ b/pandas/tests/apply/test_series_apply.py @@ -376,13 +376,13 @@ def test_demo(): @pytest.mark.parametrize("func", [str, lambda x: str(x)]) -def test_apply_map_evaluate_lambdas_the_same(string_series, func, by_row): +def test_apply_map_evaluate_lambdas_the_same(string_series, func, by_row, engine): # test that we are evaluating row-by-row first if by_row="compat" # else vectorized evaluation result = string_series.apply(func, by_row=by_row) if by_row: - expected = string_series.map(func) + expected = string_series.map(func, engine=engine) tm.assert_series_equal(result, expected) else: assert result == str(string_series) From ef62074fb5af851b44a1ee3472187bcfab4fdd1a Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Sat, 12 Apr 2025 20:59:38 +0200 Subject: [PATCH 2/9] 
Add missing file --- pandas/tests/apply/conftest.py | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 pandas/tests/apply/conftest.py diff --git a/pandas/tests/apply/conftest.py b/pandas/tests/apply/conftest.py new file mode 100644 index 0000000000000..1d01a5dc5fd3e --- /dev/null +++ b/pandas/tests/apply/conftest.py @@ -0,0 +1,8 @@ +import pytest + +from pandas.tests.apply.common import MockEngineDecorator + + +@pytest.fixture(params=[None, MockEngineDecorator]) +def engine(request): + return request.param From b5e5519a43d33342a0425edc0a25f9edbc3914a2 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Mon, 14 Apr 2025 00:08:51 +0200 Subject: [PATCH 3/9] Fixing bug when executor returns a numpy array --- pandas/core/series.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 7e71f83a2e8dc..de019b407fbea 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4425,14 +4425,17 @@ def map( if engine is not None: if not hasattr(engine, "__pandas_udf__"): raise ValueError(f"Not a valid engine: {engine}") - return engine.__pandas_udf__.map( + result = engine.__pandas_udf__.map( data=self, func=arg, args=(), kwargs=kwargs, decorator=engine, skip_na=na_action == "ignore", - ).__finalize__(self, method="map") + ) + if not isinstance(result, Series): + result = Series(result, index=self.index, name=self.name) + return result.__finalize__(self, method="map") if callable(arg): arg = functools.partial(arg, **kwargs) From 30ca3bdb6f054b3a58c87d2ea8be58bca8b4aa35 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Mon, 14 Apr 2025 10:58:39 +0200 Subject: [PATCH 4/9] engine with no function and tests --- pandas/core/series.py | 21 +++++++++----- pandas/tests/series/methods/test_map.py | 38 ++++++++++++++++++++----- 2 files changed, 45 insertions(+), 14 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index de019b407fbea..156697ef4284e 100644 --- 
a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4340,13 +4340,16 @@ def map( If 'ignore', propagate NaN values, without passing them to the mapping correspondence. engine : decorator, optional - Choose the execution engine to use. If not provided the function - will be executed by the regular Python interpreter. + Choose the execution engine to use to run the function. Only used for + functions. If ``map`` is called with a mapping or ``Series``, and + exception will be raised. If ``engine`` is not provided the function will + be executed by the regular Python interpreter. - Other options include JIT compilers such as Numba and Bodo, which in some - cases can speed up the execution. To use an executor you can provide - the decorators ``numba.jit``, ``numba.njit`` or ``bodo.jit``. You can - also provide the decorator with parameters, like ``numba.jit(nogit=True)``. + Options include JIT compilers such as Numba, Bodo or Blosc2, which in some + cases can speed up the execution. To use an executor you can provide the + decorators ``numba.jit``, ``numba.njit``, ``bodo.jit`` or ``blosc2.jit``. + You can also provide the decorator with parameters, like + ``numba.jit(nogil=True)``. Not all functions can be executed with all execution engines. 
In general, JIT compilers will require type stability in the function (no variable @@ -4423,8 +4426,12 @@ def map( dtype: object """ if engine is not None: + if not callable(arg): + raise ValueError( + "The engine argument can only be specified when func is a function" + ) if not hasattr(engine, "__pandas_udf__"): - raise ValueError(f"Not a valid engine: {engine}") + raise ValueError(f"Not a valid engine: {engine!r}") result = engine.__pandas_udf__.map( data=self, func=arg, diff --git a/pandas/tests/series/methods/test_map.py b/pandas/tests/series/methods/test_map.py index 84b60a2afe6eb..661efa95fee61 100644 --- a/pandas/tests/series/methods/test_map.py +++ b/pandas/tests/series/methods/test_map.py @@ -32,16 +32,20 @@ def f(x): ser.map(f) -def test_map_callable(datetime_series): +def test_map_callable(datetime_series, engine): with np.errstate(all="ignore"): - tm.assert_series_equal(datetime_series.map(np.sqrt), np.sqrt(datetime_series)) + tm.assert_series_equal( + datetime_series.map(np.sqrt, engine=engine), np.sqrt(datetime_series) + ) # map function element-wise - tm.assert_series_equal(datetime_series.map(math.exp), np.exp(datetime_series)) + tm.assert_series_equal( + datetime_series.map(math.exp, engine=engine), np.exp(datetime_series) + ) # empty series s = Series(dtype=object, name="foo", index=Index([], name="bar")) - rs = s.map(lambda x: x) + rs = s.map(lambda x: x, engine=engine) tm.assert_series_equal(s, rs) # check all metadata (GH 9322) @@ -52,7 +56,7 @@ def test_map_callable(datetime_series): # index but no data s = Series(index=[1, 2, 3], dtype=np.float64) - rs = s.map(lambda x: x) + rs = s.map(lambda x: x, engine=engine) tm.assert_series_equal(s, rs) @@ -269,10 +273,10 @@ def test_map_decimal(string_series): assert isinstance(result.iloc[0], Decimal) -def test_map_na_exclusion(): +def test_map_na_exclusion(engine): s = Series([1.5, np.nan, 3, np.nan, 5]) - result = s.map(lambda x: x * 2, na_action="ignore") + result = s.map(lambda x: x * 2, 
na_action="ignore", engine=engine) exp = s * 2 tm.assert_series_equal(result, exp) @@ -604,3 +608,23 @@ def test_map_kwargs(): result = Series([2, 4, 5]).map(lambda x, y: x + y, y=2) expected = Series([4, 6, 7]) tm.assert_series_equal(result, expected) + + +def test_map_engine_no_function(): + s = Series([1, 2]) + + with pytest.raises(ValueError, match="engine argument can only be specified"): + s.map({}, engine="something") + + with pytest.raises(ValueError, match="engine argument can only be specified"): + s.map({1: 2}, engine="something") + + with pytest.raises(ValueError, match="engine argument can only be specified"): + s.map(Series([3, 4]), engine="something") + + +def test_map_engine_not_executor(): + s = Series([1, 2]) + + with pytest.raises(ValueError, match="Not a valid engine: 'something'"): + s.map(lambda x: x, engine="something") From 4a3bcfa4e233440b56107ddf570759a07004f3ea Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Wed, 21 May 2025 00:03:51 +0200 Subject: [PATCH 5/9] Last fixes --- pandas/core/series.py | 5 ++--- pandas/tests/apply/common.py | 4 +--- pandas/tests/series/methods/test_map.py | 11 +++-------- 3 files changed, 6 insertions(+), 14 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index af8d4261109c4..0e6562a0abfe6 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4345,7 +4345,7 @@ def map( mapping correspondence. engine : decorator, optional Choose the execution engine to use to run the function. Only used for - functions. If ``map`` is called with a mapping or ``Series``, and + functions. If ``map`` is called with a mapping or ``Series``, an exception will be raised. If ``engine`` is not provided the function will be executed by the regular Python interpreter. @@ -4358,8 +4358,7 @@ def map( Not all functions can be executed with all execution engines. In general, JIT compilers will require type stability in the function (no variable should change data type during the execution). 
And not all pandas and - NumPy APIs are supported. Check the engine documentation [1]_ and [2]_ - for limitations. + NumPy APIs are supported. Check the engine documentation for limitations. .. versionadded:: 3.0.0 diff --git a/pandas/tests/apply/common.py b/pandas/tests/apply/common.py index 4424ff0666700..e3dc0892f383e 100644 --- a/pandas/tests/apply/common.py +++ b/pandas/tests/apply/common.py @@ -29,9 +29,7 @@ class MockExecutionEngine(BaseExecutionEngine): def map(data, func, args, kwargs, decorator, skip_na): kwargs_to_pass = kwargs if isinstance(data, DataFrame) else {} - return data.map( - func, action_na="ignore" if skip_na else False, **kwargs_to_pass - ) + return data.map(func, na_action="ignore" if skip_na else None, **kwargs_to_pass) def apply(data, func, args, kwargs, decorator, axis): if isinstance(data, Series): diff --git a/pandas/tests/series/methods/test_map.py b/pandas/tests/series/methods/test_map.py index a24d1d07e9379..cbab8211e85da 100644 --- a/pandas/tests/series/methods/test_map.py +++ b/pandas/tests/series/methods/test_map.py @@ -634,17 +634,12 @@ def test_map_func_is_none(): Series([1, 2]).map(func=None) -def test_map_engine_no_function(): +@pytest.mark.parametrize("func", [{}, {1: 2}, Series([3, 4])]) +def test_map_engine_no_function(func): s = Series([1, 2]) with pytest.raises(ValueError, match="engine argument can only be specified"): - s.map({}, engine="something") - - with pytest.raises(ValueError, match="engine argument can only be specified"): - s.map({1: 2}, engine="something") - - with pytest.raises(ValueError, match="engine argument can only be specified"): - s.map(Series([3, 4]), engine="something") + s.map(func, engine="something") def test_map_engine_not_executor(): From e838c4c8c72782ba338b0ea56aeda7558be8e996 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Wed, 21 May 2025 00:36:36 +0200 Subject: [PATCH 6/9] Fix CI --- pandas/core/series.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/pandas/core/series.py b/pandas/core/series.py index 0e6562a0abfe6..ba3e8a0e72b2f 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4448,7 +4448,7 @@ def map( ) if not hasattr(engine, "__pandas_udf__"): raise ValueError(f"Not a valid engine: {engine!r}") - result = engine.__pandas_udf__.map( + result = engine.__pandas_udf__.map( # type: ignore[attr-defined] data=self, func=func, args=(), From cae63acf4bfacf8cf9414f71fe1169b30ec339bb Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Wed, 21 May 2025 01:20:41 +0200 Subject: [PATCH 7/9] Add fixture import back --- pandas/tests/series/methods/test_map.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/series/methods/test_map.py b/pandas/tests/series/methods/test_map.py index cbab8211e85da..c16eba870364c 100644 --- a/pandas/tests/series/methods/test_map.py +++ b/pandas/tests/series/methods/test_map.py @@ -20,6 +20,7 @@ timedelta_range, ) import pandas._testing as tm +from pandas.tests.apply.conftest import engine # noqa: F401 def test_series_map_box_timedelta(): @@ -32,7 +33,7 @@ def f(x): ser.map(f) -def test_map_callable(datetime_series, engine): +def test_map_callable(datetime_series, engine): # noqa: F811 with np.errstate(all="ignore"): tm.assert_series_equal( datetime_series.map(np.sqrt, engine=engine), np.sqrt(datetime_series) @@ -273,7 +274,7 @@ def test_map_decimal(string_series): assert isinstance(result.iloc[0], Decimal) -def test_map_na_exclusion(engine): +def test_map_na_exclusion(engine): # noqa: F811 s = Series([1.5, np.nan, 3, np.nan, 5]) result = s.map(lambda x: x * 2, na_action="ignore", engine=engine) From a4d8b4a20348ff9f8ab72179a659432a6f0b4560 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Tue, 27 May 2025 19:14:06 +0200 Subject: [PATCH 8/9] Move mock execution class to conftest --- pandas/tests/apply/common.py | 57 -------------------------- pandas/tests/apply/conftest.py | 57 +++++++++++++++++++++++++- pandas/tests/apply/test_frame_apply.py | 
2 +- 3 files changed, 57 insertions(+), 59 deletions(-) diff --git a/pandas/tests/apply/common.py b/pandas/tests/apply/common.py index e3dc0892f383e..b4d153df54059 100644 --- a/pandas/tests/apply/common.py +++ b/pandas/tests/apply/common.py @@ -1,10 +1,3 @@ -import numpy as np - -from pandas import ( - DataFrame, - Series, -) -from pandas.api.executors import BaseExecutionEngine from pandas.core.groupby.base import transformation_kernels # There is no Series.cumcount or DataFrame.cumcount @@ -12,53 +5,3 @@ x for x in sorted(transformation_kernels) if x != "cumcount" ] frame_transform_kernels = [x for x in sorted(transformation_kernels) if x != "cumcount"] - - -class MockExecutionEngine(BaseExecutionEngine): - """ - Execution Engine to test if the execution engine interface receives and - uses all parameters provided by the user. - - Making this engine work as the default Python engine by calling it, no extra - functionality is implemented here. - - When testing, this will be called when this engine is provided, and then the - same pandas.map and pandas.apply function will be called, but without engine, - executing the default behavior from the python engine. 
- """ - - def map(data, func, args, kwargs, decorator, skip_na): - kwargs_to_pass = kwargs if isinstance(data, DataFrame) else {} - return data.map(func, na_action="ignore" if skip_na else None, **kwargs_to_pass) - - def apply(data, func, args, kwargs, decorator, axis): - if isinstance(data, Series): - return data.apply(func, convert_dtype=True, args=args, by_row=False) - elif isinstance(data, DataFrame): - return data.apply( - func, - axis=axis, - raw=False, - result_type=None, - args=args, - by_row="compat", - **kwargs, - ) - else: - assert isinstance(data, np.ndarray) - - def wrap_function(func): - # https://github.com/numpy/numpy/issues/8352 - def wrapper(*args, **kwargs): - result = func(*args, **kwargs) - if isinstance(result, str): - result = np.array(result, dtype=object) - return result - - return wrapper - - return np.apply_along_axis(wrap_function(func), axis, data, *args, **kwargs) - - -class MockEngineDecorator: - __pandas_udf__ = MockExecutionEngine diff --git a/pandas/tests/apply/conftest.py b/pandas/tests/apply/conftest.py index 1d01a5dc5fd3e..aecf82f5a9419 100644 --- a/pandas/tests/apply/conftest.py +++ b/pandas/tests/apply/conftest.py @@ -1,6 +1,61 @@ +import numpy as np import pytest -from pandas.tests.apply.common import MockEngineDecorator +from pandas import ( + DataFrame, + Series, +) +from pandas.api.executors import BaseExecutionEngine + + +class MockExecutionEngine(BaseExecutionEngine): + """ + Execution Engine to test if the execution engine interface receives and + uses all parameters provided by the user. + + Making this engine work as the default Python engine by calling it, no extra + functionality is implemented here. + + When testing, this will be called when this engine is provided, and then the + same pandas.map and pandas.apply function will be called, but without engine, + executing the default behavior from the python engine. 
+ """ + + def map(data, func, args, kwargs, decorator, skip_na): + kwargs_to_pass = kwargs if isinstance(data, DataFrame) else {} + return data.map(func, na_action="ignore" if skip_na else None, **kwargs_to_pass) + + def apply(data, func, args, kwargs, decorator, axis): + if isinstance(data, Series): + return data.apply(func, convert_dtype=True, args=args, by_row=False) + elif isinstance(data, DataFrame): + return data.apply( + func, + axis=axis, + raw=False, + result_type=None, + args=args, + by_row="compat", + **kwargs, + ) + else: + assert isinstance(data, np.ndarray) + + def wrap_function(func): + # https://github.com/numpy/numpy/issues/8352 + def wrapper(*args, **kwargs): + result = func(*args, **kwargs) + if isinstance(result, str): + result = np.array(result, dtype=object) + return result + + return wrapper + + return np.apply_along_axis(wrap_function(func), axis, data, *args, **kwargs) + + +class MockEngineDecorator: + __pandas_udf__ = MockExecutionEngine @pytest.fixture(params=[None, MockEngineDecorator]) diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index 34f153e0735b3..a9afb5dbd11d7 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -17,7 +17,7 @@ date_range, ) import pandas._testing as tm -from pandas.tests.apply.common import MockEngineDecorator +from pandas.tests.apply.conftest import MockEngineDecorator from pandas.tests.frame.common import zip_frames from pandas.util.version import Version From 56c3ce0f00ec091998d545a131679e40bf4147af Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Tue, 27 May 2025 19:16:41 +0200 Subject: [PATCH 9/9] Adding commit about imported fixture --- pandas/tests/series/methods/test_map.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/tests/series/methods/test_map.py b/pandas/tests/series/methods/test_map.py index c16eba870364c..0ec973dea23d5 100644 --- a/pandas/tests/series/methods/test_map.py +++ 
b/pandas/tests/series/methods/test_map.py @@ -20,6 +20,9 @@ timedelta_range, ) import pandas._testing as tm + +# The fixture is mostly used in pandas/tests/apply, so it's defined in that +# conftest, which is out of scope here. So we need to import it manually from pandas.tests.apply.conftest import engine # noqa: F401