diff --git a/pandas/_libs/internals.pyi b/pandas/_libs/internals.pyi index 446ee299698c5..d35d414aaa012 100644 --- a/pandas/_libs/internals.pyi +++ b/pandas/_libs/internals.pyi @@ -6,7 +6,10 @@ from typing import ( import numpy as np -from pandas._typing import ArrayLike +from pandas._typing import ( + ArrayLike, + T, +) def slice_len(slc: slice, objlen: int = ...) -> int: ... @@ -50,9 +53,16 @@ class BlockPlacement: def append(self, others: list[BlockPlacement]) -> BlockPlacement: ... -class Block: +class SharedBlock: _mgr_locs: BlockPlacement ndim: int values: ArrayLike def __init__(self, values: ArrayLike, placement: BlockPlacement, ndim: int): ... + +class NumpyBlock(SharedBlock): + values: np.ndarray + def getitem_block_index(self: T, slicer: slice) -> T: ... + +class Block(SharedBlock): + ... diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index 4295db9d1613c..d7c5882e92f97 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -458,14 +458,13 @@ def get_blkno_placements(blknos, group: bool = True): @cython.freelist(64) -cdef class Block: +cdef class SharedBlock: """ Defining __init__ in a cython class significantly improves performance. 
""" cdef: public BlockPlacement _mgr_locs readonly int ndim - public object values def __cinit__(self, values, placement: BlockPlacement, ndim: int): """ @@ -479,7 +478,6 @@ cdef class Block: """ self._mgr_locs = placement self.ndim = ndim - self.values = values cpdef __reduce__(self): # We have to do some gymnastics b/c "ndim" is keyword-only @@ -505,3 +503,33 @@ cdef class Block: ndim = maybe_infer_ndim(self.values, self.mgr_locs) self.ndim = ndim + + +cdef class NumpyBlock(SharedBlock): + cdef: + public ndarray values + + def __cinit__(self, ndarray values, BlockPlacement placement, int ndim): + # set values here; the (implicit) call to SharedBlock.__cinit__ will + # set placement and ndim + self.values = values + + # @final # not useful in cython, but we _would_ annotate with @final + def getitem_block_index(self, slicer: slice) -> NumpyBlock: + """ + Perform __getitem__-like specialized to slicing along index. + + Assumes self.ndim == 2 + """ + new_values = self.values[..., slicer] + return type(self)(new_values, self._mgr_locs, ndim=self.ndim) + + +cdef class Block(SharedBlock): + cdef: + public object values + + def __cinit__(self, object values, BlockPlacement placement, int ndim): + # set values here; the (implicit) call to SharedBlock.__cinit__ will + # set placement and ndim + self.values = values diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index b18c25b8092a2..b267c40bd9f63 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -5,7 +5,7 @@ from typing import ( TYPE_CHECKING, Any, - Union, + Callable, cast, ) import warnings @@ -138,7 +138,7 @@ def newfunc(self, *args, **kwargs) -> list[Block]: return cast(F, newfunc) -class Block(libinternals.Block, PandasObject): +class Block(PandasObject): """ Canonical n-dimensional unit of homogeneous dtype contained in a pandas data structure @@ -147,6 +147,8 @@ class Block(libinternals.Block, PandasObject): """ values: np.ndarray | ExtensionArray + 
ndim: int + __init__: Callable __slots__ = () is_numeric = False @@ -313,7 +315,6 @@ def getitem_block(self, slicer) -> Block: return type(self)(new_values, new_mgr_locs, self.ndim) - @final def getitem_block_index(self, slicer: slice) -> Block: """ Perform __getitem__-like specialized to slicing along index. @@ -1371,7 +1372,7 @@ def interpolate( return self.make_block_same_class(new_values) -class ExtensionBlock(EABackedBlock): +class ExtensionBlock(libinternals.Block, EABackedBlock): """ Block for holding extension types. @@ -1660,7 +1661,13 @@ def _unstack(self, unstacker, fill_value, new_placement): return blocks, mask -class NumericBlock(Block): +class NumpyBlock(libinternals.NumpyBlock, Block): + values: np.ndarray + + getitem_block_index = libinternals.NumpyBlock.getitem_block_index + + +class NumericBlock(NumpyBlock): __slots__ = () is_numeric = True @@ -1771,16 +1778,15 @@ def fillna( return [self.make_block_same_class(values=new_values)] -class DatetimeLikeBlock(NDArrayBackedExtensionBlock): - """Mixin class for DatetimeLikeBlock, DatetimeTZBlock.""" +class DatetimeLikeBlock(libinternals.Block, NDArrayBackedExtensionBlock): + """Block for datetime64[ns], timedelta64[ns].""" __slots__ = () is_numeric = False - values: DatetimeArray | TimedeltaArray -class DatetimeTZBlock(ExtensionBlock, DatetimeLikeBlock): +class DatetimeTZBlock(ExtensionBlock, NDArrayBackedExtensionBlock): """ implement a datetime64 block with a tz attribute """ values: DatetimeArray @@ -1794,18 +1800,15 @@ class DatetimeTZBlock(ExtensionBlock, DatetimeLikeBlock): putmask = NDArrayBackedExtensionBlock.putmask fillna = NDArrayBackedExtensionBlock.fillna - # error: Incompatible types in assignment (expression has type - # "Callable[[NDArrayBackedExtensionBlock], bool]", base class "ExtensionBlock" - # defined the type as "bool") [assignment] - is_view = NDArrayBackedExtensionBlock.is_view # type: ignore[assignment] + get_values = NDArrayBackedExtensionBlock.get_values + + is_view = 
NDArrayBackedExtensionBlock.is_view -class ObjectBlock(Block): +class ObjectBlock(NumpyBlock): __slots__ = () is_object = True - values: np.ndarray - @maybe_split def reduce(self, func, ignore_failures: bool = False) -> list[Block]: """ @@ -2030,7 +2033,7 @@ def ensure_block_shape(values: ArrayLike, ndim: int = 1) -> ArrayLike: # TODO(EA2D): https://github.com/pandas-dev/pandas/issues/23023 # block.shape is incorrect for "2D" ExtensionArrays # We can't, and don't need to, reshape. - values = cast(Union[np.ndarray, DatetimeArray, TimedeltaArray], values) + values = cast("np.ndarray | DatetimeArray | TimedeltaArray", values) values = values.reshape(1, -1) return values