From f594d0443348339fae45c2f258841deda0c36192 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 9 Apr 2021 16:12:57 -0700 Subject: [PATCH 1/2] REF: separate 2D only methods from ArrayManager --- pandas/_typing.py | 5 +- pandas/core/groupby/generic.py | 10 +- pandas/core/internals/array_manager.py | 637 +++++++++++++------------ 3 files changed, 339 insertions(+), 313 deletions(-) diff --git a/pandas/_typing.py b/pandas/_typing.py index 36441e620286d..9f23fcc56597f 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -185,8 +185,11 @@ ] # internals -Manager = Union["ArrayManager", "BlockManager", "SingleBlockManager"] +Manager = Union[ + "ArrayManager", "SingleArrayManager", "BlockManager", "SingleBlockManager" +] SingleManager = Union["SingleArrayManager", "SingleBlockManager"] +Manager2D = Union["ArrayManager", "BlockManager"] # indexing # PositionalIndexer -> valid 1D positional indexer, e.g. can pass diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 5aebad84a0a30..e361440c1a2f4 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -36,7 +36,7 @@ ArrayLike, FrameOrSeries, FrameOrSeriesUnion, - Manager, + Manager2D, ) from pandas.util._decorators import ( Appender, @@ -1095,9 +1095,9 @@ def _cython_agg_general( def _cython_agg_manager( self, how: str, alt=None, numeric_only: bool = True, min_count: int = -1 - ) -> Manager: + ) -> Manager2D: - data: Manager = self._get_data_to_aggregate() + data: Manager2D = self._get_data_to_aggregate() if numeric_only: data = data.get_numeric_data(copy=False) @@ -1691,7 +1691,7 @@ def _wrap_frame_output(self, result, obj: DataFrame) -> DataFrame: else: return self.obj._constructor(result, index=obj.index, columns=result_index) - def _get_data_to_aggregate(self) -> Manager: + def _get_data_to_aggregate(self) -> Manager2D: obj = self._obj_with_exclusions if self.axis == 1: return obj.T._mgr @@ -1776,7 +1776,7 @@ def _wrap_transformed_output( return result - def _wrap_agged_manager(self, mgr: Manager) -> DataFrame: + def _wrap_agged_manager(self, mgr: Manager2D) -> DataFrame: if not self.as_index: index = np.arange(mgr.shape[1]) mgr.set_axis(1, ibase.Index(index), verify_integrity=False) diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 5581305a9baea..b0f61993ec653 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -90,10 +90,10 @@ from pandas import Float64Index -T = TypeVar("T", bound="ArrayManager") +T = TypeVar("T", bound="BaseArrayManager") -class ArrayManager(DataManager): +class BaseArrayManager(DataManager): """ Core internal data structure to implement DataFrame and Series. @@ -124,15 +124,7 @@ def __init__( axes: list[Index], verify_integrity: bool = True, ): - # Note: we are storing the axes in "_axes" in the (row, columns) order - # which contrasts the order how it is stored in BlockManager - self._axes = axes - self.arrays = arrays - - if verify_integrity: - self._axes = [ensure_index(ax) for ax in axes] - self.arrays = [ensure_wrapped_if_datetimelike(arr) for arr in arrays] - self._verify_integrity() + raise NotImplementedError def make_empty(self: T, axes=None) -> T: """Return an empty ArrayManager with the items axis of len 0 (no columns)""" @@ -182,7 +174,7 @@ def set_axis( self._axes[axis] = new_labels - def consolidate(self) -> ArrayManager: + def consolidate(self: T) -> T: return self def is_consolidated(self) -> bool: @@ -206,83 +198,6 @@ def __repr__(self) -> str: output += f"\n{arr.dtype}" return output - def _verify_integrity(self) -> None: - n_rows, n_columns = self.shape_proper - if not len(self.arrays) == n_columns: - raise ValueError( - "Number of passed arrays must equal the size of the column Index: " - f"{len(self.arrays)} arrays vs {n_columns} columns." - ) - for arr in self.arrays: - if not len(arr) == n_rows: - raise ValueError( - "Passed arrays should have the same length as the rows Index: " - f"{len(arr)} vs {n_rows} rows" - ) - if not isinstance(arr, (np.ndarray, ExtensionArray)): - raise ValueError( - "Passed arrays should be np.ndarray or ExtensionArray instances, " - f"got {type(arr)} instead" - ) - if not arr.ndim == 1: - raise ValueError( - "Passed arrays should be 1-dimensional, got array with " - f"{arr.ndim} dimensions instead." - ) - - def reduce( - self: T, func: Callable, ignore_failures: bool = False - ) -> tuple[T, np.ndarray]: - """ - Apply reduction function column-wise, returning a single-row ArrayManager. - - Parameters - ---------- - func : reduction function - ignore_failures : bool, default False - Whether to drop columns where func raises TypeError. - - Returns - ------- - ArrayManager - np.ndarray - Indexer of column indices that are retained. - """ - result_arrays: list[np.ndarray] = [] - result_indices: list[int] = [] - for i, arr in enumerate(self.arrays): - try: - res = func(arr, axis=0) - except TypeError: - if not ignore_failures: - raise - else: - # TODO NaT doesn't preserve dtype, so we need to ensure to create - # a timedelta result array if original was timedelta - # what if datetime results in timedelta? (eg std) - if res is NaT and is_timedelta64_ns_dtype(arr.dtype): - result_arrays.append(np.array(["NaT"], dtype="timedelta64[ns]")) - else: - # error: Argument 1 to "append" of "list" has incompatible type - # "ExtensionArray"; expected "ndarray" - result_arrays.append( - sanitize_array([res], None) # type: ignore[arg-type] - ) - result_indices.append(i) - - index = Index._simple_new(np.array([None], dtype=object)) # placeholder - if ignore_failures: - indexer = np.array(result_indices) - columns = self.items[result_indices] - else: - indexer = np.arange(self.shape[0]) - columns = self.items - - # error: Argument 1 to "ArrayManager" has incompatible type "List[ndarray]"; - # expected "List[Union[ndarray, ExtensionArray]]" - new_mgr = type(self)(result_arrays, [index, columns]) # type: ignore[arg-type] - return new_mgr, indexer - def grouped_reduce(self: T, func: Callable, ignore_failures: bool = False) -> T: """ Apply grouped reduction function columnwise, returning a new ArrayManager. @@ -324,18 +239,6 @@ def grouped_reduce(self: T, func: Callable, ignore_failures: bool = False) -> T: # expected "List[Union[ndarray, ExtensionArray]]" return type(self)(result_arrays, [index, columns]) # type: ignore[arg-type] - def operate_blockwise(self, other: ArrayManager, array_op) -> ArrayManager: - """ - Apply array_op blockwise with another (aligned) BlockManager. - """ - # TODO what if `other` is BlockManager ? - left_arrays = self.arrays - right_arrays = other.arrays - result_arrays = [ - array_op(left, right) for left, right in zip(left_arrays, right_arrays) - ] - return type(self)(result_arrays, self._axes) - def apply( self: T, f, @@ -499,29 +402,7 @@ def apply_with_block(self: T, f, align_keys=None, swap_axis=True, **kwargs) -> T return type(self)(result_arrays, self._axes) - def quantile( - self, - *, - qs: Float64Index, - axis: int = 0, - transposed: bool = False, - interpolation="linear", - ) -> ArrayManager: - - arrs = [ensure_block_shape(x, 2) for x in self.arrays] - assert axis == 1 - new_arrs = [ - quantile_compat(x, np.asarray(qs._values), interpolation) for x in arrs - ] - for i, arr in enumerate(new_arrs): - if arr.ndim == 2: - assert arr.shape[0] == 1, arr.shape - new_arrs[i] = arr[0] - - axes = [qs, self._axes[1]] - return type(self)(new_arrs, axes) - - def where(self, other, cond, align: bool, errors: str) -> ArrayManager: + def where(self: T, other, cond, align: bool, errors: str) -> T: if align: align_keys = ["other", "cond"] else: @@ -554,7 +435,7 @@ def putmask(self, mask, new, align: bool = True): new=new, ) - def diff(self, n: int, axis: int) -> ArrayManager: + def diff(self: T, n: int, axis: int) -> T: if axis == 1: # DataFrame only calls this for n=0, in which case performing it # with axis=0 is equivalent @@ -562,10 +443,10 @@ def diff(self, n: int, axis: int) -> ArrayManager: axis = 0 return self.apply(algos.diff, n=n, axis=axis, stacklevel=5) - def interpolate(self, **kwargs) -> ArrayManager: + def interpolate(self: T, **kwargs) -> T: return self.apply_with_block("interpolate", swap_axis=False, **kwargs) - def shift(self, periods: int, axis: int, fill_value) -> ArrayManager: + def shift(self: T, periods: int, axis: int, fill_value) -> T: if fill_value is lib.no_default: fill_value = None @@ -577,24 +458,24 @@ def shift(self, periods: int, axis: int, fill_value) -> ArrayManager: "shift", periods=periods, axis=axis, fill_value=fill_value ) - def fillna(self, value, limit, inplace: bool, downcast) -> ArrayManager: + def fillna(self: T, value, limit, inplace: bool, downcast) -> T: return self.apply_with_block( "fillna", value=value, limit=limit, inplace=inplace, downcast=downcast ) - def downcast(self) -> ArrayManager: + def downcast(self: T) -> T: return self.apply_with_block("downcast") - def astype(self, dtype, copy: bool = False, errors: str = "raise") -> ArrayManager: + def astype(self: T, dtype, copy: bool = False, errors: str = "raise") -> T: return self.apply(astype_array_safe, dtype=dtype, copy=copy, errors=errors) def convert( - self, + self: T, copy: bool = True, datetime: bool = True, numeric: bool = True, timedelta: bool = True, - ) -> ArrayManager: + ) -> T: def _convert(arr): if is_object_dtype(arr.dtype): return soft_convert_objects( @@ -609,7 +490,7 @@ def _convert(arr): return self.apply(_convert) - def replace(self, value, **kwargs) -> ArrayManager: + def replace(self: T, value, **kwargs) -> T: assert np.ndim(value) == 0, value # TODO "replace" is right now implemented on the blocks, we should move # it to general array algos so it can be reused here @@ -659,14 +540,14 @@ def is_view(self) -> bool: def is_single_block(self) -> bool: return False - def _get_data_subset(self, predicate: Callable) -> ArrayManager: + def _get_data_subset(self: T, predicate: Callable) -> T: indices = [i for i, arr in enumerate(self.arrays) if predicate(arr)] arrays = [self.arrays[i] for i in indices] # TODO copy? new_axes = [self._axes[0], self._axes[1][np.array(indices, dtype="int64")]] return type(self)(arrays, new_axes, verify_integrity=False) - def get_bool_data(self, copy: bool = False) -> ArrayManager: + def get_bool_data(self: T, copy: bool = False) -> T: """ Select columns that are bool-dtype and object-dtype columns that are all-bool. @@ -677,7 +558,7 @@ def get_bool_data(self, copy: bool = False) -> ArrayManager: """ return self._get_data_subset(is_inferred_bool_dtype) - def get_numeric_data(self, copy: bool = False) -> ArrayManager: + def get_numeric_data(self: T, copy: bool = False) -> T: """ Select columns that have a numeric dtype. @@ -783,71 +664,245 @@ def as_array( return result # return arr.transpose() if transpose else arr - def get_slice(self, slobj: slice, axis: int = 0) -> ArrayManager: + def reindex_indexer( + self: T, + new_axis, + indexer, + axis: int, + fill_value=None, + allow_dups: bool = False, + copy: bool = True, + # ignored keywords + consolidate: bool = True, + only_slice: bool = False, + ) -> T: axis = self._normalize_axis(axis) + return self._reindex_indexer( + new_axis, indexer, axis, fill_value, allow_dups, copy + ) - if axis == 0: - arrays = [arr[slobj] for arr in self.arrays] - elif axis == 1: - arrays = self.arrays[slobj] - - new_axes = list(self._axes) - new_axes[axis] = new_axes[axis]._getitem_slice(slobj) - - return type(self)(arrays, new_axes, verify_integrity=False) - - def fast_xs(self, loc: int) -> ArrayLike: + def _reindex_indexer( + self: T, + new_axis, + indexer, + axis: int, + fill_value=None, + allow_dups: bool = False, + copy: bool = True, + ) -> T: """ - Return the array corresponding to `frame.iloc[loc]`. - Parameters ---------- - loc : int + new_axis : Index + indexer : ndarray of int64 or None + axis : int + fill_value : object, default None + allow_dups : bool, default False + copy : bool, default True - Returns - ------- - np.ndarray or ExtensionArray + + pandas-indexer with -1's only. """ - dtype = interleaved_dtype([arr.dtype for arr in self.arrays]) + if indexer is None: + if new_axis is self._axes[axis] and not copy: + return self + + result = self.copy(deep=copy) + result._axes = list(self._axes) + result._axes[axis] = new_axis + return result + + # some axes don't allow reindexing with dups + if not allow_dups: + self._axes[axis]._validate_can_reindex(indexer) + + if axis >= self.ndim: + raise IndexError("Requested axis not found in manager") + + if axis == 1: + new_arrays = [] + for i in indexer: + if i == -1: + arr = self._make_na_array(fill_value=fill_value) + else: + arr = self.arrays[i] + new_arrays.append(arr) - values = [arr[loc] for arr in self.arrays] - if isinstance(dtype, ExtensionDtype): - result = dtype.construct_array_type()._from_sequence(values, dtype=dtype) - # for datetime64/timedelta64, the np.ndarray constructor cannot handle pd.NaT - elif is_datetime64_ns_dtype(dtype): - result = DatetimeArray._from_sequence(values, dtype=dtype)._data - elif is_timedelta64_ns_dtype(dtype): - result = TimedeltaArray._from_sequence(values, dtype=dtype)._data else: - result = np.array(values, dtype=dtype) - return result + validate_indices(indexer, len(self._axes[0])) + indexer = ensure_platform_int(indexer) + if (indexer == -1).any(): + allow_fill = True + else: + allow_fill = False + new_arrays = [ + take_1d( + arr, + indexer, + allow_fill=allow_fill, + fill_value=fill_value, + # if fill_value is not None else blk.fill_value + ) + for arr in self.arrays + ] - def iget(self, i: int) -> SingleArrayManager: - """ - Return the data as a SingleArrayManager. - """ - values = self.arrays[i] - return SingleArrayManager([values], [self._axes[0]]) + new_axes = list(self._axes) + new_axes[axis] = new_axis - def iget_values(self, i: int) -> ArrayLike: - """ - Return the data for column i as the values (ndarray or ExtensionArray). - """ - return self.arrays[i] + return type(self)(new_arrays, new_axes, verify_integrity=False) - def idelete(self, indexer): + def take(self: T, indexer, axis: int = 1, verify: bool = True) -> T: """ - Delete selected locations in-place (new block and array, same BlockManager) + Take items along any axis. """ - to_keep = np.ones(self.shape[0], dtype=np.bool_) - to_keep[indexer] = False + axis = self._normalize_axis(axis) - self.arrays = [self.arrays[i] for i in np.nonzero(to_keep)[0]] - self._axes = [self._axes[0], self._axes[1][to_keep]] - return self + indexer = ( + np.arange(indexer.start, indexer.stop, indexer.step, dtype="int64") + if isinstance(indexer, slice) + else np.asanyarray(indexer, dtype="int64") + ) - def iset(self, loc: int | slice | np.ndarray, value: ArrayLike): - """ + if not indexer.ndim == 1: + raise ValueError("indexer should be 1-dimensional") + + n = self.shape_proper[axis] + indexer = maybe_convert_indices(indexer, n, verify=verify) + + new_labels = self._axes[axis].take(indexer) + return self._reindex_indexer( + new_axis=new_labels, indexer=indexer, axis=axis, allow_dups=True + ) + + def _make_na_array(self, fill_value=None): + if fill_value is None: + fill_value = np.nan + + dtype, fill_value = infer_dtype_from_scalar(fill_value) + # error: Argument "dtype" to "empty" has incompatible type "Union[dtype[Any], + # ExtensionDtype]"; expected "Union[dtype[Any], None, type, _SupportsDType, str, + # Union[Tuple[Any, int], Tuple[Any, Union[int, Sequence[int]]], List[Any], + # _DTypeDict, Tuple[Any, Any]]]" + values = np.empty(self.shape_proper[0], dtype=dtype) # type: ignore[arg-type] + values.fill(fill_value) + return values + + def _equal_values(self, other) -> bool: + """ + Used in .equals defined in base class. Only check the column values + assuming shape and indexes have already been checked. + """ + for left, right in zip(self.arrays, other.arrays): + if not array_equals(left, right): + return False + else: + return True + + # TODO + # equals + # to_dict + + +class ArrayManager(BaseArrayManager): + ndim = 2 + + def __init__( + self, + arrays: list[np.ndarray | ExtensionArray], + axes: list[Index], + verify_integrity: bool = True, + ): + # Note: we are storing the axes in "_axes" in the (row, columns) order + # which contrasts the order how it is stored in BlockManager + self._axes = axes + self.arrays = arrays + + if verify_integrity: + self._axes = [ensure_index(ax) for ax in axes] + self.arrays = [ensure_wrapped_if_datetimelike(arr) for arr in arrays] + self._verify_integrity() + + def _verify_integrity(self) -> None: + n_rows, n_columns = self.shape_proper + if not len(self.arrays) == n_columns: + raise ValueError( + "Number of passed arrays must equal the size of the column Index: " + f"{len(self.arrays)} arrays vs {n_columns} columns." + ) + for arr in self.arrays: + if not len(arr) == n_rows: + raise ValueError( + "Passed arrays should have the same length as the rows Index: " + f"{len(arr)} vs {n_rows} rows" + ) + if not isinstance(arr, (np.ndarray, ExtensionArray)): + raise ValueError( + "Passed arrays should be np.ndarray or ExtensionArray instances, " + f"got {type(arr)} instead" + ) + if not arr.ndim == 1: + raise ValueError( + "Passed arrays should be 1-dimensional, got array with " + f"{arr.ndim} dimensions instead." + ) + + # -------------------------------------------------------------------- + # Indexing + + def fast_xs(self, loc: int) -> ArrayLike: + """ + Return the array corresponding to `frame.iloc[loc]`. + + Parameters + ---------- + loc : int + + Returns + ------- + np.ndarray or ExtensionArray + """ + dtype = interleaved_dtype([arr.dtype for arr in self.arrays]) + + values = [arr[loc] for arr in self.arrays] + if isinstance(dtype, ExtensionDtype): + result = dtype.construct_array_type()._from_sequence(values, dtype=dtype) + # for datetime64/timedelta64, the np.ndarray constructor cannot handle pd.NaT + elif is_datetime64_ns_dtype(dtype): + result = DatetimeArray._from_sequence(values, dtype=dtype)._data + elif is_timedelta64_ns_dtype(dtype): + result = TimedeltaArray._from_sequence(values, dtype=dtype)._data + else: + result = np.array(values, dtype=dtype) + return result + + def get_slice(self, slobj: slice, axis: int = 0) -> ArrayManager: + axis = self._normalize_axis(axis) + + if axis == 0: + arrays = [arr[slobj] for arr in self.arrays] + elif axis == 1: + arrays = self.arrays[slobj] + + new_axes = list(self._axes) + new_axes[axis] = new_axes[axis]._getitem_slice(slobj) + + return type(self)(arrays, new_axes, verify_integrity=False) + + def iget(self, i: int) -> SingleArrayManager: + """ + Return the data as a SingleArrayManager. + """ + values = self.arrays[i] + return SingleArrayManager([values], [self._axes[0]]) + + def iget_values(self, i: int) -> ArrayLike: + """ + Return the data for column i as the values (ndarray or ExtensionArray). + """ + return self.arrays[i] + + def iset(self, loc: int | slice | np.ndarray, value: ArrayLike): + """ Set new column(s). This changes the ArrayManager in-place, but replaces (an) existing @@ -941,139 +996,108 @@ def insert(self, loc: int, item: Hashable, value: ArrayLike) -> None: self.arrays = arrays self._axes[1] = new_axis - def reindex_indexer( - self: T, - new_axis, - indexer, - axis: int, - fill_value=None, - allow_dups: bool = False, - copy: bool = True, - # ignored keywords - consolidate: bool = True, - only_slice: bool = False, - ) -> T: - axis = self._normalize_axis(axis) - return self._reindex_indexer( - new_axis, indexer, axis, fill_value, allow_dups, copy - ) - - def _reindex_indexer( - self: T, - new_axis, - indexer, - axis: int, - fill_value=None, - allow_dups: bool = False, - copy: bool = True, - ) -> T: + def idelete(self, indexer): """ - Parameters - ---------- - new_axis : Index - indexer : ndarray of int64 or None - axis : int - fill_value : object, default None - allow_dups : bool, default False - copy : bool, default True - - - pandas-indexer with -1's only. + Delete selected locations in-place (new block and array, same BlockManager) """ - if indexer is None: - if new_axis is self._axes[axis] and not copy: - return self + to_keep = np.ones(self.shape[0], dtype=np.bool_) + to_keep[indexer] = False - result = self.copy(deep=copy) - result._axes = list(self._axes) - result._axes[axis] = new_axis - return result + self.arrays = [self.arrays[i] for i in np.nonzero(to_keep)[0]] + self._axes = [self._axes[0], self._axes[1][to_keep]] + return self - # some axes don't allow reindexing with dups - if not allow_dups: - self._axes[axis]._validate_can_reindex(indexer) + # -------------------------------------------------------------------- + # Array-wise Operation - if axis >= self.ndim: - raise IndexError("Requested axis not found in manager") + def reduce( + self: T, func: Callable, ignore_failures: bool = False + ) -> tuple[T, np.ndarray]: + """ + Apply reduction function column-wise, returning a single-row ArrayManager. - if axis == 1: - new_arrays = [] - for i in indexer: - if i == -1: - arr = self._make_na_array(fill_value=fill_value) - else: - arr = self.arrays[i] - new_arrays.append(arr) + Parameters + ---------- + func : reduction function + ignore_failures : bool, default False + Whether to drop columns where func raises TypeError. - else: - validate_indices(indexer, len(self._axes[0])) - indexer = ensure_platform_int(indexer) - if (indexer == -1).any(): - allow_fill = True + Returns + ------- + ArrayManager + np.ndarray + Indexer of column indices that are retained. + """ + result_arrays: list[np.ndarray] = [] + result_indices: list[int] = [] + for i, arr in enumerate(self.arrays): + try: + res = func(arr, axis=0) + except TypeError: + if not ignore_failures: + raise else: - allow_fill = False - new_arrays = [ - take_1d( - arr, - indexer, - allow_fill=allow_fill, - fill_value=fill_value, - # if fill_value is not None else blk.fill_value - ) - for arr in self.arrays - ] + # TODO NaT doesn't preserve dtype, so we need to ensure to create + # a timedelta result array if original was timedelta + # what if datetime results in timedelta? (eg std) + if res is NaT and is_timedelta64_ns_dtype(arr.dtype): + result_arrays.append(np.array(["NaT"], dtype="timedelta64[ns]")) + else: + # error: Argument 1 to "append" of "list" has incompatible type + # "ExtensionArray"; expected "ndarray" + result_arrays.append( + sanitize_array([res], None) # type: ignore[arg-type] + ) + result_indices.append(i) - new_axes = list(self._axes) - new_axes[axis] = new_axis + index = Index._simple_new(np.array([None], dtype=object)) # placeholder + if ignore_failures: + indexer = np.array(result_indices) + columns = self.items[result_indices] + else: + indexer = np.arange(self.shape[0]) + columns = self.items - return type(self)(new_arrays, new_axes, verify_integrity=False) + # error: Argument 1 to "ArrayManager" has incompatible type "List[ndarray]"; + # expected "List[Union[ndarray, ExtensionArray]]" + new_mgr = type(self)(result_arrays, [index, columns]) # type: ignore[arg-type] + return new_mgr, indexer - def take(self: T, indexer, axis: int = 1, verify: bool = True) -> T: + def operate_blockwise(self, other: ArrayManager, array_op) -> ArrayManager: """ - Take items along any axis. + Apply array_op blockwise with another (aligned) BlockManager. """ - axis = self._normalize_axis(axis) - - indexer = ( - np.arange(indexer.start, indexer.stop, indexer.step, dtype="int64") - if isinstance(indexer, slice) - else np.asanyarray(indexer, dtype="int64") - ) - - if not indexer.ndim == 1: - raise ValueError("indexer should be 1-dimensional") - - n = self.shape_proper[axis] - indexer = maybe_convert_indices(indexer, n, verify=verify) + # TODO what if `other` is BlockManager ? + left_arrays = self.arrays + right_arrays = other.arrays + result_arrays = [ + array_op(left, right) for left, right in zip(left_arrays, right_arrays) + ] + return type(self)(result_arrays, self._axes) - new_labels = self._axes[axis].take(indexer) - return self._reindex_indexer( - new_axis=new_labels, indexer=indexer, axis=axis, allow_dups=True - ) + def quantile( + self, + *, + qs: Float64Index, + axis: int = 0, + transposed: bool = False, + interpolation="linear", + ) -> ArrayManager: - def _make_na_array(self, fill_value=None): - if fill_value is None: - fill_value = np.nan + arrs = [ensure_block_shape(x, 2) for x in self.arrays] + assert axis == 1 + new_arrs = [ + quantile_compat(x, np.asarray(qs._values), interpolation) for x in arrs + ] + for i, arr in enumerate(new_arrs): + if arr.ndim == 2: + assert arr.shape[0] == 1, arr.shape + new_arrs[i] = arr[0] - dtype, fill_value = infer_dtype_from_scalar(fill_value) - # error: Argument "dtype" to "empty" has incompatible type "Union[dtype[Any], - # ExtensionDtype]"; expected "Union[dtype[Any], None, type, _SupportsDType, str, - # Union[Tuple[Any, int], Tuple[Any, Union[int, Sequence[int]]], List[Any], - # _DTypeDict, Tuple[Any, Any]]]" - values = np.empty(self.shape_proper[0], dtype=dtype) # type: ignore[arg-type] - values.fill(fill_value) - return values + axes = [qs, self._axes[1]] + return type(self)(new_arrs, axes) - def _equal_values(self, other) -> bool: - """ - Used in .equals defined in base class. Only check the column values - assuming shape and indexes have already been checked. - """ - for left, right in zip(self.arrays, other.arrays): - if not array_equals(left, right): - return False - else: - return True + # ---------------------------------------------------------------- def unstack(self, unstacker, fill_value) -> ArrayManager: """ @@ -1117,12 +1141,8 @@ def unstack(self, unstacker, fill_value) -> ArrayManager: return type(self)(new_arrays, new_axes, verify_integrity=False) - # TODO - # equals - # to_dict - -class SingleArrayManager(ArrayManager, SingleDataManager): +class SingleArrayManager(BaseArrayManager, SingleDataManager): __slots__ = [ "_axes", # private attribute, because 'axes' has different order, see below @@ -1222,6 +1242,9 @@ def is_single_block(self) -> bool: def _consolidate_check(self): pass + def fast_xs(self, loc: int) -> ArrayLike: + raise NotImplementedError("Use series._values[loc] instead") + def get_slice(self, slobj: slice, axis: int = 0) -> SingleArrayManager: if axis >= self.ndim: raise IndexError("Requested axis not found in manager") @@ -1256,7 +1279,7 @@ def idelete(self, indexer) -> SingleArrayManager: self._axes = [self._axes[0][to_keep]] return self - def _get_data_subset(self, predicate: Callable) -> ArrayManager: + def _get_data_subset(self, predicate: Callable) -> SingleArrayManager: # used in get_numeric_data / get_bool_data if predicate(self.array): return type(self)(self.arrays, self._axes, verify_integrity=False) From 8d584672c8631df244b7245c33b8b39b3520c462 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 12 Apr 2021 09:02:20 -0700 Subject: [PATCH 2/2] post-merge fixup --- pandas/core/internals/array_manager.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 78d1ca61e834e..8c9902d330eee 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -678,10 +678,18 @@ def reindex_indexer( # ignored keywords consolidate: bool = True, only_slice: bool = False, + # ArrayManager specific keywords + use_na_proxy: bool = False, ) -> T: axis = self._normalize_axis(axis) return self._reindex_indexer( - new_axis, indexer, axis, fill_value, allow_dups, copy + new_axis, + indexer, + axis, + fill_value, + allow_dups, + copy, + use_na_proxy, ) def _reindex_indexer( @@ -692,6 +700,7 @@ def _reindex_indexer( fill_value=None, allow_dups: bool = False, copy: bool = True, + use_na_proxy: bool = False, ) -> T: """ Parameters @@ -726,7 +735,9 @@ def _reindex_indexer( new_arrays = [] for i in indexer: if i == -1: - arr = self._make_na_array(fill_value=fill_value) + arr = self._make_na_array( + fill_value=fill_value, use_na_proxy=use_na_proxy + ) else: arr = self.arrays[i] new_arrays.append(arr)