diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 9d3437fe08b24..d0c6a1a841edb 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -322,6 +322,7 @@ def agg_list_like(self) -> FrameOrSeriesUnion: # i.e. obj is Series or DataFrame selected_obj = obj elif obj._selected_obj.ndim == 1: + # For SeriesGroupBy this matches _obj_with_exclusions selected_obj = obj._selected_obj else: selected_obj = obj._obj_with_exclusions diff --git a/pandas/core/base.py b/pandas/core/base.py index e45c4bf514973..ff94c833d5268 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -180,16 +180,6 @@ class SelectionMixin(Generic[FrameOrSeries]): _internal_names = ["_cache", "__setstate__"] _internal_names_set = set(_internal_names) - @property - def _selection_name(self): - """ - Return a name for myself; - - This would ideally be called the 'name' property, - but we cannot conflict with the Series.name property which can be set. - """ - return self._selection - @final @property def _selection_list(self): diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index c5d9144893f48..4fff12d45af7d 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -11,7 +11,6 @@ abc, namedtuple, ) -import copy from functools import partial from textwrap import dedent from typing import ( @@ -169,18 +168,6 @@ class SeriesGroupBy(GroupBy[Series]): def _iterate_slices(self) -> Iterable[Series]: yield self._selected_obj - @property - def _selection_name(self): - """ - since we are a series, we by definition only have - a single name, but may be the result of a selection or - the name of our object - """ - if self._selection is None: - return self.obj.name - else: - return self._selection - _agg_examples_doc = dedent( """ Examples @@ -314,15 +301,9 @@ def _aggregate_multiple_funcs(self, arg) -> DataFrame: results: dict[base.OutputKey, FrameOrSeriesUnion] = {} for idx, (name, func) in enumerate(arg): - obj = self - # reset the cache so that we - # only include the named selection - if name in self._selected_obj: - obj = copy.copy(obj) - obj._reset_cache() - obj._selection = name - results[base.OutputKey(label=name, position=idx)] = obj.aggregate(func) + key = base.OutputKey(label=name, position=idx) + results[key] = self.aggregate(func) if any(isinstance(x, DataFrame) for x in results.values()): from pandas import concat @@ -464,7 +445,7 @@ def _wrap_applied_output( # GH #6265 return self.obj._constructor( [], - name=self._selection_name, + name=self.obj.name, index=self.grouper.result_index, dtype=data.dtype, ) @@ -485,14 +466,14 @@ def _get_index() -> Index: # if self.observed is False, # keep all-NaN rows created while re-indexing res_ser = res_df.stack(dropna=self.observed) - res_ser.name = self._selection_name + res_ser.name = self.obj.name return res_ser elif isinstance(values[0], (Series, DataFrame)): return self._concat_objects(keys, values, not_indexed_same=not_indexed_same) else: # GH #6265 #24880 result = self.obj._constructor( - data=values, index=_get_index(), name=self._selection_name + data=values, index=_get_index(), name=self.obj.name ) return self._reindex_output(result) @@ -550,7 +531,7 @@ def _transform_general(self, func: Callable, *args, **kwargs) -> Series: Transform with a callable func`. """ assert callable(func) - klass = type(self._selected_obj) + klass = type(self.obj) results = [] for name, group in self: @@ -572,8 +553,10 @@ def _transform_general(self, func: Callable, *args, **kwargs) -> Series: else: result = self.obj._constructor(dtype=np.float64) - result.name = self._selected_obj.name - return result + result.name = self.obj.name + # error: Incompatible return value type (got "Union[DataFrame, Series]", + # expected "Series") + return result # type: ignore[return-value] def _can_use_transform_fast(self, result) -> bool: return True @@ -693,7 +676,7 @@ def nunique(self, dropna: bool = True) -> Series: res, out = np.zeros(len(ri), dtype=out.dtype), res res[ids[idx]] = out - result = self.obj._constructor(res, index=ri, name=self._selection_name) + result = self.obj._constructor(res, index=ri, name=self.obj.name) return self._reindex_output(result, fill_value=0) @doc(Series.describe) @@ -799,7 +782,7 @@ def apply_series_value_counts(): levels = [ping.group_index for ping in self.grouper.groupings] + [ lev # type: ignore[list-item] ] - names = self.grouper.names + [self._selection_name] + names = self.grouper.names + [self.obj.name] if dropna: mask = codes[-1] != -1 @@ -855,7 +838,7 @@ def build_codes(lev_codes: np.ndarray) -> np.ndarray: if is_integer_dtype(out.dtype): out = ensure_int64(out) - return self.obj._constructor(out, index=mi, name=self._selection_name) + return self.obj._constructor(out, index=mi, name=self.obj.name) def count(self) -> Series: """ @@ -876,7 +859,7 @@ def count(self) -> Series: result = self.obj._constructor( out, index=self.grouper.result_index, - name=self._selection_name, + name=self.obj.name, dtype="int64", ) return self._reindex_output(result, fill_value=0) @@ -1048,7 +1031,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs) if isinstance(sobj, Series): # GH#35246 test_groupby_as_index_select_column_sum_empty_df - result.columns = [self._selected_obj.name] + result.columns = [sobj.name] else: # select everything except for the last level, which is the one # containing the name of the function(s), see GH#32040 @@ -1174,11 +1157,11 @@ def _wrap_applied_output(self, data, keys, values, not_indexed_same=False): # TODO: sure this is right? we used to do this # after raising AttributeError above return self.obj._constructor_sliced( - values, index=key_index, name=self._selection_name + values, index=key_index, name=self._selection ) elif not isinstance(first_not_none, Series): # values are not series or array-like but scalars - # self._selection_name not passed through to Series as the + # self._selection not passed through to Series as the # result should not take the name of original selection # of columns if self.as_index: diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 2510fcaa84f1c..2091d2fc484e1 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1052,9 +1052,10 @@ def reset_identity(values): values = reset_identity(values) result = concat(values, axis=self.axis) - if isinstance(result, Series) and self._selection_name is not None: + name = self.obj.name if self.obj.ndim == 1 else self._selection + if isinstance(result, Series) and name is not None: - result.name = self._selection_name + result.name = name return result