diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 0454150f61045..6f14c8183f0c4 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -288,10 +288,6 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
     pytest -q --doctest-modules pandas/core/generic.py
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
-    MSG='Doctests groupby.py' ; echo $MSG
-    pytest -q --doctest-modules pandas/core/groupby/groupby.py -k"-cumcount -describe -pipe"
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-
     MSG='Doctests series.py' ; echo $MSG
     pytest -q --doctest-modules pandas/core/series.py
     RET=$(($RET + $?)) ; echo $MSG "DONE"
@@ -314,6 +310,10 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
     pytest -q --doctest-modules pandas/core/dtypes/
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
+    MSG='Doctests groupby' ; echo $MSG
+    pytest -q --doctest-modules pandas/core/groupby/
+    RET=$(($RET + $?)) ; echo $MSG "DONE"
+
     MSG='Doctests indexes' ; echo $MSG
     pytest -q --doctest-modules pandas/core/indexes/
     RET=$(($RET + $?)) ; echo $MSG "DONE"
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 208cbfc5b06d6..91839d8393f8c 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -833,10 +833,13 @@ class DataFrameGroupBy(GroupBy[DataFrame]):
         """
     Examples
     --------
-
-    >>> df = pd.DataFrame({'A': [1, 1, 2, 2],
-    ...                    'B': [1, 2, 3, 4],
-    ...                    'C': np.random.randn(4)})
+    >>> df = pd.DataFrame(
+    ...     {
+    ...         "A": [1, 1, 2, 2],
+    ...         "B": [1, 2, 3, 4],
+    ...         "C": [0.362838, 0.227877, 1.267767, -0.562860],
+    ...     }
+    ... )
 
     >>> df
        A  B         C
@@ -876,7 +879,7 @@ class DataFrameGroupBy(GroupBy[DataFrame]):
         B             C
       min max       sum
     A
-    1   1   2  0.590716
+    1   1   2  0.590715
     2   3   4  0.704907
 
     To control the output names with different aggregations per column,
@@ -887,8 +890,9 @@ class DataFrameGroupBy(GroupBy[DataFrame]):
     ...     c_sum=pd.NamedAgg(column="C", aggfunc="sum"))
       b_min     c_sum
     A
-    1      1 -1.956929
-    2      3 -0.322183
+    1      1  0.590715
+    2      3  0.704907
+
 
     - The keywords are the *output* column names
     - The values are tuples whose first element is the column to select
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 1474e173b4f8c..ac5bdfe1ba042 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -200,14 +200,14 @@ class providing the base-class of operations.
 functions that expect Series, DataFrames, GroupBy or Resampler objects.
 Instead of writing
 
->>> h(g(f(df.groupby('group')), arg1=a), arg2=b, arg3=c)
+>>> h(g(f(df.groupby('group')), arg1=a), arg2=b, arg3=c)  # doctest: +SKIP
 
 You can write
 
 >>> (df.groupby('group')
 ...    .pipe(f)
 ...    .pipe(g, arg1=a)
-...    .pipe(h, arg2=b, arg3=c))
+...    .pipe(h, arg2=b, arg3=c))  # doctest: +SKIP
 
 which is much more readable.
 
@@ -2011,7 +2011,9 @@ def cumcount(self, ascending: bool = True):
 
         Essentially this is equivalent to
 
-        >>> self.apply(lambda x: pd.Series(np.arange(len(x)), x.index))
+        .. code-block:: python
+
+            self.apply(lambda x: pd.Series(np.arange(len(x)), x.index))
 
         Parameters
         ----------
diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
index 2f50845fda4dc..9bd098d1d49a3 100644
--- a/pandas/core/groupby/grouper.py
+++ b/pandas/core/groupby/grouper.py
@@ -79,16 +79,51 @@ class Grouper:
     --------
     Syntactic sugar for ``df.groupby('A')``
 
-    >>> df.groupby(Grouper(key='A'))
-
-    Specify a resample operation on the column 'date'
-
-    >>> df.groupby(Grouper(key='date', freq='60s'))
-
-    Specify a resample operation on the level 'date' on the columns axis
-    with a frequency of 60s
-
-    >>> df.groupby(Grouper(level='date', freq='60s', axis=1))
+    >>> df = pd.DataFrame(
+    ...     {
+    ...         "Animal": ["Falcon", "Parrot", "Falcon", "Falcon", "Parrot"],
+    ...         "Speed": [100, 5, 200, 300, 15],
+    ...     }
+    ... )
+    >>> df
+       Animal  Speed
+    0  Falcon    100
+    1  Parrot      5
+    2  Falcon    200
+    3  Falcon    300
+    4  Parrot     15
+    >>> df.groupby(pd.Grouper(key="Animal")).mean()
+            Speed
+    Animal
+    Falcon    200
+    Parrot     10
+
+    Specify a resample operation on the column 'Publish date'
+
+    >>> df = pd.DataFrame(
+    ...     {
+    ...         "Publish date": [
+    ...             pd.Timestamp("2000-01-02"),
+    ...             pd.Timestamp("2000-01-02"),
+    ...             pd.Timestamp("2000-01-09"),
+    ...             pd.Timestamp("2000-01-16")
+    ...         ],
+    ...         "ID": [0, 1, 2, 3],
+    ...         "Price": [10, 20, 30, 40]
+    ...     }
+    ... )
+    >>> df
+      Publish date  ID  Price
+    0   2000-01-02   0     10
+    1   2000-01-02   1     20
+    2   2000-01-09   2     30
+    3   2000-01-16   3     40
+    >>> df.groupby(pd.Grouper(key="Publish date", freq="1W")).mean()
+                   ID  Price
+    Publish date
+    2000-01-02    0.5   15.0
+    2000-01-09    2.0   30.0
+    2000-01-16    3.0   40.0
     """
 
     _attributes: Tuple[str, ...] = ("key", "level", "freq", "axis", "sort")