From c4590a37f3eb662fbc44f5a915d90646943db1ff Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <> Date: Thu, 2 Apr 2020 05:08:06 +0300 Subject: [PATCH 1/4] Fixed examples in `pandas/core/groupby/` --- ci/code_checks.sh | 4 +-- pandas/core/groupby/generic.py | 18 ++++++----- pandas/core/groupby/groupby.py | 6 ++-- pandas/core/groupby/grouper.py | 56 ++++++++++++++++++++++++++++------ 4 files changed, 62 insertions(+), 22 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index c8d08277e9a26..ae7b30b1b9cc7 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -274,8 +274,8 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then pytest -q --doctest-modules pandas/core/series.py RET=$(($RET + $?)) ; echo $MSG "DONE" - MSG='Doctests groupby.py' ; echo $MSG - pytest -q --doctest-modules pandas/core/groupby/groupby.py -k"-cumcount -describe -pipe" + MSG='Doctests groupby' ; echo $MSG + pytest -q --doctest-modules pandas/core/groupby/ RET=$(($RET + $?)) ; echo $MSG "DONE" MSG='Doctests tools' ; echo $MSG diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 093c925acbc49..27442996f22c6 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -833,10 +833,13 @@ class DataFrameGroupBy(GroupBy): """ Examples -------- - - >>> df = pd.DataFrame({'A': [1, 1, 2, 2], - ... 'B': [1, 2, 3, 4], - ... 'C': np.random.randn(4)}) + >>> df = pd.DataFrame( + ... { + ... "A": [1, 1, 2, 2], + ... "B": [1, 2, 3, 4], + ... "C": [0.362838, 0.227877, 1.267767, -0.562860], + ... } + ... ) >>> df A B C @@ -876,7 +879,7 @@ class DataFrameGroupBy(GroupBy): B C min max sum A - 1 1 2 0.590716 + 1 1 2 0.590715 2 3 4 0.704907 To control the output names with different aggregations per column, @@ -887,8 +890,9 @@ class DataFrameGroupBy(GroupBy): ... c_sum=pd.NamedAgg(column="C", aggfunc="sum")) b_min c_sum A - 1 1 -1.956929 - 2 3 -0.322183 + 1 1 0.590715 + 2 3 0.704907 + - The keywords are the *output* column names - The values are tuples whose first element is the column to select diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index dff712ee17ea6..267b2083dc6a0 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -198,14 +198,14 @@ class providing the base-class of operations. functions that expect Series, DataFrames, GroupBy or Resampler objects. Instead of writing ->>> h(g(f(df.groupby('group')), arg1=a), arg2=b, arg3=c) +>>> h(g(f(df.groupby('group')), arg1=a), arg2=b, arg3=c) # doctest: +SKIP You can write >>> (df.groupby('group') ... .pipe(f) ... .pipe(g, arg1=a) -... .pipe(h, arg2=b, arg3=c)) +... .pipe(h, arg2=b, arg3=c)) # doctest: +SKIP which is much more readable. @@ -2005,7 +2005,7 @@ def cumcount(self, ascending: bool = True): Essentially this is equivalent to - >>> self.apply(lambda x: pd.Series(np.arange(len(x)), x.index)) + self.apply(lambda x: pd.Series(np.arange(len(x)), x.index)) Parameters ---------- diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 2f50845fda4dc..a734afdd75b1b 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -79,16 +79,52 @@ class Grouper: -------- Syntactic sugar for ``df.groupby('A')`` - >>> df.groupby(Grouper(key='A')) - - Specify a resample operation on the column 'date' - - >>> df.groupby(Grouper(key='date', freq='60s')) - - Specify a resample operation on the level 'date' on the columns axis - with a frequency of 60s - - >>> df.groupby(Grouper(level='date', freq='60s', axis=1)) + >>> df = pd.DataFrame( + ... { + ... "Animal": ["Falcon", "Parrot", "Falcon", "Falcon", "Parrot"], + ... "Speed": [100, 5, 200, 300, 15], + ... } + ... ) + >>> df + Animal Speed + 0 Falcon 100 + 1 Parrot 5 + 2 Falcon 200 + 3 Falcon 300 + 4 Parrot 15 + >>> df.groupby(pd.Grouper(key="Animal")).mean() + Speed + Animal + Falcon 200 + Parrot 10 + + + Specify a resample operation on the column 'Publish date' + + >>> df = pd.DataFrame( + ... { + ... "Publish date": [ + ... pd.Timestamp("2000-01-02"), + ... pd.Timestamp("2000-01-02"), + ... pd.Timestamp("2000-01-09"), + ... pd.Timestamp("2000-01-16") + ... ], + ... "ID": [0, 1, 2, 3], + ... "Price": [10, 20, 30, 40] + ... } + ... ) + >>> df + Publish date ID Price + 0 2000-01-02 0 10 + 1 2000-01-02 1 20 + 2 2000-01-09 2 30 + 3 2000-01-16 3 40 + >>> df.groupby(pd.Grouper(key="Publish date", freq="1W")).mean() + ID Price + Publish date + 2000-01-02 0.5 15.0 + 2000-01-09 2.0 30.0 + 2000-01-16 3.0 40.0 """ _attributes: Tuple[str, ...] = ("key", "level", "freq", "axis", "sort") From 9d2930d540a23e906eb053d74040efa726e3b59c Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <> Date: Thu, 2 Apr 2020 14:07:45 +0300 Subject: [PATCH 2/4] Fixed CI issues --- pandas/core/groupby/grouper.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index a734afdd75b1b..9bd098d1d49a3 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -98,7 +98,6 @@ class Grouper: Falcon 200 Parrot 10 - Specify a resample operation on the column 'Publish date' >>> df = pd.DataFrame( From 1fad1fbae50db00137eb1573d98be26936668534 Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <> Date: Fri, 3 Apr 2020 17:08:33 +0300 Subject: [PATCH 3/4] Moved doctest check to the correct place --- ci/code_checks.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 83daa1e73911d..0b01838720394 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -284,10 +284,6 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then pytest -q --doctest-modules pandas/core/generic.py RET=$(($RET + $?)) ; echo $MSG "DONE" - MSG='Doctests groupby' ; echo $MSG - pytest -q --doctest-modules pandas/core/groupby/ - RET=$(($RET + $?)) ; echo $MSG "DONE" - MSG='Doctests series.py' ; echo $MSG pytest -q --doctest-modules pandas/core/series.py RET=$(($RET + $?)) ; echo $MSG "DONE" @@ -306,6 +302,10 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then pytest -q --doctest-modules pandas/core/dtypes/ RET=$(($RET + $?)) ; echo $MSG "DONE" + MSG='Doctests groupby' ; echo $MSG + pytest -q --doctest-modules pandas/core/groupby/ + RET=$(($RET + $?)) ; echo $MSG "DONE" + MSG='Doctests indexes' ; echo $MSG pytest -q --doctest-modules pandas/core/indexes/ RET=$(($RET + $?)) ; echo $MSG "DONE" From 4f1b138f4e35fdbd0766e40d8c54bbe9a71f1e71 Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <> Date: Fri, 3 Apr 2020 19:16:10 +0300 Subject: [PATCH 4/4] Displaying text as code block XREF: https://github.com/pandas-dev/pandas/pull/33230#discussion_r403076394 --- pandas/core/groupby/groupby.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 267b2083dc6a0..23fcf2de5bbe2 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -2005,6 +2005,8 @@ def cumcount(self, ascending: bool = True): Essentially this is equivalent to + .. code-block:: python + self.apply(lambda x: pd.Series(np.arange(len(x)), x.index)) Parameters