DOC: Fixed examples in pandas/core/groupby/ #33230


Merged: 8 commits merged on Apr 10, 2020
8 changes: 4 additions & 4 deletions ci/code_checks.sh
@@ -288,10 +288,6 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
pytest -q --doctest-modules pandas/core/generic.py
RET=$(($RET + $?)) ; echo $MSG "DONE"

MSG='Doctests groupby.py' ; echo $MSG
pytest -q --doctest-modules pandas/core/groupby/groupby.py -k"-cumcount -describe -pipe"
RET=$(($RET + $?)) ; echo $MSG "DONE"

MSG='Doctests series.py' ; echo $MSG
pytest -q --doctest-modules pandas/core/series.py
RET=$(($RET + $?)) ; echo $MSG "DONE"
@@ -314,6 +310,10 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
pytest -q --doctest-modules pandas/core/dtypes/
RET=$(($RET + $?)) ; echo $MSG "DONE"

MSG='Doctests groupby' ; echo $MSG
pytest -q --doctest-modules pandas/core/groupby/
RET=$(($RET + $?)) ; echo $MSG "DONE"

MSG='Doctests indexes' ; echo $MSG
pytest -q --doctest-modules pandas/core/indexes/
RET=$(($RET + $?)) ; echo $MSG "DONE"
18 changes: 11 additions & 7 deletions pandas/core/groupby/generic.py
@@ -833,10 +833,13 @@ class DataFrameGroupBy(GroupBy[DataFrame]):
"""
Examples
--------

>>> df = pd.DataFrame({'A': [1, 1, 2, 2],
... 'B': [1, 2, 3, 4],
... 'C': np.random.randn(4)})
>>> df = pd.DataFrame(
... {
... "A": [1, 1, 2, 2],
... "B": [1, 2, 3, 4],
... "C": [0.362838, 0.227877, 1.267767, -0.562860],
... }
... )

>>> df
A B C
@@ -876,7 +879,7 @@ class DataFrameGroupBy(GroupBy[DataFrame]):
B C
min max sum
A
1 1 2 0.590716
1 1 2 0.590715
2 3 4 0.704907

To control the output names with different aggregations per column,
@@ -887,8 +890,9 @@ class DataFrameGroupBy(GroupBy[DataFrame]):
... c_sum=pd.NamedAgg(column="C", aggfunc="sum"))
b_min c_sum
A
1 1 -1.956929
2 3 -0.322183
1 1 0.590715
2 3 0.704907


- The keywords are the *output* column names
- The values are tuples whose first element is the column to select
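As an aside (not part of this diff), the same named-aggregation result can also be written with plain (column, aggfunc) tuples instead of pd.NamedAgg; a minimal runnable sketch reusing the DataFrame from the docstring above:

    import pandas as pd

    df = pd.DataFrame(
        {
            "A": [1, 1, 2, 2],
            "B": [1, 2, 3, 4],
            "C": [0.362838, 0.227877, 1.267767, -0.562860],
        }
    )

    # The keyword names become the output column names; each value is a
    # (column, aggfunc) tuple, which pandas treats the same as pd.NamedAgg.
    result = df.groupby("A").agg(b_min=("B", "min"), c_sum=("C", "sum"))
    # Expected to match the docstring output above:
    #    b_min     c_sum
    # A
    # 1      1  0.590715
    # 2      3  0.704907
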
8 changes: 5 additions & 3 deletions pandas/core/groupby/groupby.py
@@ -200,14 +200,14 @@ class providing the base-class of operations.
functions that expect Series, DataFrames, GroupBy or Resampler objects.
Instead of writing

>>> h(g(f(df.groupby('group')), arg1=a), arg2=b, arg3=c)
>>> h(g(f(df.groupby('group')), arg1=a), arg2=b, arg3=c) # doctest: +SKIP

You can write

>>> (df.groupby('group')
... .pipe(f)
... .pipe(g, arg1=a)
... .pipe(h, arg2=b, arg3=c))
... .pipe(h, arg2=b, arg3=c)) # doctest: +SKIP

which is much more readable.
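
To make the pipe pattern above concrete (a small runnable sketch, not part of the diff; f, g and h are hypothetical stand-ins):

    import pandas as pd

    df = pd.DataFrame({"group": ["x", "x", "y"], "value": [1, 2, 3]})

    def f(gb):
        # gb is the DataFrameGroupBy object; reduce each group to its sum
        return gb.sum()

    def g(res, arg1):
        # shift the summed values by a constant
        return res + arg1

    def h(res, arg2, arg3):
        # scale, then shift again
        return res * arg2 + arg3

    # Equivalent to h(g(f(df.groupby("group")), arg1=1), arg2=10, arg3=0),
    # but written as a readable left-to-right chain.
    out = (df.groupby("group")
             .pipe(f)
             .pipe(g, arg1=1)
             .pipe(h, arg2=10, arg3=0))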

@@ -2011,7 +2011,9 @@ def cumcount(self, ascending: bool = True):

Essentially this is equivalent to

>>> self.apply(lambda x: pd.Series(np.arange(len(x)), x.index))
.. code-block:: python

self.apply(lambda x: pd.Series(np.arange(len(x)), x.index))
Member

Does this still render as code when you build the docs?

Member Author

Good catch!


Fixed in 4f1b138

Member

Is keeping this as it was not an option? I don't think we normally add code-blocks to docstrings, since they are oriented more towards generated HTML/PDF output and aren't super useful in, say, a shell.

Member Author

The only solution that comes to my mind is to add # doctest: +SKIP to this.

Member

@jreback do you know how we typically handle these? I don't think we use code-block in docstrings, do we?

Member Author

@ShaharNaveh, Apr 7, 2020

@WillAyd We actually do: (although I'm not sure if this is wanted)

    .. code-block:: python

        def __init__(self, pandas_object):  # noqa: E999
            ...

    For consistency with pandas methods, you should raise an ``AttributeError``
    if the data passed to your accessor has an incorrect dtype.

    >>> pd.Series(['a', 'b']).dt
    Traceback (most recent call last):
    ...
    AttributeError: Can only use .dt accessor with datetimelike values

    Examples
    --------
    In your library code::

        import pandas as pd

        @pd.api.extensions.register_dataframe_accessor("geo")
        class GeoAccessor:
            def __init__(self, pandas_obj):
                self._obj = pandas_obj

            @property
            def center(self):
                # return the geographic center point of this DataFrame
                lat = self._obj.latitude
                lon = self._obj.longitude
                return (float(lon.mean()), float(lat.mean()))

            def plot(self):
                # plot this array's data on a map, e.g., using Cartopy
                pass

    Back in an interactive IPython session:

    .. code-block:: ipython

        In [1]: ds = pd.DataFrame({{"longitude": np.linspace(0, 10),
           ...:                     "latitude": np.linspace(0, 20)}})

        In [2]: ds.geo.center
        Out[2]: (5.0, 10.0)

        In [3]: ds.geo.plot()  # plots data on a map

Member

Maybe using triple backticks? Otherwise, a skip seems reasonable. Or maybe @jorisvandenbossche has a better idea.


Parameters
----------
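For what it's worth, the cumcount equivalence discussed in the thread above can be checked with a short runnable sketch (not part of the diff):

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({"A": ["a", "a", "b", "a", "b"]})

    # cumcount numbers the rows of each group from 0 upward, in original row order.
    print(df.groupby("A").cumcount().tolist())  # [0, 1, 0, 2, 1]

    # The same numbering built group by group, mirroring the apply-based
    # expression shown in the docstring.
    pieces = [
        pd.Series(np.arange(len(idx)), index=idx)
        for _, idx in df.groupby("A").groups.items()
    ]
    print(pd.concat(pieces).sort_index().tolist())  # [0, 1, 0, 2, 1]
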
55 changes: 45 additions & 10 deletions pandas/core/groupby/grouper.py
@@ -79,16 +79,51 @@ class Grouper:
--------
Syntactic sugar for ``df.groupby('A')``

>>> df.groupby(Grouper(key='A'))

Specify a resample operation on the column 'date'

>>> df.groupby(Grouper(key='date', freq='60s'))

Specify a resample operation on the level 'date' on the columns axis
with a frequency of 60s

>>> df.groupby(Grouper(level='date', freq='60s', axis=1))
>>> df = pd.DataFrame(
... {
... "Animal": ["Falcon", "Parrot", "Falcon", "Falcon", "Parrot"],
... "Speed": [100, 5, 200, 300, 15],
... }
... )
>>> df
Animal Speed
0 Falcon 100
1 Parrot 5
2 Falcon 200
3 Falcon 300
4 Parrot 15
>>> df.groupby(pd.Grouper(key="Animal")).mean()
Speed
Animal
Falcon 200
Parrot 10

Specify a resample operation on the column 'Publish date'

>>> df = pd.DataFrame(
... {
... "Publish date": [
... pd.Timestamp("2000-01-02"),
... pd.Timestamp("2000-01-02"),
... pd.Timestamp("2000-01-09"),
... pd.Timestamp("2000-01-16")
... ],
... "ID": [0, 1, 2, 3],
... "Price": [10, 20, 30, 40]
... }
... )
>>> df
Publish date ID Price
0 2000-01-02 0 10
1 2000-01-02 1 20
2 2000-01-09 2 30
3 2000-01-16 3 40
>>> df.groupby(pd.Grouper(key="Publish date", freq="1W")).mean()
ID Price
Publish date
2000-01-02 0.5 15.0
2000-01-09 2.0 30.0
2000-01-16 3.0 40.0
"""

_attributes: Tuple[str, ...] = ("key", "level", "freq", "axis", "sort")
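As a side note (not part of the diff), the frequency-based grouping shown above appears to match what DataFrame.resample does once the grouping column is set as the index; a hedged sketch using the same data:

    import pandas as pd

    df = pd.DataFrame(
        {
            "Publish date": [
                pd.Timestamp("2000-01-02"),
                pd.Timestamp("2000-01-02"),
                pd.Timestamp("2000-01-09"),
                pd.Timestamp("2000-01-16"),
            ],
            "ID": [0, 1, 2, 3],
            "Price": [10, 20, 30, 40],
        }
    )

    by_grouper = df.groupby(pd.Grouper(key="Publish date", freq="1W")).mean()
    by_resample = df.set_index("Publish date").resample("1W").mean()

    # Both are expected to produce weekly bins labelled 2000-01-02, 2000-01-09
    # and 2000-01-16 (weeks end on Sunday by default), with the same means.
    print(by_grouper.equals(by_resample))  # expected: True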