From c13af19cfc9562fc7b4e0248e7c6c49e993ac57a Mon Sep 17 00:00:00 2001
From: fujiaxiang <fujiaxiang1995@gmail.com>
Date: Thu, 9 Jan 2020 23:52:55 +0800
Subject: [PATCH 01/44] ENH: Added DataFrame.differences and Series.differences
 (GH30429)

---
 pandas/core/frame.py                          | 141 ++++++++++++++++++
 pandas/core/series.py                         | 120 +++++++++++++++
 .../tests/frame/methods/test_differences.py   | 130 ++++++++++++++++
 .../tests/series/methods/test_differences.py  |  85 +++++++++++
 4 files changed, 476 insertions(+)
 create mode 100644 pandas/tests/frame/methods/test_differences.py
 create mode 100644 pandas/tests/series/methods/test_differences.py

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 538d0feade96f..c8bbab101bea7 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5304,6 +5304,147 @@ def _construct_result(self, result) -> "DataFrame":
         out.columns = self.columns
         return out
 
+    def differences(self, other, axis=1, keep_indices=False, keep_values=False):
+        """
+        Compare to another DataFrame and show the differences.
+
+        The axis on which to stack results and how much information to
+        preserve can be customized.
+
+        Note that NaNs are considered not different from other NaNs.
+
+        Parameters
+        ----------
+        other : DataFrame
+            Object to compare with.
+
+        axis : {0 or 'index', 1 or 'columns'}, default 1
+            Determine how the differences are stacked.
+            * 0, or 'index' : Stack differences on neighbouring rows.
+            * 1, or 'columns' : Stack differences on neighbouring columns.
+
+        keep_indices: bool, default False
+            Whether to keep the rows and columns that are equal, or drop them.
+
+        keep_values: bool, default False
+            Whether to keep the values that are equal, or show as NaNs.
+
+        Returns
+        -------
+        DataFrame
+            DataFrame that shows the differences stacked side by side.
+
+        See Also
+        --------
+        Series.differences: Show differences.
+
+        Examples
+        --------
+        >>> df = pd.DataFrame(
+        ...     {
+        ...         "col1": ["a", "a", "b", "b", "a"],
+        ...         "col2": [1.0, 2.0, 3.0, np.nan, 5.0],
+        ...         "col3": [1.0, 2.0, 3.0, 4.0, 5.0]
+        ...     },
+        ...     columns=["col1", "col2", "col3"],
+        ... )
+        >>> df
+          col1  col2  col3
+        0    a   1.0   1.0
+        1    a   2.0   2.0
+        2    b   3.0   3.0
+        3    b   NaN   4.0
+        4    a   5.0   5.0
+
+        >>> df2 = df.copy()
+        >>> df2.loc[0, 'col1'] = 'c'
+        >>> df2.loc[2, 'col3'] = 4.0
+        >>> df2
+          col1  col2  col3
+        0    c   1.0   1.0
+        1    a   2.0   2.0
+        2    b   3.0   4.0
+        3    b   NaN   4.0
+        4    a   5.0   5.0
+
+        Stack the differences on columns
+
+        >>> df.differences(df2)
+          col1       col3
+          self other self other
+        0    a     c  NaN   NaN
+        2  NaN   NaN  3.0   4.0
+
+        Stack the differences on rows
+
+        >>> df.differences(df2, axis=0)
+                col1  col3
+        0 self     a   NaN
+          other    c   NaN
+        2 self   NaN   3.0
+          other  NaN   4.0
+
+        Keep all the original indices (rows and columns)
+
+        >>> df.differences(df2, keep_indices=True)
+          col1       col2       col3
+          self other self other self other
+        0    a     c  NaN   NaN  NaN   NaN
+        1  NaN   NaN  NaN   NaN  NaN   NaN
+        2  NaN   NaN  NaN   NaN  3.0   4.0
+        3  NaN   NaN  NaN   NaN  NaN   NaN
+        4  NaN   NaN  NaN   NaN  NaN   NaN
+
+        Keep all original indices and data
+
+        >>> df.differences(df2, keep_indices=True, keep_values=True)
+          col1       col2       col3
+          self other self other self other
+        0    a     c  1.0   1.0  1.0   1.0
+        1    a     a  2.0   2.0  2.0   2.0
+        2    b     b  3.0   3.0  3.0   4.0
+        3    b     b  NaN   NaN  4.0   4.0
+        4    a     a  5.0   5.0  5.0   5.0
+        """
+        from pandas.core.reshape.concat import concat
+
+        mask = ~((self == other) | (self.isna() & other.isna()))
+        keys = ["self", "other"]
+
+        if not keep_values:
+            self = self.where(mask)
+            other = other.where(mask)
+
+        if not keep_indices:
+            cmask = mask.any()
+            rmask = mask.any(axis=1)
+            self = self.loc[rmask, cmask]
+            other = other.loc[rmask, cmask]
+
+        axis = self._get_axis_number(axis)
+        diff = concat([self, other], axis=axis, keys=keys)
+
+        ax = diff._get_axis(axis)
+        ax_names = np.array(ax.names)
+
+        # set index names to positions to avoid confusion
+        ax.names = np.arange(len(ax_names))
+
+        # bring self-other to inner level
+        order = list(range(1, ax.nlevels)) + [0]
+        diff = diff.reorder_levels(order, axis=axis)
+
+        # restore the index names in order
+        diff._get_axis(axis=axis).names = ax_names[order]
+
+        # reorder axis to keep things organized
+        indices = (
+            np.arange(diff.shape[axis]).reshape([2, diff.shape[axis] // 2]).T.flatten()
+        )
+        diff = diff.take(indices, axis=axis)
+
+        return diff
+
     def combine(
         self, other: "DataFrame", func, fill_value=None, overwrite=True
     ) -> "DataFrame":
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 446654374f37c..96a015253080d 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -2566,6 +2566,126 @@ def _binop(self, other, func, level=None, fill_value=None):
         ret = ops._construct_result(self, result, new_index, name)
         return ret
 
+    def differences(self, other, axis=1, keep_indices=False, keep_values=False):
+        """
+        Compare to another Series and show the differences.
+
+        The axis on which to stack results and how much information to
+        preserve can be customized.
+
+        Note that NaNs are considered not different from other NaNs.
+
+        Parameters
+        ----------
+        other : Series
+            Object to compare with.
+
+        axis : {0 or 'index', 1 or 'columns'}, default 1
+            Determine how the differences are stacked.
+            * 0, or 'index' : Stack differences on neighbouring indices.
+            * 1, or 'columns' : Stack differences on neighbouring columns.
+
+        keep_indices: bool, default False
+            Whether to keep the indices that are equal, or drop them.
+
+        keep_values: bool, default False
+            Whether to keep the values that are equal, or show as NaNs.
+
+        Returns
+        -------
+        Series or DataFrame
+            If axis is 0 or 'index' the result will be a Series.
+            If axis is 1 or 'columns' the result will be a DataFrame.
+
+        See Also
+        --------
+        DataFrame.differences: Show differences.
+
+        Examples
+        --------
+        >>> s1 = pd.Series(["a", "b", "c", "d", "e"])
+        >>> s2 = pd.Series(["a", "a", "c", "b", "e"])
+
+        Stack the differences on columns
+
+        >>> s1.differences(s2)
+          self other
+        1    b     a
+        3    d     b
+
+        Stack the differences on indices
+
+        >>> s1.differences(s2, axis=0)
+        1  self     b
+           other    a
+        3  self     d
+           other    b
+        dtype: object
+
+        Keep all the original indices
+
+        >>> s1.differences(s2, keep_indices=True)
+          self other
+        0  NaN   NaN
+        1    b     a
+        2  NaN   NaN
+        3    d     b
+        4  NaN   NaN
+
+        Keep all original indices and data
+
+        >>> s1.differences(s2, keep_indices=True, keep_values=True)
+          self other
+        0    a     a
+        1    b     a
+        2    c     c
+        3    d     b
+        4    e     e
+        """
+        from pandas.core.reshape.concat import concat
+
+        mask = ~((self == other) | (self.isna() & other.isna()))
+        keys = ["self", "other"]
+
+        if not keep_values:
+            self = self.where(mask)
+            other = other.where(mask)
+
+        if not keep_indices:
+            self = self[mask]
+            other = other[mask]
+
+        if axis in (1, "columns"):
+            axis = 1
+        else:
+            axis = self._get_axis_number(axis)
+
+        diff = concat([self, other], axis=axis, keys=keys)
+
+        if axis == 1:
+            return diff
+
+        ax = diff._get_axis(axis)
+        ax_names = np.array(ax.names)
+
+        # set index names to positions to avoid confusion
+        ax.names = np.arange(len(ax_names))
+
+        # bring self-other to inner level
+        order = list(range(1, ax.nlevels)) + [0]
+        diff = diff.reorder_levels(order)
+
+        # restore the index names in order
+        diff._get_axis(axis=axis).names = ax_names[order]
+
+        # reorder axis to keep things organized
+        indices = (
+            np.arange(diff.shape[axis]).reshape([2, diff.shape[axis] // 2]).T.flatten()
+        )
+        diff = diff.take(indices, axis=axis)
+
+        return diff
+
     def combine(self, other, func, fill_value=None):
         """
         Combine the Series with a Series or scalar according to `func`.
diff --git a/pandas/tests/frame/methods/test_differences.py b/pandas/tests/frame/methods/test_differences.py
new file mode 100644
index 0000000000000..058888f8551e2
--- /dev/null
+++ b/pandas/tests/frame/methods/test_differences.py
@@ -0,0 +1,130 @@
+import numpy as np
+import pytest
+
+import pandas as pd
+import pandas._testing as tm
+
+
+@pytest.mark.parametrize("axis", [0, 1, "index", "columns"])
+def test_differences_axis(axis):
+    df = pd.DataFrame(
+        {"col1": ["a", "b", "c"], "col2": [1.0, 2.0, np.nan], "col3": [1.0, 2.0, 3.0]},
+        columns=["col1", "col2", "col3"],
+    )
+    df2 = df.copy()
+    df2.loc[0, "col1"] = "c"
+    df2.loc[2, "col3"] = 4.0
+
+    result = df.differences(df2, axis=axis)
+
+    if axis in (1, "columns"):
+        indices = pd.Index([0, 2])
+        columns = pd.MultiIndex.from_product([["col1", "col3"], ["self", "other"]])
+        expected = pd.DataFrame(
+            [["a", "c", np.nan, np.nan], [np.nan, np.nan, 3.0, 4.0]],
+            index=indices,
+            columns=columns,
+        )
+    else:
+        indices = pd.MultiIndex.from_product([[0, 2], ["self", "other"]])
+        columns = pd.Index(["col1", "col3"])
+        expected = pd.DataFrame(
+            [["a", np.nan], ["c", np.nan], [np.nan, 3.0], [np.nan, 4.0]],
+            index=indices,
+            columns=columns,
+        )
+    tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "keep_indices, keep_values",
+    [
+        (True, False),
+        (False, True),
+        (True, True),
+        # False, False case is already covered in test_differences_axis
+    ],
+)
+def test_differences_various_formats(keep_indices, keep_values):
+    df = pd.DataFrame(
+        {"col1": ["a", "b", "c"], "col2": [1.0, 2.0, np.nan], "col3": [1.0, 2.0, 3.0]},
+        columns=["col1", "col2", "col3"],
+    )
+    df2 = df.copy()
+    df2.loc[0, "col1"] = "c"
+    df2.loc[2, "col3"] = 4.0
+
+    result = df.differences(df2, keep_indices=keep_indices, keep_values=keep_values)
+
+    if keep_indices:
+        indices = pd.Index([0, 1, 2])
+        columns = pd.MultiIndex.from_product(
+            [["col1", "col2", "col3"], ["self", "other"]]
+        )
+        if keep_values:
+            expected = pd.DataFrame(
+                [
+                    ["a", "c", 1.0, 1.0, 1.0, 1.0],
+                    ["b", "b", 2.0, 2.0, 2.0, 2.0],
+                    ["c", "c", np.nan, np.nan, 3.0, 4.0],
+                ],
+                index=indices,
+                columns=columns,
+            )
+        else:
+            expected = pd.DataFrame(
+                [
+                    ["a", "c", np.nan, np.nan, np.nan, np.nan],
+                    [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
+                    [np.nan, np.nan, np.nan, np.nan, 3.0, 4.0],
+                ],
+                index=indices,
+                columns=columns,
+            )
+    else:
+        indices = pd.Index([0, 2])
+        columns = pd.MultiIndex.from_product([["col1", "col3"], ["self", "other"]])
+        expected = pd.DataFrame(
+            [["a", "c", 1.0, 1.0], ["c", "c", 3.0, 4.0]], index=indices, columns=columns
+        )
+    tm.assert_frame_equal(result, expected)
+
+
+def test_differences_with_equal_nulls():
+    # We want to make sure two NaNs are considered the same
+    # and dropped where applicable
+    df = pd.DataFrame(
+        {"col1": ["a", "b", "c"], "col2": [1.0, 2.0, np.nan], "col3": [1.0, 2.0, 3.0]},
+        columns=["col1", "col2", "col3"],
+    )
+    df2 = df.copy()
+    df2.loc[0, "col1"] = "c"
+
+    result = df.differences(df2)
+    indices = pd.Index([0])
+    columns = pd.MultiIndex.from_product([["col1"], ["self", "other"]])
+    expected = pd.DataFrame([["a", "c"]], index=indices, columns=columns)
+    tm.assert_frame_equal(result, expected)
+
+
+def test_differences_with_non_equal_nulls():
+    # We want to make sure the relevant NaNs do not get dropped
+    # even if the entire row or column are NaNs
+    df = pd.DataFrame(
+        {"col1": ["a", "b", "c"], "col2": [1.0, 2.0, np.nan], "col3": [1.0, 2.0, 3.0]},
+        columns=["col1", "col2", "col3"],
+    )
+    df2 = df.copy()
+    df2.loc[0, "col1"] = "c"
+    df2.loc[2, "col3"] = np.nan
+
+    result = df.differences(df2)
+
+    indices = pd.Index([0, 2])
+    columns = pd.MultiIndex.from_product([["col1", "col3"], ["self", "other"]])
+    expected = pd.DataFrame(
+        [["a", "c", np.nan, np.nan], [np.nan, np.nan, 3.0, np.nan]],
+        index=indices,
+        columns=columns,
+    )
+    tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/series/methods/test_differences.py b/pandas/tests/series/methods/test_differences.py
new file mode 100644
index 0000000000000..58780b129905c
--- /dev/null
+++ b/pandas/tests/series/methods/test_differences.py
@@ -0,0 +1,85 @@
+import numpy as np
+import pytest
+
+import pandas as pd
+import pandas._testing as tm
+
+
+@pytest.mark.parametrize("axis", [0, 1, "index", "columns"])
+def test_differences_axis(axis):
+    s1 = pd.Series(["a", "b", "c"])
+    s2 = pd.Series(["x", "b", "z"])
+
+    result = s1.differences(s2, axis=axis)
+
+    if axis in (1, "columns"):
+        indices = pd.Index([0, 2])
+        columns = pd.Index(["self", "other"])
+        expected = pd.DataFrame(
+            [["a", "x"], ["c", "z"]], index=indices, columns=columns
+        )
+        tm.assert_frame_equal(result, expected)
+    else:
+        indices = pd.MultiIndex.from_product([[0, 2], ["self", "other"]])
+        expected = pd.Series(["a", "x", "c", "z"], index=indices)
+        tm.assert_series_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "keep_indices, keep_values",
+    [
+        (True, False),
+        (False, True),
+        (True, True),
+        # False, False case is already covered in test_differences_axis
+    ],
+)
+def test_differences_various_formats(keep_indices, keep_values):
+    s1 = pd.Series(["a", "b", "c"])
+    s2 = pd.Series(["x", "b", "z"])
+
+    result = s1.differences(s2, keep_indices=keep_indices, keep_values=keep_values)
+
+    if keep_indices:
+        indices = pd.Index([0, 1, 2])
+        columns = pd.Index(["self", "other"])
+        if keep_values:
+            expected = pd.DataFrame(
+                [["a", "x"], ["b", "b"], ["c", "z"]], index=indices, columns=columns
+            )
+        else:
+            expected = pd.DataFrame(
+                [["a", "x"], [np.nan, np.nan], ["c", "z"]],
+                index=indices,
+                columns=columns,
+            )
+    else:
+        indices = pd.Index([0, 2])
+        columns = pd.Index(["self", "other"])
+        expected = pd.DataFrame(
+            [["a", "x"], ["c", "z"]], index=indices, columns=columns
+        )
+    tm.assert_frame_equal(result, expected)
+
+
+def test_differences_with_equal_nulls():
+    # We want to make sure two NaNs are considered the same
+    # and dropped where applicable
+    s1 = pd.Series(["a", "b", np.nan])
+    s2 = pd.Series(["x", "b", np.nan])
+
+    result = s1.differences(s2)
+    expected = pd.DataFrame([["a", "x"]], columns=["self", "other"])
+    tm.assert_frame_equal(result, expected)
+
+
+def test_differences_with_non_equal_nulls():
+    # We want to make sure the relevant NaNs do not get dropped
+    s1 = pd.Series(["a", "b", "c"])
+    s2 = pd.Series(["x", "b", np.nan])
+
+    result = s1.differences(s2, axis=0)
+
+    indices = pd.MultiIndex.from_product([[0, 2], ["self", "other"]])
+    expected = pd.Series(["a", "x", "c", np.nan], index=indices)
+    tm.assert_series_equal(result, expected)

From 8f5d0fb4370785c688fff5144c5d6697ecbc639c Mon Sep 17 00:00:00 2001
From: fujiaxiang <fujiaxiang1995@gmail.com>
Date: Fri, 10 Jan 2020 00:42:26 +0800
Subject: [PATCH 02/44] CLN: reformatted docstring (GH30429)

---
 pandas/core/frame.py  | 4 ++--
 pandas/core/series.py | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index c8bbab101bea7..1cf6a38f48363 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5323,10 +5323,10 @@ def differences(self, other, axis=1, keep_indices=False, keep_values=False):
             * 0, or 'index' : Stack differences on neighbouring rows.
             * 1, or 'columns' : Stack differences on neighbouring columns.
 
-        keep_indices: bool, default False
+        keep_indices : bool, default False
             Whether to keep the rows and columns that are equal, or drop them.
 
-        keep_values: bool, default False
+        keep_values : bool, default False
             Whether to keep the values that are equal, or show as NaNs.
 
         Returns
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 96a015253080d..9f74ffec27197 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -2585,10 +2585,10 @@ def differences(self, other, axis=1, keep_indices=False, keep_values=False):
             * 0, or 'index' : Stack differences on neighbouring indices.
             * 1, or 'columns' : Stack differences on neighbouring columns.
 
-        keep_indices: bool, default False
+        keep_indices : bool, default False
             Whether to keep the indices that are equal, or drop them.
 
-        keep_values: bool, default False
+        keep_values : bool, default False
             Whether to keep the values that are equal, or show as NaNs.
 
         Returns

From c5b793a2de275cfbc385d2e8fab17c25e0cd4a25 Mon Sep 17 00:00:00 2001
From: fujiaxiang <fujiaxiang1995@gmail.com>
Date: Fri, 10 Jan 2020 22:16:02 +0800
Subject: [PATCH 03/44] ENH: Extracted differences() from DataFrame and Series
 into NDFrame (GH30429)

---
 pandas/core/frame.py   | 39 ++---------------------------
 pandas/core/generic.py | 56 ++++++++++++++++++++++++++++++++++++++++++
 pandas/core/series.py  | 44 ++-------------------------------
 3 files changed, 60 insertions(+), 79 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 1cf6a38f48363..5edd11a4ba889 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5406,44 +5406,9 @@ def differences(self, other, axis=1, keep_indices=False, keep_values=False):
         3    b     b  NaN   NaN  4.0   4.0
         4    a     a  5.0   5.0  5.0   5.0
         """
-        from pandas.core.reshape.concat import concat
-
-        mask = ~((self == other) | (self.isna() & other.isna()))
-        keys = ["self", "other"]
-
-        if not keep_values:
-            self = self.where(mask)
-            other = other.where(mask)
-
-        if not keep_indices:
-            cmask = mask.any()
-            rmask = mask.any(axis=1)
-            self = self.loc[rmask, cmask]
-            other = other.loc[rmask, cmask]
-
-        axis = self._get_axis_number(axis)
-        diff = concat([self, other], axis=axis, keys=keys)
-
-        ax = diff._get_axis(axis)
-        ax_names = np.array(ax.names)
-
-        # set index names to positions to avoid confusion
-        ax.names = np.arange(len(ax_names))
-
-        # bring self-other to inner level
-        order = list(range(1, ax.nlevels)) + [0]
-        diff = diff.reorder_levels(order, axis=axis)
-
-        # restore the index names in order
-        diff._get_axis(axis=axis).names = ax_names[order]
-
-        # reorder axis to keep things organized
-        indices = (
-            np.arange(diff.shape[axis]).reshape([2, diff.shape[axis] // 2]).T.flatten()
+        return super().differences(
+            other=other, axis=axis, keep_indices=keep_indices, keep_values=keep_values
         )
-        diff = diff.take(indices, axis=axis)
-
-        return diff
 
     def combine(
         self, other: "DataFrame", func, fill_value=None, overwrite=True
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 22655bf9889c7..6bb965655ee6f 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -8189,6 +8189,62 @@ def ranker(data):
 
         return ranker(data)
 
+    def differences(self, other, axis=1, keep_indices=False, keep_values=False):
+        from pandas.core.reshape.concat import concat
+
+        mask = ~((self == other) | (self.isna() & other.isna()))
+        keys = ["self", "other"]
+
+        if not keep_values:
+            self = self.where(mask)
+            other = other.where(mask)
+
+        if not keep_indices:
+            if isinstance(self, ABCDataFrame):
+                cmask = mask.any()
+                rmask = mask.any(axis=1)
+                self = self.loc[rmask, cmask]
+                other = other.loc[rmask, cmask]
+            else:
+                self = self[mask]
+                other = other[mask]
+
+        if axis in (1, "columns"):  # This is needed for Series
+            axis = 1
+        else:
+            axis = self._get_axis_number(axis)
+
+        diff = concat([self, other], axis=axis, keys=keys)
+
+        if axis >= self.ndim:
+            # No need to reorganize data if stacking on new axis
+            # This currently applies for stacking two Series on columns
+            return diff
+
+        ax = diff._get_axis(axis)
+        ax_names = np.array(ax.names)
+
+        # set index names to positions to avoid confusion
+        ax.names = np.arange(len(ax_names))
+
+        # bring self-other to inner level
+        order = list(range(1, ax.nlevels)) + [0]
+        if isinstance(diff, ABCDataFrame):
+            diff = diff.reorder_levels(order, axis=axis)
+        else:
+            diff = diff.reorder_levels(order)
+
+        # restore the index names in order
+        diff._get_axis(axis=axis).names = ax_names[order]
+
+        # reorder axis to keep things organized
+        indices = (
+            np.arange(diff.shape[axis]).reshape([2, diff.shape[axis] // 2]).T.flatten()
+        )
+        diff = diff.take(indices, axis=axis)
+
+        return diff
+
     _shared_docs[
         "align"
     ] = """
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 9f74ffec27197..791c2ab4839de 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -2642,49 +2642,9 @@ def differences(self, other, axis=1, keep_indices=False, keep_values=False):
         3    d     b
         4    e     e
         """
-        from pandas.core.reshape.concat import concat
-
-        mask = ~((self == other) | (self.isna() & other.isna()))
-        keys = ["self", "other"]
-
-        if not keep_values:
-            self = self.where(mask)
-            other = other.where(mask)
-
-        if not keep_indices:
-            self = self[mask]
-            other = other[mask]
-
-        if axis in (1, "columns"):
-            axis = 1
-        else:
-            axis = self._get_axis_number(axis)
-
-        diff = concat([self, other], axis=axis, keys=keys)
-
-        if axis == 1:
-            return diff
-
-        ax = diff._get_axis(axis)
-        ax_names = np.array(ax.names)
-
-        # set index names to positions to avoid confusion
-        ax.names = np.arange(len(ax_names))
-
-        # bring self-other to inner level
-        order = list(range(1, ax.nlevels)) + [0]
-        diff = diff.reorder_levels(order)
-
-        # restore the index names in order
-        diff._get_axis(axis=axis).names = ax_names[order]
-
-        # reorder axis to keep things organized
-        indices = (
-            np.arange(diff.shape[axis]).reshape([2, diff.shape[axis] // 2]).T.flatten()
+        return super().differences(
+            other=other, axis=axis, keep_indices=keep_indices, keep_values=keep_values
         )
-        diff = diff.take(indices, axis=axis)
-
-        return diff
 
     def combine(self, other, func, fill_value=None):
         """

From d22e21a11752c97a4cd797867b7f3ca8f7f073e9 Mon Sep 17 00:00:00 2001
From: fujiaxiang <fujiaxiang1995@gmail.com>
Date: Sat, 18 Jan 2020 12:09:15 +0800
Subject: [PATCH 04/44] ENH: organized docstring using _shared_docs and reduced
 duplicates (GH30429)

---
 pandas/core/frame.py   | 180 +++++++++++++++++++----------------------
 pandas/core/generic.py |  32 +++++++-
 pandas/core/series.py  | 130 +++++++++++++----------------
 3 files changed, 168 insertions(+), 174 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 7f66ae8e75638..936b089b7ea82 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5365,108 +5365,90 @@ def _construct_result(self, result) -> "DataFrame":
         out.columns = self.columns
         return out
 
-    def differences(self, other, axis=1, keep_indices=False, keep_values=False):
+    @Appender(
         """
-        Compare to another DataFrame and show the differences.
-
-        The axis on which to stack results and how much information to
-        preserve can be customized.
-
-        Note that NaNs are considered not different from other NaNs.
-
-        Parameters
-        ----------
-        other : DataFrame
-            Object to compare with.
-
-        axis : {0 or 'index', 1 or 'columns'}, default 1
-            Determine how the differences are stacked.
-            * 0, or 'index' : Stack differences on neighbouring rows.
-            * 1, or 'columns' : Stack differences on neighbouring columns.
-
-        keep_indices : bool, default False
-            Whether to keep the rows and columns that are equal, or drop them.
-
-        keep_values : bool, default False
-            Whether to keep the values that are equal, or show as NaNs.
-
-        Returns
-        -------
-        DataFrame
-            DataFrame that shows the differences stacked side by side.
+Returns
+-------
+DataFrame
+    DataFrame that shows the differences stacked side by side.
 
-        See Also
-        --------
-        Series.differences: Show differences.
+See Also
+--------
+Series.differences: Show differences.
 
-        Examples
-        --------
-        >>> df = pd.DataFrame(
-        ...     {
-        ...         "col1": ["a", "a", "b", "b", "a"],
-        ...         "col2": [1.0, 2.0, 3.0, np.nan, 5.0],
-        ...         "col3": [1.0, 2.0, 3.0, 4.0, 5.0]
-        ...     },
-        ...     columns=["col1", "col2", "col3"],
-        ... )
-        >>> df
-          col1  col2  col3
-        0    a   1.0   1.0
-        1    a   2.0   2.0
-        2    b   3.0   3.0
-        3    b   NaN   4.0
-        4    a   5.0   5.0
-
-        >>> df2 = df.copy()
-        >>> df2.loc[0, 'col1'] = 'c'
-        >>> df2.loc[2, 'col3'] = 4.0
-        >>> df2
-          col1  col2  col3
-        0    c   1.0   1.0
-        1    a   2.0   2.0
-        2    b   3.0   4.0
-        3    b   NaN   4.0
-        4    a   5.0   5.0
-
-        Stack the differences on columns
-
-        >>> df.differences(df2)
-          col1       col3
-          self other self other
-        0    a     c  NaN   NaN
-        2  NaN   NaN  3.0   4.0
-
-        Stack the differences on rows
-
-        >>> df.differences(df2, axis=0)
-                col1  col3
-        0 self     a   NaN
-          other    c   NaN
-        2 self   NaN   3.0
-          other  NaN   4.0
-
-        Keep all the original indices (rows and columns)
-
-        >>> df.differences(df2, keep_indices=True)
-          col1       col2       col3
-          self other self other self other
-        0    a     c  NaN   NaN  NaN   NaN
-        1  NaN   NaN  NaN   NaN  NaN   NaN
-        2  NaN   NaN  NaN   NaN  3.0   4.0
-        3  NaN   NaN  NaN   NaN  NaN   NaN
-        4  NaN   NaN  NaN   NaN  NaN   NaN
-
-        Keep all original indices and data
-
-        >>> df.differences(df2, keep_indices=True, keep_values=True)
-          col1       col2       col3
-          self other self other self other
-        0    a     c  1.0   1.0  1.0   1.0
-        1    a     a  2.0   2.0  2.0   2.0
-        2    b     b  3.0   3.0  3.0   4.0
-        3    b     b  NaN   NaN  4.0   4.0
-        4    a     a  5.0   5.0  5.0   5.0
-        """
+Examples
+--------
+>>> df = pd.DataFrame(
+...     {
+...         "col1": ["a", "a", "b", "b", "a"],
+...         "col2": [1.0, 2.0, 3.0, np.nan, 5.0],
+...         "col3": [1.0, 2.0, 3.0, 4.0, 5.0]
+...     },
+...     columns=["col1", "col2", "col3"],
+... )
+>>> df
+  col1  col2  col3
+0    a   1.0   1.0
+1    a   2.0   2.0
+2    b   3.0   3.0
+3    b   NaN   4.0
+4    a   5.0   5.0
+
+>>> df2 = df.copy()
+>>> df2.loc[0, 'col1'] = 'c'
+>>> df2.loc[2, 'col3'] = 4.0
+>>> df2
+  col1  col2  col3
+0    c   1.0   1.0
+1    a   2.0   2.0
+2    b   3.0   4.0
+3    b   NaN   4.0
+4    a   5.0   5.0
+
+Stack the differences on columns
+
+>>> df.differences(df2)
+  col1       col3
+  self other self other
+0    a     c  NaN   NaN
+2  NaN   NaN  3.0   4.0
+
+Stack the differences on rows
+
+>>> df.differences(df2, axis=0)
+        col1  col3
+0 self     a   NaN
+  other    c   NaN
+2 self   NaN   3.0
+  other  NaN   4.0
+
+Keep all the original indices (rows and columns)
+
+>>> df.differences(df2, keep_indices=True)
+  col1       col2       col3
+  self other self other self other
+0    a     c  NaN   NaN  NaN   NaN
+1  NaN   NaN  NaN   NaN  NaN   NaN
+2  NaN   NaN  NaN   NaN  3.0   4.0
+3  NaN   NaN  NaN   NaN  NaN   NaN
+4  NaN   NaN  NaN   NaN  NaN   NaN
+
+Keep all original indices and data
+
+>>> df.differences(df2, keep_indices=True, keep_values=True)
+  col1       col2       col3
+  self other self other self other
+0    a     c  1.0   1.0  1.0   1.0
+1    a     a  2.0   2.0  2.0   2.0
+2    b     b  3.0   3.0  3.0   4.0
+3    b     b  NaN   NaN  4.0   4.0
+4    a     a  5.0   5.0  5.0   5.0
+"""
+    )
+    @Appender(_shared_docs["differences"] % _shared_doc_kwargs)
+    def differences(
+        self, other, axis=1, keep_indices=False, keep_values=False
+    ) -> "DataFrame":
         return super().differences(
             other=other, axis=axis, keep_indices=keep_indices, keep_values=keep_values
         )
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 58e8b813163c6..537f4fd8d3c9f 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -8103,7 +8103,37 @@ def ranker(data):
 
         return ranker(data)
 
-    def differences(self, other, axis=1, keep_indices=False, keep_values=False):
+    _shared_docs[
+        "differences"
+    ] = """
+        Compare to another %(klass)s and show the differences.
+
+        The axis on which to stack results and how much information to
+        preserve can be customized.
+
+        Note that NaNs are considered not different from other NaNs.
+
+        Parameters
+        ----------
+        other : %(klass)s
+            Object to compare with.
+
+        axis : {0 or 'index', 1 or 'columns'}, default 1
+            Determine how the differences are stacked.
+            * 0, or 'index' : Stack differences on neighbouring rows.
+            * 1, or 'columns' : Stack differences on neighbouring columns.
+
+        keep_indices : bool, default False
+            Whether to keep the rows and columns that are equal, or drop them.
+
+        keep_values : bool, default False
+            Whether to keep the values that are equal, or show as NaNs.
+        """
+
+    @Appender(_shared_docs["differences"] % _shared_doc_kwargs)
+    def differences(
+        self, other, axis=1, keep_indices=False, keep_values=False
+    ) -> FrameOrSeries:
         from pandas.core.reshape.concat import concat
 
         mask = ~((self == other) | (self.isna() & other.isna()))
diff --git a/pandas/core/series.py b/pandas/core/series.py
index d5904103fc758..08caf5b0c62af 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -23,7 +23,7 @@
 from pandas._config import get_option
 
 from pandas._libs import index as libindex, lib, reshape, tslibs
-from pandas._typing import Label
+from pandas._typing import Label, FrameOrSeries
 from pandas.compat.numpy import function as nv
 from pandas.util._decorators import Appender, Substitution
 from pandas.util._validators import validate_bool_kwarg, validate_percentile
@@ -2555,82 +2555,64 @@ def _binop(self, other, func, level=None, fill_value=None):
         ret = ops._construct_result(self, result, new_index, name)
         return ret
 
-    def differences(self, other, axis=1, keep_indices=False, keep_values=False):
+    @Appender(
         """
-        Compare to another Series and show the differences.
-
-        The axis on which to stack results and how much information to
-        preserve can be customized.
-
-        Note that NaNs are considered not different from other NaNs.
-
-        Parameters
-        ----------
-        other : Series
-            Object to compare with.
-
-        axis : {0 or 'index', 1 or 'columns'}, default 1
-            Determine how the differences are stacked.
-            * 0, or 'index' : Stack differences on neighbouring indices.
-            * 1, or 'columns' : Stack differences on neighbouring columns.
-
-        keep_indices : bool, default False
-            Whether to keep the indices that are equal, or drop them.
-
-        keep_values : bool, default False
-            Whether to keep the values that are equal, or show as NaNs.
-
-        Returns
-        -------
-        Series or DataFrame
-            If axis is 0 or 'index' the result will be a Series.
-            If axis is 1 or 'columns' the result will be a DataFrame.
-
-        See Also
-        --------
-        DataFrame.differences: Show differences.
+Returns
+-------
+Series or DataFrame
+    If axis is 0 or 'index' the result will be a Series.
+    If axis is 1 or 'columns' the result will be a DataFrame.
 
-        Examples
-        --------
-        >>> s1 = pd.Series(["a", "b", "c", "d", "e"])
-        >>> s2 = pd.Series(["a", "a", "c", "b", "e"])
-
-        Stack the differences on columns
-
-        >>> s1.differences(s2)
-          self other
-        1    b     a
-        3    d     b
-
-        Stack the differences on indices
-
-        >>> s1.differences(s2, axis=0)
-        1  self     b
-           other    a
-        3  self     d
-           other    b
-        dtype: object
-
-        Keep all the original indices
-
-        >>> s1.differences(s2, keep_indices=True)
-          self other
-        0  NaN   NaN
-        1    b     a
-        2  NaN   NaN
-        3    d     b
-        4  NaN   NaN
-
-        Keep all original indices and data
+See Also
+--------
+DataFrame.differences: Show differences.
 
-        >>> s1.differences(s2, keep_indices=True, keep_values=True)
-          self other
-        0    a     a
-        1    b     a
-        2    c     c
-        3    d     b
-        4    e     e
-        """
+Examples
+--------
+>>> s1 = pd.Series(["a", "b", "c", "d", "e"])
+>>> s2 = pd.Series(["a", "a", "c", "b", "e"])
+
+Stack the differences on columns
+
+>>> s1.differences(s2)
+  self other
+1    b     a
+3    d     b
+
+Stack the differences on indices
+
+>>> s1.differences(s2, axis=0)
+1  self     b
+   other    a
+3  self     d
+   other    b
+dtype: object
+
+Keep all the original indices
+
+>>> s1.differences(s2, keep_indices=True)
+  self other
+0  NaN   NaN
+1    b     a
+2  NaN   NaN
+3    d     b
+4  NaN   NaN
+
+Keep all original indices and data
+
+>>> s1.differences(s2, keep_indices=True, keep_values=True)
+  self other
+0    a     a
+1    b     a
+2    c     c
+3    d     b
+4    e     e
+"""
+    )
+    @Appender(generic._shared_docs["differences"] % _shared_doc_kwargs)
+    def differences(
+        self, other, axis=1, keep_indices=False, keep_values=False
+    ) -> FrameOrSeries:
         return super().differences(
             other=other, axis=axis, keep_indices=keep_indices, keep_values=keep_values
         )

From 83f31df165040a948706dc21518f4f4c30677794 Mon Sep 17 00:00:00 2001
From: fujiaxiang <fujiaxiang1995@gmail.com>
Date: Sat, 18 Jan 2020 12:12:32 +0800
Subject: [PATCH 05/44] ENH: added argument type indication (GH30429)

---
 pandas/core/frame.py  | 2 +-
 pandas/core/series.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 936b089b7ea82..31f601c0458f4 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5447,7 +5447,7 @@ def _construct_result(self, result) -> "DataFrame":
     )
     @Appender(_shared_docs["differences"] % _shared_doc_kwargs)
     def differences(
-        self, other, axis=1, keep_indices=False, keep_values=False
+        self, other: "DataFrame", axis=1, keep_indices=False, keep_values=False
     ) -> "DataFrame":
         return super().differences(
             other=other, axis=axis, keep_indices=keep_indices, keep_values=keep_values
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 08caf5b0c62af..c1afa9b6d01f1 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -2611,7 +2611,7 @@ def _binop(self, other, func, level=None, fill_value=None):
     )
     @Appender(generic._shared_docs["differences"] % _shared_doc_kwargs)
     def differences(
-        self, other, axis=1, keep_indices=False, keep_values=False
+        self, other: "Series", axis=1, keep_indices=False, keep_values=False
     ) -> FrameOrSeries:
         return super().differences(
             other=other, axis=axis, keep_indices=keep_indices, keep_values=keep_values

From 488c8a89ca5494bb9d5dcdfc35f2e1b6cb9bd1c9 Mon Sep 17 00:00:00 2001
From: fujiaxiang <fujiaxiang1995@gmail.com>
Date: Sat, 18 Jan 2020 12:57:18 +0800
Subject: [PATCH 06/44] ENH: reordered imports (GH30429)

---
 pandas/core/series.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/series.py b/pandas/core/series.py
index c1afa9b6d01f1..3b03eebb2dfcf 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -23,7 +23,7 @@
 from pandas._config import get_option
 
 from pandas._libs import index as libindex, lib, reshape, tslibs
-from pandas._typing import Label, FrameOrSeries
+from pandas._typing import FrameOrSeries, Label
 from pandas.compat.numpy import function as nv
 from pandas.util._decorators import Appender, Substitution
 from pandas.util._validators import validate_bool_kwarg, validate_percentile

From 322ff20fcd195b194e0ef8051741bad13ccccaa1 Mon Sep 17 00:00:00 2001
From: fujiaxiang <fujiaxiang1995@gmail.com>
Date: Sat, 18 Jan 2020 13:48:27 +0800
Subject: [PATCH 07/44] ENH: removed inconsistent type indication (GH30429)

---
 pandas/core/generic.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 537f4fd8d3c9f..7e44d5dec5158 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -8131,9 +8131,7 @@ def ranker(data):
         """
 
     @Appender(_shared_docs["differences"] % _shared_doc_kwargs)
-    def differences(
-        self, other, axis=1, keep_indices=False, keep_values=False
-    ) -> FrameOrSeries:
+    def differences(self, other, axis=1, keep_indices=False, keep_values=False):
         from pandas.core.reshape.concat import concat
 
         mask = ~((self == other) | (self.isna() & other.isna()))

From e50172c01c0d9941c6701aa11f4b237132bf336d Mon Sep 17 00:00:00 2001
From: fujiaxiang <fujiaxiang1995@gmail.com>
Date: Thu, 30 Jan 2020 23:11:59 +0800
Subject: [PATCH 08/44] ENH: Added whatsnew entry (GH30429)

---
 doc/source/whatsnew/v1.1.0.rst | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 920919755dc23..bb43508e2c10d 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -36,6 +36,31 @@ For example:
    ser["2014"]
    ser.loc["May 2015"]
 
+.. _whatsnew_100.differences:
+
+Comparing two `DataFrame` or two `Series` and summarizing the differences
+^^^^^^^^^^^^^^^^^^^^^^
+
+We've added :meth:`~DataFrame.differences` and :meth:`~Series.differences` for comparing two `DataFrame`s or two `Series`s (:issue:`30429`)
+
+.. ipython:: python
+
+   df = pd.DataFrame(
+       {
+           "col1": ["a", "a", "b", "b", "a"],
+           "col2": [1.0, 2.0, 3.0, np.nan, 5.0],
+           "col3": [1.0, 2.0, 3.0, 4.0, 5.0]
+       },
+       columns=["col1", "col2", "col3"],
+   )
+   df
+   df2 = df.copy()
+   df2.loc[0, 'col1'] = 'c'
+   df2.loc[2, 'col3'] = 4.0
+   df2
+   df.differences(df2)
+
+
 .. _whatsnew_110.enhancements.other:
 
 Other enhancements

From 4a82bec7a2ab602050d1f909c3f58176790f96b2 Mon Sep 17 00:00:00 2001
From: fujiaxiang <fujiaxiang1995@gmail.com>
Date: Thu, 30 Jan 2020 23:12:58 +0800
Subject: [PATCH 09/44] ENH: Minor correction in whatsnew entry (GH30429)

---
 doc/source/whatsnew/v1.1.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index bb43508e2c10d..b794d7375ca86 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -36,7 +36,7 @@ For example:
    ser["2014"]
    ser.loc["May 2015"]
 
-.. _whatsnew_100.differences:
+.. _whatsnew_110.differences:
 
 Comparing two `DataFrame` or two `Series` and summarizing the differences
 ^^^^^^^^^^^^^^^^^^^^^^

From b2849ed41e757ffdbe63540113be921daaa70904 Mon Sep 17 00:00:00 2001
From: fujiaxiang <fujiaxiang1995@gmail.com>
Date: Thu, 30 Jan 2020 23:34:48 +0800
Subject: [PATCH 10/44] ENH: Minor correction in whatsnew entry (GH30429)

---
 doc/source/whatsnew/v1.1.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index b794d7375ca86..2e17e8b941f5f 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -41,7 +41,7 @@ For example:
 Comparing two `DataFrame` or two `Series` and summarizing the differences
 ^^^^^^^^^^^^^^^^^^^^^^
 
-We've added :meth:`~DataFrame.differences` and :meth:`~Series.differences` for comparing two `DataFrame`s or two `Series`s (:issue:`30429`)
+We've added :meth:`~DataFrame.differences` and :meth:`~Series.differences` for comparing two `DataFrame` or two `Series` (:issue:`30429`)
 
 .. ipython:: python
 

From ff7a57205fd3ab3021af75efdbc5819c8d0d8fc4 Mon Sep 17 00:00:00 2001
From: fujiaxiang <fujiaxiang1995@gmail.com>
Date: Fri, 31 Jan 2020 09:49:16 +0800
Subject: [PATCH 11/44] ENH: Correction in whatsnew entry (GH30429)

---
 doc/source/whatsnew/v1.1.0.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 2e17e8b941f5f..2a85a55fcae91 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -39,9 +39,9 @@ For example:
 .. _whatsnew_110.differences:
 
 Comparing two `DataFrame` or two `Series` and summarizing the differences
-^^^^^^^^^^^^^^^^^^^^^^
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-We've added :meth:`~DataFrame.differences` and :meth:`~Series.differences` for comparing two `DataFrame` or two `Series` (:issue:`30429`)
+We've added :meth:`DataFrame.differences` and :meth:`Series.differences` for comparing two `DataFrame` or two `Series` (:issue:`30429`)
 
 .. ipython:: python
 

From bc969e8ec787f3671003a494d116c290d8477d87 Mon Sep 17 00:00:00 2001
From: fujiaxiang <fujiaxiang1995@gmail.com>
Date: Mon, 10 Feb 2020 11:50:09 +0800
Subject: [PATCH 12/44] ENH: updated whatsnew (GH31200)

---
 doc/source/whatsnew/v1.1.0.rst | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 2a85a55fcae91..02b9ea9fc13cd 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -53,10 +53,13 @@ We've added :meth:`DataFrame.differences` and :meth:`Series.differences` for com
        },
        columns=["col1", "col2", "col3"],
    )
-   df
    df2 = df.copy()
    df2.loc[0, 'col1'] = 'c'
    df2.loc[2, 'col3'] = 4.0
+
+.. ipython:: python
+
+   df
    df2
    df.differences(df2)
 

From 26c6ca6633a2009d8ac38853e86a43e987d8ea37 Mon Sep 17 00:00:00 2001
From: fujiaxiang <fujiaxiang1995@gmail.com>
Date: Mon, 10 Feb 2020 11:50:28 +0800
Subject: [PATCH 13/44] ENH: added doc references (GH31200)

---
 doc/source/reference/frame.rst  | 3 ++-
 doc/source/reference/series.rst | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/doc/source/reference/frame.rst b/doc/source/reference/frame.rst
index c7b1cc1c832be..3ef881868f0f6 100644
--- a/doc/source/reference/frame.rst
+++ b/doc/source/reference/frame.rst
@@ -239,7 +239,7 @@ Reshaping, sorting, transposing
    DataFrame.T
    DataFrame.transpose
 
-Combining / joining / merging
+Combining / comparing / joining / merging
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 .. autosummary::
    :toctree: api/
@@ -249,6 +249,7 @@ Combining / joining / merging
    DataFrame.join
    DataFrame.merge
    DataFrame.update
+   DataFrame.differences
 
 Time series-related
 ~~~~~~~~~~~~~~~~~~~
diff --git a/doc/source/reference/series.rst b/doc/source/reference/series.rst
index 1a69fa076dbf0..6ae2640ca7cbe 100644
--- a/doc/source/reference/series.rst
+++ b/doc/source/reference/series.rst
@@ -240,7 +240,7 @@ Reshaping, sorting
    Series.squeeze
    Series.view
 
-Combining / joining / merging
+Combining / comparing / joining / merging
 -----------------------------
 .. autosummary::
    :toctree: api/
@@ -248,6 +248,7 @@ Combining / joining / merging
    Series.append
    Series.replace
    Series.update
+   Series.differences
 
 Time series-related
 -------------------

From 5fb2edceda2967a3d578e93880c29d8d8a018d30 Mon Sep 17 00:00:00 2001
From: fujiaxiang <fujiaxiang1995@gmail.com>
Date: Mon, 10 Feb 2020 12:26:20 +0800
Subject: [PATCH 14/44] DOC: fixed formatting issue in doc references

---
 doc/source/reference/frame.rst  | 2 +-
 doc/source/reference/series.rst | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/source/reference/frame.rst b/doc/source/reference/frame.rst
index 3ef881868f0f6..d0ad3e0a102e1 100644
--- a/doc/source/reference/frame.rst
+++ b/doc/source/reference/frame.rst
@@ -240,7 +240,7 @@ Reshaping, sorting, transposing
    DataFrame.transpose
 
 Combining / comparing / joining / merging
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 .. autosummary::
    :toctree: api/
 
diff --git a/doc/source/reference/series.rst b/doc/source/reference/series.rst
index 6ae2640ca7cbe..8ac2052b5c693 100644
--- a/doc/source/reference/series.rst
+++ b/doc/source/reference/series.rst
@@ -241,7 +241,7 @@ Reshaping, sorting
    Series.view
 
 Combining / comparing / joining / merging
------------------------------
+-----------------------------------------
 .. autosummary::
    :toctree: api/
 

From 35ccb5f334bd798cd5ae9144f5b01c1e67df8889 Mon Sep 17 00:00:00 2001
From: fujiaxiang <fujiaxiang1995@gmail.com>
Date: Tue, 25 Feb 2020 23:03:31 +0800
Subject: [PATCH 15/44] updated parameter names, docstring, and relevant tests
 (GH30429)

---
 pandas/core/frame.py                          | 11 +++++---
 pandas/core/generic.py                        | 25 ++++++++-----------
 pandas/core/series.py                         | 15 ++++++++---
 .../tests/frame/methods/test_differences.py   | 10 ++++----
 .../tests/series/methods/test_differences.py  | 10 ++++----
 5 files changed, 40 insertions(+), 31 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 1e53cf06c7b54..81d9dbdc135db 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5150,12 +5150,17 @@ def _construct_result(self, result) -> "DataFrame":
 Returns
 -------
 DataFrame
-    DataFrame that shows the differences stacked side by side.
+    DataFrame that shows the differences stacked side by side. 
+    The resulting index will be a MultiIndex with 'self' and 'other' stacked alternately at the inner level. 
 
 See Also
 --------
 Series.differences: Show differences.
 
+Notes
+-----
+NaNs are considered equal to other NaNs.
+
 Examples
 --------
 >>> df = pd.DataFrame(
@@ -5227,10 +5232,10 @@ def _construct_result(self, result) -> "DataFrame":
     )
     @Appender(_shared_docs["differences"] % _shared_doc_kwargs)
     def differences(
-        self, other: "DataFrame", axis=1, keep_indices=False, keep_values=False
+        self, other: "DataFrame", axis=1, keep_shape=False, keep_equal=False
     ) -> "DataFrame":
         return super().differences(
-            other=other, axis=axis, keep_indices=keep_indices, keep_values=keep_values
+            other=other, axis=axis, keep_shape=keep_shape, keep_equal=keep_equal
         )
 
     def combine(
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index e572eb7382590..2f8d6eb5b033f 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -8153,11 +8153,6 @@ def ranker(data):
     ] = """
         Compare to another %(klass)s and show the differences.
 
-        The axis on which to stack results and how much information to
-        preserve can be customized.
-
-        Note that NaNs are considered not different from other NaNs.
-
         Parameters
         ----------
         other : %(klass)s
@@ -8165,28 +8160,30 @@ def ranker(data):
 
         axis : {0 or 'index', 1 or 'columns'}, default 1
             Determine how the differences are stacked.
-            * 0, or 'index' : Stack differences on neighbouring rows.
-            * 1, or 'columns' : Stack differences on neighbouring columns.
+            * 0, or 'index' : Resulting differences are stacked vertically 
+                with rows drawn alternately from self and other.
+            * 1, or 'columns' : Resulting differences are stacked horizontally 
+                with columns drawn alternately from self and other.
 
-        keep_indices : bool, default False
-            Whether to keep the rows and columns that are equal, or drop them.
+        keep_shape : bool, default False
+            If true, all rows and columns are kept. Otherwise only the different ones are kept.
 
-        keep_values : bool, default False
-            Whether to keep the values that are equal, or show as NaNs.
+        keep_equal : bool, default False
+            If true, the result keeps values that are equal. Otherwise they are shown as NaNs.
         """
 
     @Appender(_shared_docs["differences"] % _shared_doc_kwargs)
-    def differences(self, other, axis=1, keep_indices=False, keep_values=False):
+    def differences(self, other, axis=1, keep_shape=False, keep_equal=False):
         from pandas.core.reshape.concat import concat
 
         mask = ~((self == other) | (self.isna() & other.isna()))
         keys = ["self", "other"]
 
-        if not keep_values:
+        if not keep_equal:
             self = self.where(mask)
             other = other.where(mask)
 
-        if not keep_indices:
+        if not keep_shape:
             if isinstance(self, ABCDataFrame):
                 cmask = mask.any()
                 rmask = mask.any(axis=1)
diff --git a/pandas/core/series.py b/pandas/core/series.py
index e278ab4b318ca..304de8382debd 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -2579,13 +2579,20 @@ def _binop(self, other, func, level=None, fill_value=None):
 Returns
 -------
 Series or DataFrame
-    If axis is 0 or 'index' the result will be a Series.
-    If axis is 1 or 'columns' the result will be a DataFrame.
+    If axis is 0 or 'index' the result will be a Series. 
+    The resulting index will be a MultiIndex with 'self' and 'other' stacked alternately at the inner level.
+    
+    If axis is 1 or 'columns' the result will be a DataFrame. 
+    Its will have two columns namely 'self' and 'other'.
 
 See Also
 --------
 DataFrame.differences: Show differences.
 
+Notes
+-----
+NaNs are considered equal to other NaNs.
+
 Examples
 --------
 >>> s1 = pd.Series(["a", "b", "c", "d", "e"])
@@ -2630,10 +2637,10 @@ def _binop(self, other, func, level=None, fill_value=None):
     )
     @Appender(generic._shared_docs["differences"] % _shared_doc_kwargs)
     def differences(
-        self, other: "Series", axis=1, keep_indices=False, keep_values=False
+        self, other: "Series", axis=1, keep_shape=False, keep_equal=False
     ) -> FrameOrSeries:
         return super().differences(
-            other=other, axis=axis, keep_indices=keep_indices, keep_values=keep_values
+            other=other, axis=axis, keep_shape=keep_shape, keep_equal=keep_equal
         )
 
     def combine(self, other, func, fill_value=None) -> "Series":
diff --git a/pandas/tests/frame/methods/test_differences.py b/pandas/tests/frame/methods/test_differences.py
index 058888f8551e2..aa2f3d7b11a2a 100644
--- a/pandas/tests/frame/methods/test_differences.py
+++ b/pandas/tests/frame/methods/test_differences.py
@@ -37,7 +37,7 @@ def test_differences_axis(axis):
 
 
 @pytest.mark.parametrize(
-    "keep_indices, keep_values",
+    "keep_shape, keep_equal",
     [
         (True, False),
         (False, True),
@@ -45,7 +45,7 @@ def test_differences_axis(axis):
         # False, False case is already covered in test_differences_axis
     ],
 )
-def test_differences_various_formats(keep_indices, keep_values):
+def test_differences_various_formats(keep_shape, keep_equal):
     df = pd.DataFrame(
         {"col1": ["a", "b", "c"], "col2": [1.0, 2.0, np.nan], "col3": [1.0, 2.0, 3.0]},
         columns=["col1", "col2", "col3"],
@@ -54,14 +54,14 @@ def test_differences_various_formats(keep_indices, keep_values):
     df2.loc[0, "col1"] = "c"
     df2.loc[2, "col3"] = 4.0
 
-    result = df.differences(df2, keep_indices=keep_indices, keep_values=keep_values)
+    result = df.differences(df2, keep_shape=keep_shape, keep_equal=keep_equal)
 
-    if keep_indices:
+    if keep_shape:
         indices = pd.Index([0, 1, 2])
         columns = pd.MultiIndex.from_product(
             [["col1", "col2", "col3"], ["self", "other"]]
         )
-        if keep_values:
+        if keep_equal:
             expected = pd.DataFrame(
                 [
                     ["a", "c", 1.0, 1.0, 1.0, 1.0],
diff --git a/pandas/tests/series/methods/test_differences.py b/pandas/tests/series/methods/test_differences.py
index 58780b129905c..c6d1b3ed1a65c 100644
--- a/pandas/tests/series/methods/test_differences.py
+++ b/pandas/tests/series/methods/test_differences.py
@@ -26,7 +26,7 @@ def test_differences_axis(axis):
 
 
 @pytest.mark.parametrize(
-    "keep_indices, keep_values",
+    "keep_shape, keep_equal",
     [
         (True, False),
         (False, True),
@@ -34,16 +34,16 @@ def test_differences_axis(axis):
         # False, False case is already covered in test_differences_axis
     ],
 )
-def test_differences_various_formats(keep_indices, keep_values):
+def test_differences_various_formats(keep_shape, keep_equal):
     s1 = pd.Series(["a", "b", "c"])
     s2 = pd.Series(["x", "b", "z"])
 
-    result = s1.differences(s2, keep_indices=keep_indices, keep_values=keep_values)
+    result = s1.differences(s2, keep_shape=keep_shape, keep_equal=keep_equal)
 
-    if keep_indices:
+    if keep_shape:
         indices = pd.Index([0, 1, 2])
         columns = pd.Index(["self", "other"])
-        if keep_values:
+        if keep_equal:
             expected = pd.DataFrame(
                 [["a", "x"], ["b", "b"], ["c", "z"]], index=indices, columns=columns
             )

From 586e37c56b3b49c0c110dd054aa5353d828fc47a Mon Sep 17 00:00:00 2001
From: fujiaxiang <fujiaxiang1995@gmail.com>
Date: Tue, 25 Feb 2020 23:15:02 +0800
Subject: [PATCH 16/44] added doc-string tests (GH30429)

---
 ci/code_checks.sh | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index e2dc543360a62..10ff7085d5b81 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -259,17 +259,18 @@ fi
 if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
 
     MSG='Doctests frame.py' ; echo $MSG
-    pytest -q --doctest-modules pandas/core/frame.py
+    pytest -q --doctest-modules pandas/core/frame.py \
+        -k"-differences"
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
     MSG='Doctests series.py' ; echo $MSG
     pytest -q --doctest-modules pandas/core/series.py \
-        -k"-nonzero -reindex -searchsorted -to_dict"
+        -k"-differences -nonzero -reindex -searchsorted -to_dict"
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
     MSG='Doctests generic.py' ; echo $MSG
     pytest -q --doctest-modules pandas/core/generic.py \
-        -k"-_set_axis_name -_xs -describe -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -to_json -transpose -values -xs -to_clipboard"
+        -k"-_set_axis_name -_xs -describe -differences -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -to_clipboard -to_json -transpose -values -xs"
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
     MSG='Doctests groupby.py' ; echo $MSG

From d13db2fe0e251f37fe8b23af6f191cc2fa56473b Mon Sep 17 00:00:00 2001
From: fujiaxiang <fujiaxiang1995@gmail.com>
Date: Tue, 25 Feb 2020 23:28:33 +0800
Subject: [PATCH 17/44] fixed some PEP8 issues in doc-strings (GH30429)

---
 pandas/core/frame.py   |  6 ++++--
 pandas/core/generic.py | 10 ++++++----
 pandas/core/series.py  | 10 ++++++----
 3 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 81d9dbdc135db..2cdc9dcee5aba 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5150,8 +5150,10 @@ def _construct_result(self, result) -> "DataFrame":
 Returns
 -------
 DataFrame
-    DataFrame that shows the differences stacked side by side. 
-    The resulting index will be a MultiIndex with 'self' and 'other' stacked alternately at the inner level. 
+    DataFrame that shows the differences stacked side by side.
+
+    The resulting index will be a MultiIndex with 'self' and 'other' 
+    stacked alternately at the inner level.
 
 See Also
 --------
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 2f8d6eb5b033f..5ac1f33d7f25b 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -8160,16 +8160,18 @@ def ranker(data):
 
         axis : {0 or 'index', 1 or 'columns'}, default 1
             Determine how the differences are stacked.
-            * 0, or 'index' : Resulting differences are stacked vertically 
+            * 0, or 'index' : Resulting differences are stacked vertically
                 with rows drawn alternately from self and other.
-            * 1, or 'columns' : Resulting differences are stacked horizontally 
+            * 1, or 'columns' : Resulting differences are stacked horizontally
                 with columns drawn alternately from self and other.
 
         keep_shape : bool, default False
-            If true, all rows and columns are kept. Otherwise only the different ones are kept.
+            If true, all rows and columns are kept. 
+            Otherwise, only the different ones are kept.
 
         keep_equal : bool, default False
-            If true, the result keeps values that are equal. Otherwise they are shown as NaNs.
+            If true, the result keeps values that are equal. 
+            Otherwise, equal values are shown as NaNs.
         """
 
     @Appender(_shared_docs["differences"] % _shared_doc_kwargs)
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 304de8382debd..9f8647022d528 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -2579,10 +2579,12 @@ def _binop(self, other, func, level=None, fill_value=None):
 Returns
 -------
 Series or DataFrame
-    If axis is 0 or 'index' the result will be a Series. 
-    The resulting index will be a MultiIndex with 'self' and 'other' stacked alternately at the inner level.
-    
-    If axis is 1 or 'columns' the result will be a DataFrame. 
+    If axis is 0 or 'index' the result will be a Series.
+
+    The resulting index will be a MultiIndex with 'self' and 'other'
+    stacked alternately at the inner level.
+
+    If axis is 1 or 'columns' the result will be a DataFrame.
     Its will have two columns namely 'self' and 'other'.
 
 See Also

From 534220875f8c03ebd6e005e4c7c067099eafe62d Mon Sep 17 00:00:00 2001
From: fujiaxiang <fujiaxiang1995@gmail.com>
Date: Wed, 26 Feb 2020 09:24:03 +0800
Subject: [PATCH 18/44] removed trailing spaces in doc-strings (GH30429)

---
 pandas/core/frame.py   | 2 +-
 pandas/core/generic.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 2cdc9dcee5aba..858ee18c28a6e 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5152,7 +5152,7 @@ def _construct_result(self, result) -> "DataFrame":
 DataFrame
     DataFrame that shows the differences stacked side by side.
 
-    The resulting index will be a MultiIndex with 'self' and 'other' 
+    The resulting index will be a MultiIndex with 'self' and 'other'
     stacked alternately at the inner level.
 
 See Also
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 5ac1f33d7f25b..0f463724716b6 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -8166,11 +8166,11 @@ def ranker(data):
                 with columns drawn alternately from self and other.
 
         keep_shape : bool, default False
-            If true, all rows and columns are kept. 
+            If true, all rows and columns are kept.
             Otherwise, only the different ones are kept.
 
         keep_equal : bool, default False
-            If true, the result keeps values that are equal. 
+            If true, the result keeps values that are equal.
             Otherwise, equal values are shown as NaNs.
         """
 

From 77b1c9e217aff2cab0f67d6ba0b79d6fa8fdd9cf Mon Sep 17 00:00:00 2001
From: fujiaxiang <fujiaxiang1995@gmail.com>
Date: Wed, 26 Feb 2020 20:29:18 +0800
Subject: [PATCH 19/44] fixed sphinx indentation issues in doc-strings (GH30429)

---
 pandas/core/generic.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 0f463724716b6..4bcbe8edf2805 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -8167,11 +8167,11 @@ def ranker(data):
 
         keep_shape : bool, default False
             If true, all rows and columns are kept.
-            Otherwise, only the different ones are kept.
+                Otherwise, only the different ones are kept.
 
         keep_equal : bool, default False
             If true, the result keeps values that are equal.
-            Otherwise, equal values are shown as NaNs.
+                Otherwise, equal values are shown as NaNs.
         """
 
     @Appender(_shared_docs["differences"] % _shared_doc_kwargs)

From 51ffe0ed28a6511a50bf2f43d63d2eca16388b6b Mon Sep 17 00:00:00 2001
From: fujiaxiang <fujiaxiang1995@gmail.com>
Date: Wed, 26 Feb 2020 21:39:33 +0800
Subject: [PATCH 20/44] sphinx indentation issues in doc-strings (GH30429)

---
 pandas/core/frame.py   | 2 +-
 pandas/core/generic.py | 4 ++--
 pandas/core/series.py  | 6 +++---
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 858ee18c28a6e..91154f52315ea 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5151,7 +5151,7 @@ def _construct_result(self, result) -> "DataFrame":
 -------
 DataFrame
     DataFrame that shows the differences stacked side by side.
-
+    
     The resulting index will be a MultiIndex with 'self' and 'other'
     stacked alternately at the inner level.
 
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 4bcbe8edf2805..0f463724716b6 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -8167,11 +8167,11 @@ def ranker(data):
 
         keep_shape : bool, default False
             If true, all rows and columns are kept.
-                Otherwise, only the different ones are kept.
+            Otherwise, only the different ones are kept.
 
         keep_equal : bool, default False
             If true, the result keeps values that are equal.
-                Otherwise, equal values are shown as NaNs.
+            Otherwise, equal values are shown as NaNs.
         """
 
     @Appender(_shared_docs["differences"] % _shared_doc_kwargs)
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 9f8647022d528..259eabb67d914 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -2580,12 +2580,12 @@ def _binop(self, other, func, level=None, fill_value=None):
 -------
 Series or DataFrame
     If axis is 0 or 'index' the result will be a Series.
-
+    
     The resulting index will be a MultiIndex with 'self' and 'other'
     stacked alternately at the inner level.
-
+    
     If axis is 1 or 'columns' the result will be a DataFrame.
-    Its will have two columns namely 'self' and 'other'.
+    It will have two columns namely 'self' and 'other'.
 
 See Also
 --------

From 827b69cbccb07f825f8cc81a3cbc8eb70092c73f Mon Sep 17 00:00:00 2001
From: fujiaxiang <fujiaxiang1995@gmail.com>
Date: Wed, 26 Feb 2020 22:27:40 +0800
Subject: [PATCH 21/44] sphinx indentation issues in doc-strings (GH30429)

---
 pandas/core/frame.py  | 5 ++---
 pandas/core/series.py | 8 +++-----
 2 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index d6289cf703b55..f68edfaf9c6b1 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5251,9 +5251,8 @@ def _construct_result(self, result) -> "DataFrame":
 -------
 DataFrame
     DataFrame that shows the differences stacked side by side.
-    
-    The resulting index will be a MultiIndex with 'self' and 'other'
-    stacked alternately at the inner level.
+        The resulting index will be a MultiIndex with 'self' and 'other'
+        stacked alternately at the inner level.
 
 See Also
 --------
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 259eabb67d914..c6ca89b75b279 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -2580,12 +2580,10 @@ def _binop(self, other, func, level=None, fill_value=None):
 -------
 Series or DataFrame
     If axis is 0 or 'index' the result will be a Series.
-    
-    The resulting index will be a MultiIndex with 'self' and 'other'
-    stacked alternately at the inner level.
-    
+        The resulting index will be a MultiIndex with 'self' and 'other'
+        stacked alternately at the inner level.
     If axis is 1 or 'columns' the result will be a DataFrame.
-    It will have two columns namely 'self' and 'other'.
+        It will have two columns namely 'self' and 'other'.
 
 See Also
 --------

From 53918a571a2b1a172336c0f38bf6cb27e28cc14c Mon Sep 17 00:00:00 2001
From: Jiaxiang <fujiaxiang1995@gmail.com>
Date: Wed, 26 Feb 2020 23:45:56 +0800
Subject: [PATCH 22/44] Update pandas/core/frame.py

minor changes in docstring

Co-Authored-By: William Ayd <william.ayd@icloud.com>
---
 pandas/core/frame.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index f68edfaf9c6b1..a1ad127542002 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5256,7 +5256,7 @@ def _construct_result(self, result) -> "DataFrame":
 
 See Also
 --------
-Series.differences: Show differences.
+Series.differences : Show differences.
 
 Notes
 -----

From 110f1387613b49b88e6e43a353fae067dc142264 Mon Sep 17 00:00:00 2001
From: Jiaxiang <fujiaxiang1995@gmail.com>
Date: Wed, 26 Feb 2020 23:46:08 +0800
Subject: [PATCH 23/44] Update pandas/core/series.py

minor changes in docstring

Co-Authored-By: William Ayd <william.ayd@icloud.com>
---
 pandas/core/series.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/series.py b/pandas/core/series.py
index c6ca89b75b279..9e7c7b53da8e9 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -2587,7 +2587,7 @@ def _binop(self, other, func, level=None, fill_value=None):
 
 See Also
 --------
-DataFrame.differences: Show differences.
+DataFrame.differences : Show differences.
 
 Notes
 -----

From acd51e010f08638d89d6f2a47262d2b80bd9d7fa Mon Sep 17 00:00:00 2001
From: fujiaxiang <fujiaxiang1995@gmail.com>
Date: Thu, 27 Feb 2020 19:47:23 +0800
Subject: [PATCH 24/44] attempt to fix sphinx indentation issues in doc-strings
 (GH30429)

---
 pandas/core/frame.py   | 5 +++--
 pandas/core/generic.py | 1 +
 pandas/core/series.py  | 7 ++++---
 3 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index a1ad127542002..a0d8729543a19 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5251,8 +5251,9 @@ def _construct_result(self, result) -> "DataFrame":
 -------
 DataFrame
     DataFrame that shows the differences stacked side by side.
-        The resulting index will be a MultiIndex with 'self' and 'other'
-        stacked alternately at the inner level.
+    
+    The resulting index will be a MultiIndex with 'self' and 'other'
+    stacked alternately at the inner level.
 
 See Also
 --------
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 0f463724716b6..b955a11b6822b 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -8160,6 +8160,7 @@ def ranker(data):
 
         axis : {0 or 'index', 1 or 'columns'}, default 1
             Determine how the differences are stacked.
+            
             * 0, or 'index' : Resulting differences are stacked vertically
                 with rows drawn alternately from self and other.
             * 1, or 'columns' : Resulting differences are stacked horizontally
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 9e7c7b53da8e9..3de8ebfaa91f5 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -2580,10 +2580,11 @@ def _binop(self, other, func, level=None, fill_value=None):
 -------
 Series or DataFrame
     If axis is 0 or 'index' the result will be a Series.
-        The resulting index will be a MultiIndex with 'self' and 'other'
-        stacked alternately at the inner level.
+    The resulting index will be a MultiIndex with 'self' and 'other'
+    stacked alternately at the inner level.
+    
     If axis is 1 or 'columns' the result will be a DataFrame.
-        It will have two columns namely 'self' and 'other'.
+    It will have two columns namely 'self' and 'other'.
 
 See Also
 --------

From 1ef31c947156a334b6496514da72290dbb189ad5 Mon Sep 17 00:00:00 2001
From: fujiaxiang <fujiaxiang1995@gmail.com>
Date: Thu, 27 Feb 2020 20:20:52 +0800
Subject: [PATCH 25/44] removed trailing spaces in doc-strings (GH30429)

---
 pandas/core/frame.py   | 2 +-
 pandas/core/generic.py | 2 +-
 pandas/core/series.py  | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index a0d8729543a19..9ea057dfb7d44 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5251,7 +5251,7 @@ def _construct_result(self, result) -> "DataFrame":
 -------
 DataFrame
     DataFrame that shows the differences stacked side by side.
-    
+
     The resulting index will be a MultiIndex with 'self' and 'other'
     stacked alternately at the inner level.
 
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index b955a11b6822b..e6130f244ebe4 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -8160,7 +8160,7 @@ def ranker(data):
 
         axis : {0 or 'index', 1 or 'columns'}, default 1
             Determine how the differences are stacked.
-            
+
             * 0, or 'index' : Resulting differences are stacked vertically
                 with rows drawn alternately from self and other.
             * 1, or 'columns' : Resulting differences are stacked horizontally
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 3de8ebfaa91f5..823d5d5d78284 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -2582,7 +2582,7 @@ def _binop(self, other, func, level=None, fill_value=None):
     If axis is 0 or 'index' the result will be a Series.
     The resulting index will be a MultiIndex with 'self' and 'other'
     stacked alternately at the inner level.
-    
+
     If axis is 1 or 'columns' the result will be a DataFrame.
     It will have two columns namely 'self' and 'other'.
 

From 3bc7485a9c4eb0bc470b815b135ddd8a600376e0 Mon Sep 17 00:00:00 2001
From: fujiaxiang <fujiaxiang1995@gmail.com>
Date: Tue, 10 Mar 2020 10:47:29 +0800
Subject: [PATCH 26/44] removed unintended changes in ci/code_checks (GH30429)

---
 ci/code_checks.sh | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 10ff7085d5b81..e2dc543360a62 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -259,18 +259,17 @@ fi
 if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
 
     MSG='Doctests frame.py' ; echo $MSG
-    pytest -q --doctest-modules pandas/core/frame.py \
-        -k"-differences"
+    pytest -q --doctest-modules pandas/core/frame.py
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
     MSG='Doctests series.py' ; echo $MSG
     pytest -q --doctest-modules pandas/core/series.py \
-        -k"-differences -nonzero -reindex -searchsorted -to_dict"
+        -k"-nonzero -reindex -searchsorted -to_dict"
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
     MSG='Doctests generic.py' ; echo $MSG
     pytest -q --doctest-modules pandas/core/generic.py \
-        -k"-_set_axis_name -_xs -describe -differences -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -to_clipboard -to_json -transpose -values -xs"
+        -k"-_set_axis_name -_xs -describe -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -to_json -transpose -values -xs -to_clipboard"
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
     MSG='Doctests groupby.py' ; echo $MSG

From 06ed216570ea46afda2d69c7a2591632a6d5acaf Mon Sep 17 00:00:00 2001
From: fujiaxiang <fujiaxiang1995@gmail.com>
Date: Tue, 10 Mar 2020 11:10:45 +0800
Subject: [PATCH 27/44] corrected errors in docstring (GH30429)

---
 pandas/core/frame.py  | 4 ++--
 pandas/core/series.py | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index cb4797aa39cbf..89b3e92405d4e 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5312,7 +5312,7 @@ def _construct_result(self, result) -> "DataFrame":
 
 Keep all the original indices (rows and columns)
 
->>> df.differences(df2, keep_indices=True)
+>>> df.differences(df2, keep_shape=True)
   col1       col2       col3
   self other self other self other
 0    a     c  NaN   NaN  NaN   NaN
@@ -5323,7 +5323,7 @@ def _construct_result(self, result) -> "DataFrame":
 
 Keep all original indices and data
 
->>> df.differences(df2, keep_indices=True, keep_values=True)
+>>> df.differences(df2, keep_shape=True, keep_equal=True)
   col1       col2       col3
   self other self other self other
 0    a     c  1.0   1.0  1.0   1.0
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 09e6e1f9d5eaa..39e659f599717 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -2619,7 +2619,7 @@ def _binop(self, other, func, level=None, fill_value=None):
 
 Keep all the original indices
 
->>> s1.differences(s2, keep_indices=True)
+>>> s1.differences(s2, keep_shape=True)
   self other
 0  NaN   NaN
 1    b     a
@@ -2629,7 +2629,7 @@ def _binop(self, other, func, level=None, fill_value=None):
 
 Keep all original indices and data
 
->>> s1.differences(s2, keep_indices=True, keep_values=True)
+>>> s1.differences(s2, keep_shape=True, keep_equal=True)
   self other
 0    a     a
 1    b     a

From e4729ca19c45e009079693e38f711452cf06d424 Mon Sep 17 00:00:00 2001
From: Jiaxiang <fujiaxiang1995@gmail.com>
Date: Sat, 14 Mar 2020 22:27:18 +0800
Subject: [PATCH 28/44] Update pandas/core/frame.py: slight semantic cleanup in
 docstring

Co-Authored-By: William Ayd <william.ayd@icloud.com>
---
 pandas/core/frame.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 89b3e92405d4e..766dc1898cb7d 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5262,7 +5262,7 @@ def _construct_result(self, result) -> "DataFrame":
 
 Notes
 -----
-NaNs are considered equal to other NaNs.
+Matching NaNs will not appear as a difference.
 
 Examples
 --------

From b6c0f78549c82bc629b098522b6e3074b3c90a0f Mon Sep 17 00:00:00 2001
From: fujiaxiang <fujiaxiang1995@gmail.com>
Date: Sat, 14 Mar 2020 23:18:42 +0800
Subject: [PATCH 29/44] renamed parameter axis to align_axis; added tests
 (GH30429)

---
 pandas/core/frame.py                          |  7 ++-
 pandas/core/generic.py                        | 30 +++++++------
 pandas/core/series.py                         |  9 ++--
 .../tests/frame/methods/test_differences.py   | 43 +++++++++++++++++--
 .../tests/series/methods/test_differences.py  | 24 ++++++++---
 5 files changed, 85 insertions(+), 28 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 766dc1898cb7d..edea72fce22ec 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5335,10 +5335,13 @@ def _construct_result(self, result) -> "DataFrame":
     )
     @Appender(_shared_docs["differences"] % _shared_doc_kwargs)
     def differences(
-        self, other: "DataFrame", axis=1, keep_shape=False, keep_equal=False
+        self, other: "DataFrame", align_axis=1, keep_shape=False, keep_equal=False
     ) -> "DataFrame":
         return super().differences(
-            other=other, axis=axis, keep_shape=keep_shape, keep_equal=keep_equal
+            other=other,
+            align_axis=align_axis,
+            keep_shape=keep_shape,
+            keep_equal=keep_equal,
         )
 
     def combine(
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 0427bf589c8be..f19d137bcb280 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -8169,12 +8169,12 @@ def ranker(data):
         other : %(klass)s
             Object to compare with.
 
-        axis : {0 or 'index', 1 or 'columns'}, default 1
-            Determine how the differences are stacked.
+        align_axis : {0 or 'index', 1 or 'columns'}, default 1
+            Determine which axis to align the comparison on.
 
             * 0, or 'index' : Resulting differences are stacked vertically
                 with rows drawn alternately from self and other.
-            * 1, or 'columns' : Resulting differences are stacked horizontally
+            * 1, or 'columns' : Resulting differences are aligned horizontally
                 with columns drawn alternately from self and other.
 
         keep_shape : bool, default False
@@ -8187,7 +8187,7 @@ def ranker(data):
         """
 
     @Appender(_shared_docs["differences"] % _shared_doc_kwargs)
-    def differences(self, other, axis=1, keep_shape=False, keep_equal=False):
+    def differences(self, other, align_axis=1, keep_shape=False, keep_equal=False):
         from pandas.core.reshape.concat import concat
 
         mask = ~((self == other) | (self.isna() & other.isna()))
@@ -8207,19 +8207,19 @@ def differences(self, other, axis=1, keep_shape=False, keep_equal=False):
                 self = self[mask]
                 other = other[mask]
 
-        if axis in (1, "columns"):  # This is needed for Series
-            axis = 1
+        if align_axis in (1, "columns"):  # This is needed for Series
+            align_axis = 1
         else:
-            axis = self._get_axis_number(axis)
+            align_axis = self._get_axis_number(align_axis)
 
-        diff = concat([self, other], axis=axis, keys=keys)
+        diff = concat([self, other], axis=align_axis, keys=keys)
 
-        if axis >= self.ndim:
+        if align_axis >= self.ndim:
             # No need to reorganize data if stacking on new axis
             # This currently applies for stacking two Series on columns
             return diff
 
-        ax = diff._get_axis(axis)
+        ax = diff._get_axis(align_axis)
         ax_names = np.array(ax.names)
 
         # set index names to positions to avoid confusion
@@ -8228,18 +8228,20 @@ def differences(self, other, axis=1, keep_shape=False, keep_equal=False):
         # bring self-other to inner level
         order = list(range(1, ax.nlevels)) + [0]
         if isinstance(diff, ABCDataFrame):
-            diff = diff.reorder_levels(order, axis=axis)
+            diff = diff.reorder_levels(order, axis=align_axis)
         else:
             diff = diff.reorder_levels(order)
 
         # restore the index names in order
-        diff._get_axis(axis=axis).names = ax_names[order]
+        diff._get_axis(axis=align_axis).names = ax_names[order]
 
         # reorder axis to keep things organized
         indices = (
-            np.arange(diff.shape[axis]).reshape([2, diff.shape[axis] // 2]).T.flatten()
+            np.arange(diff.shape[align_axis])
+            .reshape([2, diff.shape[align_axis] // 2])
+            .T.flatten()
         )
-        diff = diff.take(indices, axis=axis)
+        diff = diff.take(indices, axis=align_axis)
 
         return diff
 
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 39e659f599717..b7314a686d33e 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -2594,7 +2594,7 @@ def _binop(self, other, func, level=None, fill_value=None):
 
 Notes
 -----
-NaNs are considered equal to other NaNs.
+Matching NaNs will not appear as a difference.
 
 Examples
 --------
@@ -2640,10 +2640,13 @@ def _binop(self, other, func, level=None, fill_value=None):
     )
     @Appender(generic._shared_docs["differences"] % _shared_doc_kwargs)
     def differences(
-        self, other: "Series", axis=1, keep_shape=False, keep_equal=False
+        self, other: "Series", align_axis=1, keep_shape=False, keep_equal=False
     ) -> FrameOrSeries:
         return super().differences(
-            other=other, axis=axis, keep_shape=keep_shape, keep_equal=keep_equal
+            other=other,
+            align_axis=align_axis,
+            keep_shape=keep_shape,
+            keep_equal=keep_equal,
         )
 
     def combine(self, other, func, fill_value=None) -> "Series":
diff --git a/pandas/tests/frame/methods/test_differences.py b/pandas/tests/frame/methods/test_differences.py
index aa2f3d7b11a2a..c4524ea9dda75 100644
--- a/pandas/tests/frame/methods/test_differences.py
+++ b/pandas/tests/frame/methods/test_differences.py
@@ -5,8 +5,8 @@
 import pandas._testing as tm
 
 
-@pytest.mark.parametrize("axis", [0, 1, "index", "columns"])
-def test_differences_axis(axis):
+@pytest.mark.parametrize("align_axis", [0, 1, "index", "columns"])
+def test_differences_axis(align_axis):
     df = pd.DataFrame(
         {"col1": ["a", "b", "c"], "col2": [1.0, 2.0, np.nan], "col3": [1.0, 2.0, 3.0]},
         columns=["col1", "col2", "col3"],
@@ -15,9 +15,9 @@ def test_differences_axis(axis):
     df2.loc[0, "col1"] = "c"
     df2.loc[2, "col3"] = 4.0
 
-    result = df.differences(df2, axis=axis)
+    result = df.differences(df2, align_axis=align_axis)
 
-    if axis in (1, "columns"):
+    if align_axis in (1, "columns"):
         indices = pd.Index([0, 2])
         columns = pd.MultiIndex.from_product([["col1", "col3"], ["self", "other"]])
         expected = pd.DataFrame(
@@ -128,3 +128,38 @@ def test_differences_with_non_equal_nulls():
         columns=columns,
     )
     tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize("align_axis", [0, 1])
+def test_differences_multi_index(align_axis):
+    df = pd.DataFrame(
+        {"col1": ["a", "b", "c"], "col2": [1.0, 2.0, np.nan], "col3": [1.0, 2.0, 3.0]}
+    )
+    df.columns = pd.MultiIndex.from_arrays([["a", "a", "b"], ["col1", "col2", "col3"]])
+    df.index = pd.MultiIndex.from_arrays([["x", "x", "y"], [0, 1, 2]])
+
+    df2 = df.copy()
+    df2.iloc[0, 0] = "c"
+    df2.iloc[2, 2] = 4.0
+
+    result = df.differences(df2, align_axis=align_axis)
+
+    if align_axis == 0:
+        indices = pd.MultiIndex.from_arrays(
+            [["x", "x", "y", "y"], [0, 0, 2, 2], ["self", "other", "self", "other"]]
+        )
+        columns = pd.MultiIndex.from_arrays([["a", "b"], ["col1", "col3"]])
+        data = [["a", np.nan], ["c", np.nan], [np.nan, 3.0], [np.nan, 4.0]]
+    else:
+        indices = pd.MultiIndex.from_arrays([["x", "y"], [0, 2]])
+        columns = pd.MultiIndex.from_arrays(
+            [
+                ["a", "a", "b", "b"],
+                ["col1", "col1", "col3", "col3"],
+                ["self", "other", "self", "other"],
+            ]
+        )
+        data = [["a", "c", np.nan, np.nan], [np.nan, np.nan, 3.0, 4.0]]
+
+    expected = pd.DataFrame(data=data, index=indices, columns=columns)
+    tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/series/methods/test_differences.py b/pandas/tests/series/methods/test_differences.py
index c6d1b3ed1a65c..30b972636cbf2 100644
--- a/pandas/tests/series/methods/test_differences.py
+++ b/pandas/tests/series/methods/test_differences.py
@@ -5,14 +5,14 @@
 import pandas._testing as tm
 
 
-@pytest.mark.parametrize("axis", [0, 1, "index", "columns"])
-def test_differences_axis(axis):
+@pytest.mark.parametrize("align_axis", [0, 1, "index", "columns"])
+def test_differences_axis(align_axis):
     s1 = pd.Series(["a", "b", "c"])
     s2 = pd.Series(["x", "b", "z"])
 
-    result = s1.differences(s2, axis=axis)
+    result = s1.differences(s2, align_axis=align_axis)
 
-    if axis in (1, "columns"):
+    if align_axis in (1, "columns"):
         indices = pd.Index([0, 2])
         columns = pd.Index(["self", "other"])
         expected = pd.DataFrame(
@@ -78,8 +78,22 @@ def test_differences_with_non_equal_nulls():
     s1 = pd.Series(["a", "b", "c"])
     s2 = pd.Series(["x", "b", np.nan])
 
-    result = s1.differences(s2, axis=0)
+    result = s1.differences(s2, align_axis=0)
 
     indices = pd.MultiIndex.from_product([[0, 2], ["self", "other"]])
     expected = pd.Series(["a", "x", "c", np.nan], index=indices)
     tm.assert_series_equal(result, expected)
+
+
+def test_differences_multi_index():
+    index = pd.MultiIndex.from_arrays([[0, 0, 1], [0, 1, 2]])
+    s1 = pd.Series(["a", "b", "c"], index=index)
+    s2 = pd.Series(["x", "b", "z"], index=index)
+
+    result = s1.differences(s2, align_axis=0)
+
+    indices = pd.MultiIndex.from_arrays(
+        [[0, 0, 1, 1], [0, 0, 2, 2], ["self", "other", "self", "other"]]
+    )
+    expected = pd.Series(["a", "x", "c", "z"], index=indices)
+    tm.assert_series_equal(result, expected)

From 08504204a3a6c0628467e3684a8b8518d7cf0071 Mon Sep 17 00:00:00 2001
From: fujiaxiang <fujiaxiang1995@gmail.com>
Date: Sat, 14 Mar 2020 23:29:38 +0800
Subject: [PATCH 30/44] minor correction in docstring

---
 pandas/core/frame.py  | 4 ++--
 pandas/core/series.py | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 513823ae81283..d7ade19197a2e 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5295,7 +5295,7 @@ def _construct_result(self, result) -> "DataFrame":
 3    b   NaN   4.0
 4    a   5.0   5.0
 
-Stack the differences on columns
+Align the differences on columns
 
 >>> df.differences(df2)
   col1       col3
@@ -5305,7 +5305,7 @@ def _construct_result(self, result) -> "DataFrame":
 
 Stack the differences on rows
 
->>> df.differences(df2, axis=0)
+>>> df.differences(df2, align_axis=0)
         col1  col3
 0 self     a   NaN
   other    c   NaN
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 1606473cf88d1..3a0816a78e626 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -2602,7 +2602,7 @@ def _binop(self, other, func, level=None, fill_value=None):
 >>> s1 = pd.Series(["a", "b", "c", "d", "e"])
 >>> s2 = pd.Series(["a", "a", "c", "b", "e"])
 
-Stack the differences on columns
+Align the differences on columns
 
 >>> s1.differences(s2)
   self other
@@ -2611,7 +2611,7 @@ def _binop(self, other, func, level=None, fill_value=None):
 
 Stack the differences on indices
 
->>> s1.differences(s2, axis=0)
+>>> s1.differences(s2, align_axis=0)
 1  self     b
    other    a
 3  self     d

From a709db7f0b959d45cbe8860a80e23b1ead975263 Mon Sep 17 00:00:00 2001
From: fujiaxiang <fujiaxiang1995@gmail.com>
Date: Sat, 14 Mar 2020 23:39:17 +0800
Subject: [PATCH 31/44] some semantic cleanup in docstrings

---
 pandas/core/frame.py   | 4 ++--
 pandas/core/generic.py | 2 +-
 pandas/core/series.py  | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index d7ade19197a2e..a0305c74488a8 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5312,7 +5312,7 @@ def _construct_result(self, result) -> "DataFrame":
 2 self   NaN   3.0
   other  NaN   4.0
 
-Keep all the original indices (rows and columns)
+Keep all original rows and columns
 
 >>> df.differences(df2, keep_shape=True)
   col1       col2       col3
@@ -5323,7 +5323,7 @@ def _construct_result(self, result) -> "DataFrame":
 3  NaN   NaN  NaN   NaN  NaN   NaN
 4  NaN   NaN  NaN   NaN  NaN   NaN
 
-Keep all original indices and data
+Keep all original rows and columns and also all original values
 
 >>> df.differences(df2, keep_shape=True, keep_equal=True)
   col1       col2       col3
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index d606e05b38161..2443f31faef99 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -8288,7 +8288,7 @@ def ranker(data):
 
         keep_shape : bool, default False
             If true, all rows and columns are kept.
-            Otherwise, only the different ones are kept.
+            Otherwise, only the ones with different values are kept.
 
         keep_equal : bool, default False
             If true, the result keeps values that are equal.
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 3a0816a78e626..e5aa9ba1bff38 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -2618,7 +2618,7 @@ def _binop(self, other, func, level=None, fill_value=None):
    other    b
 dtype: object
 
-Keep all the original indices
+Keep all original rows
 
 >>> s1.differences(s2, keep_shape=True)
   self other
@@ -2628,7 +2628,7 @@ def _binop(self, other, func, level=None, fill_value=None):
 3    d     b
 4  NaN   NaN
 
-Keep all original indices and data
+Keep all original rows and also all original values
 
 >>> s1.differences(s2, keep_shape=True, keep_equal=True)
   self other

From 9509604137a21a50870d7b31544e349a31cfadd3 Mon Sep 17 00:00:00 2001
From: fujiaxiang <fujiaxiang1995@gmail.com>
Date: Tue, 17 Mar 2020 22:23:53 +0800
Subject: [PATCH 32/44] added type indicator for method arguments

---
 pandas/core/frame.py   | 6 +++++-
 pandas/core/generic.py | 8 +++++++-
 pandas/core/series.py  | 6 +++++-
 3 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index a0305c74488a8..150cd2ea8df82 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5337,7 +5337,11 @@ def _construct_result(self, result) -> "DataFrame":
     )
     @Appender(_shared_docs["differences"] % _shared_doc_kwargs)
     def differences(
-        self, other: "DataFrame", align_axis=1, keep_shape=False, keep_equal=False
+        self,
+        other: "DataFrame",
+        align_axis: Axis = 1,
+        keep_shape: bool = False,
+        keep_equal: bool = False,
     ) -> "DataFrame":
         return super().differences(
             other=other,
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 2443f31faef99..05409bcdde90a 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -8296,7 +8296,13 @@ def ranker(data):
         """
 
     @Appender(_shared_docs["differences"] % _shared_doc_kwargs)
-    def differences(self, other, align_axis=1, keep_shape=False, keep_equal=False):
+    def differences(
+        self,
+        other: FrameOrSeries,
+        align_axis: Axis = 1,
+        keep_shape: bool = False,
+        keep_equal: bool = False,
+    ):
         from pandas.core.reshape.concat import concat
 
         mask = ~((self == other) | (self.isna() & other.isna()))
diff --git a/pandas/core/series.py b/pandas/core/series.py
index e5aa9ba1bff38..1afa04ad62940 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -2641,7 +2641,11 @@ def _binop(self, other, func, level=None, fill_value=None):
     )
     @Appender(generic._shared_docs["differences"] % _shared_doc_kwargs)
     def differences(
-        self, other: "Series", align_axis=1, keep_shape=False, keep_equal=False
+        self,
+        other: "Series",
+        align_axis: Axis = 1,
+        keep_shape: bool = False,
+        keep_equal: bool = False,
     ) -> FrameOrSeries:
         return super().differences(
             other=other,

From e1a1c49bb5de3019c179444050bddbcb3b01762f Mon Sep 17 00:00:00 2001
From: fujiaxiang <fujiaxiang1995@gmail.com>
Date: Fri, 20 Mar 2020 17:57:21 +0800
Subject: [PATCH 33/44] updated type hints

---
 pandas/core/generic.py | 26 +++++++++++++-------------
 pandas/core/series.py  |  4 ++--
 2 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 05409bcdde90a..1f7254ffb3ee8 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -8298,10 +8298,10 @@ def ranker(data):
     @Appender(_shared_docs["differences"] % _shared_doc_kwargs)
     def differences(
         self,
-        other: FrameOrSeries,
+        other,
         align_axis: Axis = 1,
-        keep_shape: bool = False,
-        keep_equal: bool = False,
+        keep_shape: bool_t = False,
+        keep_equal: bool_t = False,
     ):
         from pandas.core.reshape.concat import concat
 
@@ -8323,18 +8323,18 @@ def differences(
                 other = other[mask]
 
         if align_axis in (1, "columns"):  # This is needed for Series
-            align_axis = 1
+            axis = 1
         else:
-            align_axis = self._get_axis_number(align_axis)
+            axis = self._get_axis_number(align_axis)
 
-        diff = concat([self, other], axis=align_axis, keys=keys)
+        diff = concat([self, other], axis=axis, keys=keys)
 
-        if align_axis >= self.ndim:
+        if axis >= self.ndim:
             # No need to reorganize data if stacking on new axis
             # This currently applies for stacking two Series on columns
             return diff
 
-        ax = diff._get_axis(align_axis)
+        ax = diff._get_axis(axis)
         ax_names = np.array(ax.names)
 
         # set index names to positions to avoid confusion
@@ -8343,20 +8343,20 @@ def differences(
         # bring self-other to inner level
         order = list(range(1, ax.nlevels)) + [0]
         if isinstance(diff, ABCDataFrame):
-            diff = diff.reorder_levels(order, axis=align_axis)
+            diff = diff.reorder_levels(order, axis=axis)
         else:
             diff = diff.reorder_levels(order)
 
         # restore the index names in order
-        diff._get_axis(axis=align_axis).names = ax_names[order]
+        diff._get_axis(axis=axis).names = ax_names[order]
 
         # reorder axis to keep things organized
         indices = (
-            np.arange(diff.shape[align_axis])
-            .reshape([2, diff.shape[align_axis] // 2])
+            np.arange(diff.shape[axis])
+            .reshape([2, diff.shape[axis] // 2])
             .T.flatten()
         )
-        diff = diff.take(indices, axis=align_axis)
+        diff = diff.take(indices, axis=axis)
 
         return diff
 
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 1afa04ad62940..ccc9103b4a1dd 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -22,7 +22,7 @@
 from pandas._config import get_option
 
 from pandas._libs import lib, properties, reshape, tslibs
-from pandas._typing import Axis, DtypeObj, FrameOrSeries, Label
+from pandas._typing import Axis, DtypeObj, FrameOrSeriesUnion, Label
 from pandas.compat.numpy import function as nv
 from pandas.util._decorators import Appender, Substitution, doc
 from pandas.util._validators import validate_bool_kwarg, validate_percentile
@@ -2646,7 +2646,7 @@ def differences(
         align_axis: Axis = 1,
         keep_shape: bool = False,
         keep_equal: bool = False,
-    ) -> FrameOrSeries:
+    ) -> FrameOrSeriesUnion:
         return super().differences(
             other=other,
             align_axis=align_axis,

From a8caa539b071a1a62082ccab54042cde8774b342 Mon Sep 17 00:00:00 2001
From: fujiaxiang <fujiaxiang1995@gmail.com>
Date: Fri, 20 Mar 2020 19:00:32 +0800
Subject: [PATCH 34/44] added NDFrame in FrameOrSeriesUnion type

---
 pandas/_typing.py      | 2 +-
 pandas/core/generic.py | 4 +---
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/pandas/_typing.py b/pandas/_typing.py
index 3b7392f781525..7387e100bcfad 100644
--- a/pandas/_typing.py
+++ b/pandas/_typing.py
@@ -52,7 +52,7 @@
 # `def func(a: FrameOrSeriesUnion) -> FrameOrSeriesUnion: ...` means that if a Series
 # is passed in, either a Series or DataFrame is returned, and if a DataFrame is passed
 # in, either a DataFrame or a Series is returned.
-FrameOrSeriesUnion = Union["DataFrame", "Series"]
+FrameOrSeriesUnion = Union["DataFrame", "NDFrame", "Series"]
 
 # FrameOrSeries is stricter and ensures that the same subclass of NDFrame always is
 # used. E.g. `def func(a: FrameOrSeries) -> FrameOrSeries: ...` means that if a
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 1f7254ffb3ee8..eeb761418a059 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -8352,9 +8352,7 @@ def differences(
 
         # reorder axis to keep things organized
         indices = (
-            np.arange(diff.shape[axis])
-            .reshape([2, diff.shape[axis] // 2])
-            .T.flatten()
+            np.arange(diff.shape[axis]).reshape([2, diff.shape[axis] // 2]).T.flatten()
         )
         diff = diff.take(indices, axis=axis)
 

From 4056f90a21c17a620e513fab4051ba0efb8b199d Mon Sep 17 00:00:00 2001
From: fujiaxiang <fujiaxiang1995@gmail.com>
Date: Sat, 21 Mar 2020 09:21:12 +0800
Subject: [PATCH 35/44] fixed type hints of concat function

---
 pandas/_typing.py             | 2 +-
 pandas/core/reshape/concat.py | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/pandas/_typing.py b/pandas/_typing.py
index 7387e100bcfad..3b7392f781525 100644
--- a/pandas/_typing.py
+++ b/pandas/_typing.py
@@ -52,7 +52,7 @@
 # `def func(a: FrameOrSeriesUnion) -> FrameOrSeriesUnion: ...` means that if a Series
 # is passed in, either a Series or DataFrame is returned, and if a DataFrame is passed
 # in, either a DataFrame or a Series is returned.
-FrameOrSeriesUnion = Union["DataFrame", "NDFrame", "Series"]
+FrameOrSeriesUnion = Union["DataFrame", "Series"]
 
 # FrameOrSeries is stricter and ensures that the same subclass of NDFrame always is
 # used. E.g. `def func(a: FrameOrSeries) -> FrameOrSeries: ...` means that if a
diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py
index 091129707228f..eb195f836ebed 100644
--- a/pandas/core/reshape/concat.py
+++ b/pandas/core/reshape/concat.py
@@ -6,7 +6,7 @@
 
 import numpy as np
 
-from pandas._typing import FrameOrSeriesUnion, Label
+from pandas._typing import FrameOrSeries, FrameOrSeriesUnion, Label
 
 from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries
 
@@ -48,7 +48,7 @@ def concat(
 
 @overload
 def concat(
-    objs: Union[Iterable[FrameOrSeriesUnion], Mapping[Label, FrameOrSeriesUnion]],
+    objs: Union[Iterable[FrameOrSeries], Mapping[Label, FrameOrSeries]],
     axis=0,
     join: str = "outer",
     ignore_index: bool = False,
@@ -63,7 +63,7 @@ def concat(
 
 
 def concat(
-    objs: Union[Iterable[FrameOrSeriesUnion], Mapping[Label, FrameOrSeriesUnion]],
+    objs: Union[Iterable[FrameOrSeries], Mapping[Label, FrameOrSeries]],
     axis=0,
     join="outer",
     ignore_index: bool = False,

From 39f857ebdbf5a11003b6e8125ddb249a9b26534c Mon Sep 17 00:00:00 2001
From: fujiaxiang <fujiaxiang1995@gmail.com>
Date: Thu, 9 Apr 2020 17:16:34 +0800
Subject: [PATCH 36/44] renamed `differences` method to `compare`

---
 doc/source/reference/frame.rst                |  2 +-
 doc/source/reference/series.rst               |  2 +-
 doc/source/whatsnew/v1.1.0.rst                |  4 ++--
 pandas/core/frame.py                          | 16 +++++++-------
 pandas/core/generic.py                        |  6 ++---
 pandas/core/series.py                         | 16 +++++++-------
 .../{test_differences.py => test_compare.py}  | 22 +++++++++----------
 .../{test_differences.py => test_compare.py}  | 22 +++++++++----------
 8 files changed, 45 insertions(+), 45 deletions(-)
 rename pandas/tests/frame/methods/{test_differences.py => test_compare.py} (89%)
 rename pandas/tests/series/methods/{test_differences.py => test_compare.py} (82%)

diff --git a/doc/source/reference/frame.rst b/doc/source/reference/frame.rst
index 925e48875356b..825035d427259 100644
--- a/doc/source/reference/frame.rst
+++ b/doc/source/reference/frame.rst
@@ -247,10 +247,10 @@ Combining / comparing / joining / merging
 
    DataFrame.append
    DataFrame.assign
+   DataFrame.compare
    DataFrame.join
    DataFrame.merge
    DataFrame.update
-   DataFrame.differences
 
 Time series-related
 ~~~~~~~~~~~~~~~~~~~
diff --git a/doc/source/reference/series.rst b/doc/source/reference/series.rst
index 8ac2052b5c693..6e81da64fb53a 100644
--- a/doc/source/reference/series.rst
+++ b/doc/source/reference/series.rst
@@ -246,9 +246,9 @@ Combining / comparing / joining / merging
    :toctree: api/
 
    Series.append
+   Series.differences
    Series.replace
    Series.update
-   Series.differences
 
 Time series-related
 -------------------
diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 57e7bdb05dfe6..983ab17bea9ef 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -39,7 +39,7 @@ For example:
 Comparing two `DataFrame` or two `Series` and summarizing the differences
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-We've added :meth:`DataFrame.differences` and :meth:`Series.differences` for comparing two `DataFrame` or two `Series` (:issue:`30429`)
+We've added :meth:`DataFrame.compare` and :meth:`Series.compare` for comparing two `DataFrame` or two `Series` (:issue:`30429`)
 
 .. ipython:: python
 
@@ -59,7 +59,7 @@ We've added :meth:`DataFrame.differences` and :meth:`Series.differences` for com
 
    df
    df2
-   df.differences(df2)
+   df.compare(df2)
 
 .. _whatsnew_110.timestamp_fold_support:
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 2fa2e00193c48..d028f5215870b 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5338,7 +5338,7 @@ def _construct_result(self, result) -> "DataFrame":
 
 See Also
 --------
-Series.differences : Show differences.
+Series.compare : Compare with another Series and show differences.
 
 Notes
 -----
@@ -5375,7 +5375,7 @@ def _construct_result(self, result) -> "DataFrame":
 
 Align the differences on columns
 
->>> df.differences(df2)
+>>> df.compare(df2)
   col1       col3
   self other self other
 0    a     c  NaN   NaN
@@ -5383,7 +5383,7 @@ def _construct_result(self, result) -> "DataFrame":
 
 Stack the differences on rows
 
->>> df.differences(df2, align_axis=0)
+>>> df.compare(df2, align_axis=0)
         col1  col3
 0 self     a   NaN
   other    c   NaN
@@ -5392,7 +5392,7 @@ def _construct_result(self, result) -> "DataFrame":
 
 Keep all original rows and columns
 
->>> df.differences(df2, keep_shape=True)
+>>> df.compare(df2, keep_shape=True)
   col1       col2       col3
   self other self other self other
 0    a     c  NaN   NaN  NaN   NaN
@@ -5403,7 +5403,7 @@ def _construct_result(self, result) -> "DataFrame":
 
 Keep all original rows and columns and also all original values
 
->>> df.differences(df2, keep_shape=True, keep_equal=True)
+>>> df.compare(df2, keep_shape=True, keep_equal=True)
   col1       col2       col3
   self other self other self other
 0    a     c  1.0   1.0  1.0   1.0
@@ -5413,15 +5413,15 @@ def _construct_result(self, result) -> "DataFrame":
 4    a     a  5.0   5.0  5.0   5.0
 """
     )
-    @Appender(_shared_docs["differences"] % _shared_doc_kwargs)
-    def differences(
+    @Appender(_shared_docs["compare"] % _shared_doc_kwargs)
+    def compare(
         self,
         other: "DataFrame",
         align_axis: Axis = 1,
         keep_shape: bool = False,
         keep_equal: bool = False,
     ) -> "DataFrame":
-        return super().differences(
+        return super().compare(
             other=other,
             align_axis=align_axis,
             keep_shape=keep_shape,
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 29efec97ea34a..bfe9c7a750258 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -8253,7 +8253,7 @@ def ranker(data):
         return ranker(data)
 
     _shared_docs[
-        "differences"
+        "compare"
     ] = """
         Compare to another %(klass)s and show the differences.
 
@@ -8279,8 +8279,8 @@ def ranker(data):
             Otherwise, equal values are shown as NaNs.
         """
 
-    @Appender(_shared_docs["differences"] % _shared_doc_kwargs)
-    def differences(
+    @Appender(_shared_docs["compare"] % _shared_doc_kwargs)
+    def compare(
         self,
         other,
         align_axis: Axis = 1,
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 9a8e9ec544aaa..e36bc13982643 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -2640,7 +2640,7 @@ def _binop(self, other, func, level=None, fill_value=None):
 
 See Also
 --------
-DataFrame.differences : Show differences.
+DataFrame.compare : Compare with another DataFrame and show differences.
 
 Notes
 -----
@@ -2653,14 +2653,14 @@ def _binop(self, other, func, level=None, fill_value=None):
 
 Align the differences on columns
 
->>> s1.differences(s2)
+>>> s1.compare(s2)
   self other
 1    b     a
 3    d     b
 
 Stack the differences on indices
 
->>> s1.differences(s2, align_axis=0)
+>>> s1.compare(s2, align_axis=0)
 1  self     b
    other    a
 3  self     d
@@ -2669,7 +2669,7 @@ def _binop(self, other, func, level=None, fill_value=None):
 
 Keep all original rows
 
->>> s1.differences(s2, keep_shape=True)
+>>> s1.compare(s2, keep_shape=True)
   self other
 0  NaN   NaN
 1    b     a
@@ -2679,7 +2679,7 @@ def _binop(self, other, func, level=None, fill_value=None):
 
 Keep all original rows and also all original values
 
->>> s1.differences(s2, keep_shape=True, keep_equal=True)
+>>> s1.compare(s2, keep_shape=True, keep_equal=True)
   self other
 0    a     a
 1    b     a
@@ -2688,15 +2688,15 @@ def _binop(self, other, func, level=None, fill_value=None):
 4    e     e
 """
     )
-    @Appender(generic._shared_docs["differences"] % _shared_doc_kwargs)
-    def differences(
+    @Appender(generic._shared_docs["compare"] % _shared_doc_kwargs)
+    def compare(
         self,
         other: "Series",
         align_axis: Axis = 1,
         keep_shape: bool = False,
         keep_equal: bool = False,
     ) -> FrameOrSeriesUnion:
-        return super().differences(
+        return super().compare(
             other=other,
             align_axis=align_axis,
             keep_shape=keep_shape,
diff --git a/pandas/tests/frame/methods/test_differences.py b/pandas/tests/frame/methods/test_compare.py
similarity index 89%
rename from pandas/tests/frame/methods/test_differences.py
rename to pandas/tests/frame/methods/test_compare.py
index c4524ea9dda75..147e4eae4c0f2 100644
--- a/pandas/tests/frame/methods/test_differences.py
+++ b/pandas/tests/frame/methods/test_compare.py
@@ -6,7 +6,7 @@
 
 
 @pytest.mark.parametrize("align_axis", [0, 1, "index", "columns"])
-def test_differences_axis(align_axis):
+def test_compare_axis(align_axis):
     df = pd.DataFrame(
         {"col1": ["a", "b", "c"], "col2": [1.0, 2.0, np.nan], "col3": [1.0, 2.0, 3.0]},
         columns=["col1", "col2", "col3"],
@@ -15,7 +15,7 @@ def test_differences_axis(align_axis):
     df2.loc[0, "col1"] = "c"
     df2.loc[2, "col3"] = 4.0
 
-    result = df.differences(df2, align_axis=align_axis)
+    result = df.compare(df2, align_axis=align_axis)
 
     if align_axis in (1, "columns"):
         indices = pd.Index([0, 2])
@@ -42,10 +42,10 @@ def test_differences_axis(align_axis):
         (True, False),
         (False, True),
         (True, True),
-        # False, False case is already covered in test_differences_axis
+        # False, False case is already covered in test_compare_axis
     ],
 )
-def test_differences_various_formats(keep_shape, keep_equal):
+def test_compare_various_formats(keep_shape, keep_equal):
     df = pd.DataFrame(
         {"col1": ["a", "b", "c"], "col2": [1.0, 2.0, np.nan], "col3": [1.0, 2.0, 3.0]},
         columns=["col1", "col2", "col3"],
@@ -54,7 +54,7 @@ def test_differences_various_formats(keep_shape, keep_equal):
     df2.loc[0, "col1"] = "c"
     df2.loc[2, "col3"] = 4.0
 
-    result = df.differences(df2, keep_shape=keep_shape, keep_equal=keep_equal)
+    result = df.compare(df2, keep_shape=keep_shape, keep_equal=keep_equal)
 
     if keep_shape:
         indices = pd.Index([0, 1, 2])
@@ -90,7 +90,7 @@ def test_differences_various_formats(keep_shape, keep_equal):
     tm.assert_frame_equal(result, expected)
 
 
-def test_differences_with_equal_nulls():
+def test_compare_with_equal_nulls():
     # We want to make sure two NaNs are considered the same
     # and dropped where applicable
     df = pd.DataFrame(
@@ -100,14 +100,14 @@ def test_differences_with_equal_nulls():
     df2 = df.copy()
     df2.loc[0, "col1"] = "c"
 
-    result = df.differences(df2)
+    result = df.compare(df2)
     indices = pd.Index([0])
     columns = pd.MultiIndex.from_product([["col1"], ["self", "other"]])
     expected = pd.DataFrame([["a", "c"]], index=indices, columns=columns)
     tm.assert_frame_equal(result, expected)
 
 
-def test_differences_with_non_equal_nulls():
+def test_compare_with_non_equal_nulls():
     # We want to make sure the relevant NaNs do not get dropped
     # even if the entire row or column are NaNs
     df = pd.DataFrame(
@@ -118,7 +118,7 @@ def test_differences_with_non_equal_nulls():
     df2.loc[0, "col1"] = "c"
     df2.loc[2, "col3"] = np.nan
 
-    result = df.differences(df2)
+    result = df.compare(df2)
 
     indices = pd.Index([0, 2])
     columns = pd.MultiIndex.from_product([["col1", "col3"], ["self", "other"]])
@@ -131,7 +131,7 @@ def test_differences_with_non_equal_nulls():
 
 
 @pytest.mark.parametrize("align_axis", [0, 1])
-def test_differences_multi_index(align_axis):
+def test_compare_multi_index(align_axis):
     df = pd.DataFrame(
         {"col1": ["a", "b", "c"], "col2": [1.0, 2.0, np.nan], "col3": [1.0, 2.0, 3.0]}
     )
@@ -142,7 +142,7 @@ def test_differences_multi_index(align_axis):
     df2.iloc[0, 0] = "c"
     df2.iloc[2, 2] = 4.0
 
-    result = df.differences(df2, align_axis=align_axis)
+    result = df.compare(df2, align_axis=align_axis)
 
     if align_axis == 0:
         indices = pd.MultiIndex.from_arrays(
diff --git a/pandas/tests/series/methods/test_differences.py b/pandas/tests/series/methods/test_compare.py
similarity index 82%
rename from pandas/tests/series/methods/test_differences.py
rename to pandas/tests/series/methods/test_compare.py
index 30b972636cbf2..2a5a1fed30226 100644
--- a/pandas/tests/series/methods/test_differences.py
+++ b/pandas/tests/series/methods/test_compare.py
@@ -6,11 +6,11 @@
 
 
 @pytest.mark.parametrize("align_axis", [0, 1, "index", "columns"])
-def test_differences_axis(align_axis):
+def test_compare_axis(align_axis):
     s1 = pd.Series(["a", "b", "c"])
     s2 = pd.Series(["x", "b", "z"])
 
-    result = s1.differences(s2, align_axis=align_axis)
+    result = s1.compare(s2, align_axis=align_axis)
 
     if align_axis in (1, "columns"):
         indices = pd.Index([0, 2])
@@ -31,14 +31,14 @@ def test_differences_axis(align_axis):
         (True, False),
         (False, True),
         (True, True),
-        # False, False case is already covered in test_differences_axis
+        # False, False case is already covered in test_compare_axis
     ],
 )
-def test_differences_various_formats(keep_shape, keep_equal):
+def test_compare_various_formats(keep_shape, keep_equal):
     s1 = pd.Series(["a", "b", "c"])
     s2 = pd.Series(["x", "b", "z"])
 
-    result = s1.differences(s2, keep_shape=keep_shape, keep_equal=keep_equal)
+    result = s1.compare(s2, keep_shape=keep_shape, keep_equal=keep_equal)
 
     if keep_shape:
         indices = pd.Index([0, 1, 2])
@@ -62,35 +62,35 @@ def test_differences_various_formats(keep_shape, keep_equal):
     tm.assert_frame_equal(result, expected)
 
 
-def test_differences_with_equal_nulls():
+def test_compare_with_equal_nulls():
     # We want to make sure two NaNs are considered the same
     # and dropped where applicable
     s1 = pd.Series(["a", "b", np.nan])
     s2 = pd.Series(["x", "b", np.nan])
 
-    result = s1.differences(s2)
+    result = s1.compare(s2)
     expected = pd.DataFrame([["a", "x"]], columns=["self", "other"])
     tm.assert_frame_equal(result, expected)
 
 
-def test_differences_with_non_equal_nulls():
+def test_compare_with_non_equal_nulls():
     # We want to make sure the relevant NaNs do not get dropped
     s1 = pd.Series(["a", "b", "c"])
     s2 = pd.Series(["x", "b", np.nan])
 
-    result = s1.differences(s2, align_axis=0)
+    result = s1.compare(s2, align_axis=0)
 
     indices = pd.MultiIndex.from_product([[0, 2], ["self", "other"]])
     expected = pd.Series(["a", "x", "c", np.nan], index=indices)
     tm.assert_series_equal(result, expected)
 
 
-def test_differences_multi_index():
+def test_compare_multi_index():
     index = pd.MultiIndex.from_arrays([[0, 0, 1], [0, 1, 2]])
     s1 = pd.Series(["a", "b", "c"], index=index)
     s2 = pd.Series(["x", "b", "z"], index=index)
 
-    result = s1.differences(s2, align_axis=0)
+    result = s1.compare(s2, align_axis=0)
 
     indices = pd.MultiIndex.from_arrays(
         [[0, 0, 1, 1], [0, 0, 2, 2], ["self", "other", "self", "other"]]

From 6c62b0e94d67d9f6e0de88c7e4a14b122f588aab Mon Sep 17 00:00:00 2001
From: fujiaxiang <fujiaxiang1995@gmail.com>
Date: Thu, 9 Apr 2020 17:50:43 +0800
Subject: [PATCH 37/44] correction of method name in
 doc/source/reference/series.rst

---
 doc/source/reference/series.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/reference/series.rst b/doc/source/reference/series.rst
index 66447157b4cb1..797ade9594c7d 100644
--- a/doc/source/reference/series.rst
+++ b/doc/source/reference/series.rst
@@ -246,7 +246,7 @@ Combining / comparing / joining / merging
    :toctree: api/
 
    Series.append
-   Series.differences
+   Series.compare
    Series.replace
    Series.update
 

From 098d40cf831f3d2635ce15f55d98c459e42acccd Mon Sep 17 00:00:00 2001
From: fujiaxiang <fujiaxiang1995@gmail.com>
Date: Sat, 11 Apr 2020 11:22:36 +0800
Subject: [PATCH 38/44] added type checking in `compare` method and reformatted
 whatsnew a bit

---
 doc/source/whatsnew/v1.1.0.rst | 7 +++++--
 pandas/core/generic.py         | 6 ++++++
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 91941a5dec586..6491efff90780 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -51,14 +51,17 @@ We've added :meth:`DataFrame.compare` and :meth:`Series.compare` for comparing t
        },
        columns=["col1", "col2", "col3"],
    )
+   df
+
+.. ipython:: python
+
    df2 = df.copy()
    df2.loc[0, 'col1'] = 'c'
    df2.loc[2, 'col3'] = 4.0
+   df2
 
 .. ipython:: python
 
-   df
-   df2
    df.compare(df2)
 
 .. _whatsnew_110.timestamp_fold_support:
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 79227d29b8c3f..9fe1d0407879c 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -8283,6 +8283,12 @@ def compare(
     ):
         from pandas.core.reshape.concat import concat
 
+        if type(self) is not type(other):
+            cls_self, cls_other = type(self).__name__, type(other).__name__
+            raise TypeError(
+                f"can only compare '{cls_self}' (not '{cls_other}') with '{cls_self}'"
+            )
+
         mask = ~((self == other) | (self.isna() & other.isna()))
         keys = ["self", "other"]
 

From 4223eb4ce768145ddf43a0c54468c9fbcd408e1f Mon Sep 17 00:00:00 2001
From: fujiaxiang <fujiaxiang1995@gmail.com>
Date: Sat, 11 Apr 2020 11:45:31 +0800
Subject: [PATCH 39/44] removed unintended line break

---
 pandas/core/series.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/core/series.py b/pandas/core/series.py
index 213d5681f5340..ec5383835a939 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -2738,7 +2738,6 @@ def compare(
         )
 
     def combine(self, other, func, fill_value=None) -> "Series":
-
         """
         Combine the Series with a Series or scalar according to `func`.
 

From 91758c8c6a03238fc4204187bacb1072ff3a5b74 Mon Sep 17 00:00:00 2001
From: fujiaxiang <fujiaxiang1995@gmail.com>
Date: Wed, 29 Apr 2020 17:10:48 +0800
Subject: [PATCH 40/44] resolved a linting issue

---
 pandas/core/series.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/pandas/core/series.py b/pandas/core/series.py
index 7213ee0862580..4278e434a4c10 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -23,7 +23,15 @@
 from pandas._config import get_option
 
 from pandas._libs import lib, properties, reshape, tslibs
-from pandas._typing import ArrayLike, Axis, DtypeObj, FrameOrSeriesUnion, IndexKeyFunc, Label, ValueKeyFunc
+from pandas._typing import (
+    ArrayLike,
+    Axis,
+    DtypeObj,
+    FrameOrSeriesUnion,
+    IndexKeyFunc,
+    Label,
+    ValueKeyFunc,
+)
 from pandas.compat.numpy import function as nv
 from pandas.util._decorators import Appender, Substitution, doc
 from pandas.util._validators import validate_bool_kwarg, validate_percentile

From 774ff5dd1a305c54e5017966872002d95d7d9522 Mon Sep 17 00:00:00 2001
From: fujiaxiang <fujiaxiang1995@gmail.com>
Date: Thu, 30 Apr 2020 11:01:50 +0800
Subject: [PATCH 41/44] updated whatsnew entry

---
 doc/source/whatsnew/v1.1.0.rst | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 1ecffebdd6b44..b92e728d9708f 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -36,6 +36,8 @@ For example:
    ser["2014"]
    ser.loc["May 2015"]
 
+.. _whatsnew_110.dataframe_or_series_comparing:
+
 Comparing two `DataFrame` or two `Series` and summarizing the differences
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 

From 0189623e5d4a7d4d946a06985d91d7965cb46cc7 Mon Sep 17 00:00:00 2001
From: fujiaxiang <fujiaxiang1995@gmail.com>
Date: Fri, 15 May 2020 10:50:27 +0800
Subject: [PATCH 42/44] added doc in user guide merging.rst and more tests

---
 doc/source/user_guide/merging.rst           | 67 ++++++++++++++++++++-
 doc/source/whatsnew/v1.1.0.rst              |  2 +
 pandas/core/generic.py                      |  2 +
 pandas/tests/frame/methods/test_compare.py  | 15 +++++
 pandas/tests/series/methods/test_compare.py | 15 +++++
 5 files changed, 98 insertions(+), 3 deletions(-)

diff --git a/doc/source/user_guide/merging.rst b/doc/source/user_guide/merging.rst
index 0450c81958a51..56ff8c1fc7c9b 100644
--- a/doc/source/user_guide/merging.rst
+++ b/doc/source/user_guide/merging.rst
@@ -10,15 +10,18 @@
    p = doctools.TablePlotter()
 
 
-****************************
-Merge, join, and concatenate
-****************************
+************************************
+Merge, join, concatenate and compare
+************************************
 
 pandas provides various facilities for easily combining together Series or
 DataFrame with various kinds of set logic for the indexes
 and relational algebra functionality in the case of join / merge-type
 operations.
 
+In addition, pandas also provides utilities to compare two Series or DataFrame
+and summarize their differences.
+
 .. _merging.concat:
 
 Concatenating objects
@@ -1477,3 +1480,61 @@ exclude exact matches on time. Note that though we exclude the exact matches
                  by='ticker',
                  tolerance=pd.Timedelta('10ms'),
                  allow_exact_matches=False)
+
+.. _merging.compare:
+
+Comparing objects
+-----------------
+
+The :meth:`~Series.compare` and :meth:`~DataFrame.compare` methods allow you to
+compare two Series or DataFrame, respectively, and summarize their differences.
+
+This feature was added in :ref:`V1.1.0 <whatsnew_110.dataframe_or_series_comparing>`.
+
+For example, you might want to compare two `DataFrame` and stack their differences
+side by side.
+
+.. ipython:: python
+
+   df = pd.DataFrame(
+       {
+           "col1": ["a", "a", "b", "b", "a"],
+           "col2": [1.0, 2.0, 3.0, np.nan, 5.0],
+           "col3": [1.0, 2.0, 3.0, 4.0, 5.0]
+       },
+       columns=["col1", "col2", "col3"],
+   )
+   df
+
+.. ipython:: python
+
+   df2 = df.copy()
+   df2.loc[0, 'col1'] = 'c'
+   df2.loc[2, 'col3'] = 4.0
+   df2
+
+.. ipython:: python
+
+   df.compare(df2)
+
+By default, if two corresponding values are equal, they will be shown as ``NaN``.
+Furthermore, if all values in an entire row / column are equal, that row / column
+will be omitted from the result. The remaining differences will be aligned on columns.
+
+If you wish, you may choose to stack the differences on rows.
+
+.. ipython:: python
+
+   df.compare(df2, align_axis=0)
+
+If you wish to keep all original rows and columns, set the `keep_shape` argument
+to ``True``.
+
+.. ipython:: python
+
+   df.compare(df2, keep_shape=True)
+
+You may also keep all the original values even if they are equal.
+
+.. ipython:: python
+   df.compare(df2, keep_shape=True, keep_equal=True)
diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 6fff71ed18b2d..db6b1ec5cf416 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -67,6 +67,8 @@ We've added :meth:`DataFrame.compare` and :meth:`Series.compare` for comparing t
 
    df.compare(df2)
 
+See :ref:`User Guide <merging.compare>` for more details.
+
 
 .. _whatsnew_110.groupby_key:
 
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index ad076f401cb07..2c33b474fc9a7 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -8410,6 +8410,8 @@ def ranker(data):
         "compare"
     ] = """
         Compare to another %(klass)s and show the differences.
+        
+        .. versionadded:: 1.1.0
 
         Parameters
         ----------
diff --git a/pandas/tests/frame/methods/test_compare.py b/pandas/tests/frame/methods/test_compare.py
index 147e4eae4c0f2..3a89364b6c5be 100644
--- a/pandas/tests/frame/methods/test_compare.py
+++ b/pandas/tests/frame/methods/test_compare.py
@@ -7,6 +7,7 @@
 
 @pytest.mark.parametrize("align_axis", [0, 1, "index", "columns"])
 def test_compare_axis(align_axis):
+    # GH#30429
     df = pd.DataFrame(
         {"col1": ["a", "b", "c"], "col2": [1.0, 2.0, np.nan], "col3": [1.0, 2.0, 3.0]},
         columns=["col1", "col2", "col3"],
@@ -163,3 +164,17 @@ def test_compare_multi_index(align_axis):
 
     expected = pd.DataFrame(data=data, index=indices, columns=columns)
     tm.assert_frame_equal(result, expected)
+
+
+def test_compare_unaligned_objects():
+    # test DataFrames with different indices
+    with pytest.raises(ValueError, match='Can only compare identically-labeled DataFrame objects'):
+        df1 = pd.DataFrame([1, 2, 3], index=['a', 'b', 'c'])
+        df2 = pd.DataFrame([1, 2, 3], index=['a', 'b', 'd'])
+        df1.compare(df2)
+
+    # test DataFrames with different shapes
+    with pytest.raises(ValueError, match='Can only compare identically-labeled DataFrame objects'):
+        df1 = pd.DataFrame(np.ones((3, 3)))
+        df2 = pd.DataFrame(np.zeros((2, 1)))
+        df1.compare(df2)
diff --git a/pandas/tests/series/methods/test_compare.py b/pandas/tests/series/methods/test_compare.py
index 2a5a1fed30226..5222da9330b38 100644
--- a/pandas/tests/series/methods/test_compare.py
+++ b/pandas/tests/series/methods/test_compare.py
@@ -7,6 +7,7 @@
 
 @pytest.mark.parametrize("align_axis", [0, 1, "index", "columns"])
 def test_compare_axis(align_axis):
+    # GH#30429
     s1 = pd.Series(["a", "b", "c"])
     s2 = pd.Series(["x", "b", "z"])
 
@@ -97,3 +98,17 @@ def test_compare_multi_index():
     )
     expected = pd.Series(["a", "x", "c", "z"], index=indices)
     tm.assert_series_equal(result, expected)
+
+
+def test_compare_unaligned_objects():
+    # test Series with different indices
+    with pytest.raises(ValueError, match='Can only compare identically-labeled Series objects'):
+        ser1 = pd.Series([1, 2, 3], index=['a', 'b', 'c'])
+        ser2 = pd.Series([1, 2, 3], index=['a', 'b', 'd'])
+        ser1.compare(ser2)
+
+    # test Series with different lengths
+    with pytest.raises(ValueError, match='Can only compare identically-labeled Series objects'):
+        ser1 = pd.Series([1, 2, 3])
+        ser2 = pd.Series([1, 2, 3, 4])
+        ser1.compare(ser2)

From b0b3e24c4815e0c17ebddbdaba389f0696f0a97e Mon Sep 17 00:00:00 2001
From: fujiaxiang <fujiaxiang1995@gmail.com>
Date: Fri, 15 May 2020 11:33:17 +0800
Subject: [PATCH 43/44] removed trailing space in docstring and blackified code

---
 pandas/core/generic.py                      |  2 +-
 pandas/tests/frame/methods/test_compare.py  | 10 ++++++----
 pandas/tests/series/methods/test_compare.py | 10 ++++++----
 3 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 2c33b474fc9a7..14389093a0a85 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -8410,7 +8410,7 @@ def ranker(data):
         "compare"
     ] = """
         Compare to another %(klass)s and show the differences.
-        
+
         .. versionadded:: 1.1.0
 
         Parameters
diff --git a/pandas/tests/frame/methods/test_compare.py b/pandas/tests/frame/methods/test_compare.py
index 3a89364b6c5be..468811eba0d39 100644
--- a/pandas/tests/frame/methods/test_compare.py
+++ b/pandas/tests/frame/methods/test_compare.py
@@ -168,13 +168,15 @@ def test_compare_multi_index(align_axis):
 
 def test_compare_unaligned_objects():
     # test DataFrames with different indices
-    with pytest.raises(ValueError, match='Can only compare identically-labeled DataFrame objects'):
-        df1 = pd.DataFrame([1, 2, 3], index=['a', 'b', 'c'])
-        df2 = pd.DataFrame([1, 2, 3], index=['a', 'b', 'd'])
+    # inputs are built outside pytest.raises so only compare() is under test
+    msg = "Can only compare identically-labeled DataFrame objects"
+    df1 = pd.DataFrame([1, 2, 3], index=["a", "b", "c"])
+    df2 = pd.DataFrame([1, 2, 3], index=["a", "b", "d"])
+    with pytest.raises(ValueError, match=msg):
         df1.compare(df2)
 
     # test DataFrames with different shapes
-    with pytest.raises(ValueError, match='Can only compare identically-labeled DataFrame objects'):
-        df1 = pd.DataFrame(np.ones((3, 3)))
-        df2 = pd.DataFrame(np.zeros((2, 1)))
+    df1 = pd.DataFrame(np.ones((3, 3)))
+    df2 = pd.DataFrame(np.zeros((2, 1)))
+    with pytest.raises(ValueError, match=msg):
         df1.compare(df2)
diff --git a/pandas/tests/series/methods/test_compare.py b/pandas/tests/series/methods/test_compare.py
index 5222da9330b38..8570800048898 100644
--- a/pandas/tests/series/methods/test_compare.py
+++ b/pandas/tests/series/methods/test_compare.py
@@ -102,13 +102,15 @@ def test_compare_multi_index():
 
 def test_compare_unaligned_objects():
     # test Series with different indices
-    with pytest.raises(ValueError, match='Can only compare identically-labeled Series objects'):
-        ser1 = pd.Series([1, 2, 3], index=['a', 'b', 'c'])
-        ser2 = pd.Series([1, 2, 3], index=['a', 'b', 'd'])
+    # inputs are built outside pytest.raises so only compare() is under test
+    msg = "Can only compare identically-labeled Series objects"
+    ser1 = pd.Series([1, 2, 3], index=["a", "b", "c"])
+    ser2 = pd.Series([1, 2, 3], index=["a", "b", "d"])
+    with pytest.raises(ValueError, match=msg):
         ser1.compare(ser2)
 
     # test Series with different lengths
-    with pytest.raises(ValueError, match='Can only compare identically-labeled Series objects'):
-        ser1 = pd.Series([1, 2, 3])
-        ser2 = pd.Series([1, 2, 3, 4])
+    ser1 = pd.Series([1, 2, 3])
+    ser2 = pd.Series([1, 2, 3, 4])
+    with pytest.raises(ValueError, match=msg):
         ser1.compare(ser2)

From 007eeb71ffecca19d5b54ce4ea81d4b2517bcefe Mon Sep 17 00:00:00 2001
From: fujiaxiang <fujiaxiang1995@gmail.com>
Date: Wed, 27 May 2020 11:35:16 +0800
Subject: [PATCH 44/44] DOC: add keep_equal example to DataFrame.compare
 docstring

---
 pandas/core/frame.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index b46bdb1393b86..4911617b8eed4 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5819,6 +5819,14 @@ def _construct_result(self, result) -> "DataFrame":
 2 self   NaN   3.0
   other  NaN   4.0
 
+Keep the equal values
+
+>>> df.compare(df2, keep_equal=True)
+  col1       col3
+  self other self other
+0    a     c  1.0   1.0
+2    b     b  3.0   4.0
+
 Keep all original rows and columns
 
 >>> df.compare(df2, keep_shape=True)