From 17f7a6e7a28bc50bbd27fcfb9ecf8558d43d28b5 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Thu, 8 Jul 2021 14:10:02 -0400 Subject: [PATCH 1/2] REGR: DataFrame.agg with axis=1, EA dtype, and duplicate index --- doc/source/whatsnew/v1.3.1.rst | 3 ++- pandas/core/frame.py | 4 +--- pandas/tests/apply/test_frame_apply.py | 11 ++++++++++ pandas/tests/base/test_transpose.py | 29 ++++++++++++++++++++++++++ 4 files changed, 43 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v1.3.1.rst b/doc/source/whatsnew/v1.3.1.rst index 9c17a22bf6d52..a06159c110c9b 100644 --- a/doc/source/whatsnew/v1.3.1.rst +++ b/doc/source/whatsnew/v1.3.1.rst @@ -16,6 +16,7 @@ Fixed regressions ~~~~~~~~~~~~~~~~~ - Pandas could not be built on PyPy (:issue:`42355`) - :class:`DataFrame` constructed with with an older version of pandas could not be unpickled (:issue:`42345`) +- Fixed regression in :meth:`DataFrame.agg` dropping values when the DataFrame had an Extension Array dtype, a duplicate index, and ``axis=1`` (:issue:`42380`) - .. --------------------------------------------------------------------------- @@ -24,7 +25,7 @@ Fixed regressions Bug fixes ~~~~~~~~~ -- +- Fixed bug in :meth:`DataFrame.transpose` dropping values when the DataFrame had an Extension Array dtype and a duplicate index (:issue:`42380`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 2466154f78294..e0e1022e2047d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3338,12 +3338,10 @@ def transpose(self, *args, copy: bool = False) -> DataFrame: ): # We have EAs with the same dtype. We can preserve that dtype in transpose. dtype = dtypes[0] - arr_type = dtype.construct_array_type() values = self.values - new_values = [arr_type._from_sequence(row, dtype=dtype) for row in values] result = self._constructor( - dict(zip(self.index, new_values)), index=self.columns + values.T, index=self.columns, columns=self.index, dtype=dtype ) else: diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index 14266a2c29a7f..995f404dc49d3 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -53,6 +53,17 @@ def test_apply_axis1_with_ea(): tm.assert_frame_equal(result, expected) +@pytest.mark.parametrize( + "data, dtype", + [(1, None), (1, CategoricalDtype([1])), (Timestamp("2013-01-01", tz="UTC"), None)], +) +def test_agg_axis1_duplicate_index(data, dtype): + # GH 42380 + expected = DataFrame([[data], [data]], index=["a", "a"], dtype=dtype) + result = expected.agg(lambda x: x, axis=1) + tm.assert_frame_equal(result, expected) + + def test_apply_mixed_datetimelike(): # mixed datetimelike # GH 7778 diff --git a/pandas/tests/base/test_transpose.py b/pandas/tests/base/test_transpose.py index 5ba278368834c..246f33d27476c 100644 --- a/pandas/tests/base/test_transpose.py +++ b/pandas/tests/base/test_transpose.py @@ -1,6 +1,10 @@ import numpy as np import pytest +from pandas import ( + CategoricalDtype, + DataFrame, +) import pandas._testing as tm @@ -25,3 +29,28 @@ def test_numpy_transpose(index_or_series_obj): with pytest.raises(ValueError, match=msg): np.transpose(obj, axes=1) + + +@pytest.mark.parametrize( + "data, transposed_data, index, columns, dtype", + [ + ([[1], [2]], [[1, 2]], ["a", "a"], ["b"], int), + ([[1], [2]], [[1, 2]], ["a", "a"], ["b"], CategoricalDtype([1, 2])), + ([[1, 2]], [[1], [2]], ["b"], ["a", "a"], int), + ([[1, 2]], [[1], [2]], ["b"], ["a", "a"], CategoricalDtype([1, 2])), + ([[1, 2], [3, 4]], [[1, 3], [2, 4]], ["a", "a"], ["b", "b"], int), + ( + [[1, 2], [3, 4]], + [[1, 3], [2, 4]], + ["a", "a"], + ["b", "b"], + CategoricalDtype([1, 2, 3, 4]), + ), + ], +) +def test_duplicate_labels(data, transposed_data, index, columns, dtype): + # GH 42380 + df = DataFrame(data, index=index, columns=columns, dtype=dtype) + result = df.T + expected = DataFrame(transposed_data, index=columns, columns=index, dtype=dtype) + tm.assert_frame_equal(result, expected) From 980b185caf97e7f72fb0aaca7db6731c745a992a Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Thu, 8 Jul 2021 15:50:27 -0400 Subject: [PATCH 2/2] Use _from_arrays --- pandas/core/frame.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6f6e3fdbbf8c1..43adb4df7fcb4 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3338,10 +3338,12 @@ def transpose(self, *args, copy: bool = False) -> DataFrame: ): # We have EAs with the same dtype. We can preserve that dtype in transpose. dtype = dtypes[0] + arr_type = dtype.construct_array_type() values = self.values - result = self._constructor( - values.T, index=self.columns, columns=self.index, dtype=dtype + new_values = [arr_type._from_sequence(row, dtype=dtype) for row in values] + result = type(self)._from_arrays( + new_values, index=self.columns, columns=self.index ) else: