From d24c3a8c181cf872fb484e90bc60a197423080c9 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sun, 26 Apr 2020 10:59:10 +0100 Subject: [PATCH 1/2] don't use get_loc, which may return a slice --- doc/source/whatsnew/v1.1.0.rst | 1 + pandas/core/reshape/concat.py | 8 ++++---- pandas/tests/reshape/test_concat.py | 15 +++++++++++++++ 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 845f7773c263c..761408c5ccf17 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -665,6 +665,7 @@ Reshaping - Bug in :meth:`concat` where when passing a non-dict mapping as ``objs`` would raise a ``TypeError`` (:issue:`32863`) - :meth:`DataFrame.agg` now provides more descriptive ``SpecificationError`` message when attempting to aggregating non-existant column (:issue:`32755`) - Bug in :meth:`DataFrame.unstack` when MultiIndexed columns and MultiIndexed rows were used (:issue:`32624`, :issue:`24729` and :issue:`28306`) +- Bug in :func:`concat` was not allowing for concatenation of ``DataFrame`` and ``Series`` with duplicate keys (:issue:`33654`) Sparse diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index a868e663b06a5..8aaa419b67e42 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -619,10 +619,10 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None) -> MultiInde for hlevel, level in zip(zipped, levels): to_concat = [] for key, index in zip(hlevel, indexes): - try: - i = level.get_loc(key) - except KeyError as err: - raise ValueError(f"Key {key} not in level {level}") from err + mask = level == key + if not any(mask): + raise ValueError(f"Key {key} not in level {level}") + i = np.nonzero(level == key)[0][0] to_concat.append(np.repeat(i, len(index))) codes_list.append(np.concatenate(to_concat)) diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 
c4025640bb49f..d5f58c6bc4f6e 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -2802,3 +2802,18 @@ def test_concat_multiindex_datetime_object_index(): ) result = concat([s, s2], axis=1) tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("keys", [["e", "f", "f"], ["f", "e", "f"]]) +def test_duplicate_keys(keys): + # GH 33654 + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + s1 = Series([7, 8, 9], name="c") + s2 = Series([10, 11, 12], name="d") + result = concat([df, s1, s2], axis=1, keys=keys) + expected_values = [[1, 4, 7, 10], [2, 5, 8, 11], [3, 6, 9, 12]] + expected_columns = pd.MultiIndex.from_tuples( + [(keys[0], "a"), (keys[0], "b"), (keys[1], "c"), (keys[2], "d")] + ) + expected = DataFrame(expected_values, columns=expected_columns) + tm.assert_frame_equal(result, expected) From 575ebbd57823811e79548d56a6b5f00ce704eaa8 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Mon, 27 Apr 2020 06:27:29 +0100 Subject: [PATCH 2/2] any(mask) -> mask.any() --- pandas/core/reshape/concat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 8aaa419b67e42..2f66cbf44788d 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -620,7 +620,7 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None) -> MultiInde to_concat = [] for key, index in zip(hlevel, indexes): mask = level == key - if not any(mask): + if not mask.any(): raise ValueError(f"Key {key} not in level {level}") i = np.nonzero(level == key)[0][0]