diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 7ad7e8f5a27b0..23a772ae6c405 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -723,6 +723,7 @@ Reshaping
 - Bug in :meth:`concat` where when passing a non-dict mapping as ``objs`` would raise a ``TypeError`` (:issue:`32863`)
 - :meth:`DataFrame.agg` now provides more descriptive ``SpecificationError`` message when attempting to aggregating non-existant column (:issue:`32755`)
 - Bug in :meth:`DataFrame.unstack` when MultiIndexed columns and MultiIndexed rows were used (:issue:`32624`, :issue:`24729` and :issue:`28306`)
+- Bug in :func:`concat` that prevented concatenating ``DataFrame`` and ``Series`` objects with duplicate ``keys`` (:issue:`33654`)
 - Bug in :func:`cut` raised an error when non-unique labels (:issue:`33141`)
 
 
diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py
index a868e663b06a5..2f66cbf44788d 100644
--- a/pandas/core/reshape/concat.py
+++ b/pandas/core/reshape/concat.py
@@ -619,10 +619,12 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None) -> MultiInde
         for hlevel, level in zip(zipped, levels):
             to_concat = []
             for key, index in zip(hlevel, indexes):
-                try:
-                    i = level.get_loc(key)
-                except KeyError as err:
-                    raise ValueError(f"Key {key} not in level {level}") from err
+                # Compare instead of ``level.get_loc(key)``: with duplicate keys
+                # the level is presumably non-unique, and only the first hit is used.
+                mask = level == key
+                if not mask.any():
+                    raise ValueError(f"Key {key} not in level {level}")
+                i = np.nonzero(mask)[0][0]
 
                 to_concat.append(np.repeat(i, len(index)))
             codes_list.append(np.concatenate(to_concat))
diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py
index 7c01664df0607..6625ab86cfed4 100644
--- a/pandas/tests/reshape/test_concat.py
+++ b/pandas/tests/reshape/test_concat.py
@@ -2802,3 +2802,18 @@ def test_concat_multiindex_datetime_object_index():
     )
     result = concat([s, s2], axis=1)
     tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize("keys", [["e", "f", "f"], ["f", "e", "f"]])
+def test_duplicate_keys(keys):
+    # GH 33654: repeated entries in ``keys`` must be accepted
+    df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
+    s1 = Series([7, 8, 9], name="c")
+    s2 = Series([10, 11, 12], name="d")
+    result = concat([df, s1, s2], axis=1, keys=keys)
+    expected_values = [[1, 4, 7, 10], [2, 5, 8, 11], [3, 6, 9, 12]]
+    expected_columns = pd.MultiIndex.from_tuples(
+        [(keys[0], "a"), (keys[0], "b"), (keys[1], "c"), (keys[2], "d")]
+    )
+    expected = DataFrame(expected_values, columns=expected_columns)
+    tm.assert_frame_equal(result, expected)