Skip to content

Commit f81c4ee

Browse files
Nico Cernek authored and Marco Gorelli committed
add failing test to check row order preservation
correct the imports; broken commit with a bunch of print statements and comments; add test for left merge; swap left and right keys when how == "right"; correct old test: right-merge row order is now the same as the right df; clean up spacing and delete temp code; add whatsnew; replace .from_records with default constructor; add GH issue # to tests; revert commit ed54bec; change logic to swap left and right if how==right; clean formatting; rename vars and add comment for clarity; combine tests into one; update whatsnew; Update doc/source/whatsnew/v1.0.0.rst (Co-Authored-By: William Ayd <[email protected]>); add before and after examples; linting cleanup; changes requested by jreback; update docs
1 parent 56cc7f4 commit f81c4ee

File tree

3 files changed

+87
-14
lines changed

3 files changed

+87
-14
lines changed

doc/source/whatsnew/v1.0.0.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1244,8 +1244,13 @@ Reshaping
12441244
- Bug in :func:`melt` where supplying mixed strings and numeric values for ``id_vars`` or ``value_vars`` would incorrectly raise a ``ValueError`` (:issue:`29718`)
12451245
- Dtypes are now preserved when transposing a ``DataFrame`` where each column is the same extension dtype (:issue:`30091`)
12461246
- Bug in :func:`merge_asof` merging on a tz-aware ``left_index`` and ``right_on`` a tz-aware column (:issue:`29864`)
1247+
<<<<<<< HEAD
12471248
- Improved error message and docstring in :func:`cut` and :func:`qcut` when `labels=True` (:issue:`13318`)
12481249
- Bug in missing `fill_na` parameter to :meth:`DataFrame.unstack` with list of levels (:issue:`30740`)
1250+
- :meth:`DataFrame.merge` now preserves right frame's row order when executing a right merge (:issue:`27453`)
1251+
=======
1252+
>>>>>>> 2b1b67592... changes requested by jreback
1253+
-
12491254

12501255
Sparse
12511256
^^^^^^

pandas/core/reshape/merge.py

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -567,10 +567,10 @@ def __init__(
567567
indicator: bool = False,
568568
validate=None,
569569
):
570-
_left = _validate_operand(left)
571-
_right = _validate_operand(right)
572-
self.left = self.orig_left = _left
573-
self.right = self.orig_right = _right
570+
left = validate_operand(left)
571+
right = validate_operand(right)
572+
self.left = self.orig_left = left
573+
self.right = self.orig_right = right
574574
self.how = how
575575
self.axis = axis
576576

@@ -1292,6 +1292,9 @@ def _get_join_indexers(
12921292
right_keys
12931293
), "left_key and right_keys must be the same length"
12941294

1295+
# bind `sort` arg. of _factorize_keys
1296+
fkeys = partial(_factorize_keys, sort=sort)
1297+
12951298
# get left & right join labels and num. of levels at each location
12961299
mapped = (
12971300
_factorize_keys(left_keys[n], right_keys[n], sort=sort)
@@ -1306,15 +1309,20 @@ def _get_join_indexers(
13061309
# factorize keys to a dense i8 space
13071310
# `count` is the num. of unique keys
13081311
# set(lkey) | set(rkey) == range(count)
1309-
lkey, rkey, count = _factorize_keys(lkey, rkey, sort=sort)
13101312

1313+
# flip left and right keys if performing a right merge
1314+
# to preserve right merge row order (GH 27453)
1315+
if how == "right":
1316+
factorized_rkey, factorized_lkey, count = fkeys(rkey, lkey)
1317+
else:
1318+
factorized_lkey, factorized_rkey, count = fkeys(lkey, rkey)
13111319
# preserve left frame order if how == 'left' and sort == False
13121320
kwargs = copy.copy(kwargs)
13131321
if how == "left":
13141322
kwargs["sort"] = sort
13151323
join_func = _join_functions[how]
13161324

1317-
return join_func(lkey, rkey, count, **kwargs)
1325+
return join_func(factorized_lkey, factorized_rkey, count, **kwargs)
13181326

13191327

13201328
def _restore_dropped_levels_multijoin(

pandas/tests/reshape/merge/test_merge.py

Lines changed: 68 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1286,17 +1286,17 @@ def test_merge_on_index_with_more_values(self, how, index, expected_index):
12861286
# GH 24212
12871287
# pd.merge gets [0, 1, 2, -1, -1, -1] as left_indexer, ensure that
12881288
# -1 is interpreted as a missing value instead of the last element
1289-
df1 = pd.DataFrame({"a": [1, 2, 3], "key": [0, 2, 2]}, index=index)
1290-
df2 = pd.DataFrame({"b": [1, 2, 3, 4, 5]})
1289+
df1 = pd.DataFrame({"a": [0, 1, 2], "key": [0, 1, 2]}, index=index)
1290+
df2 = pd.DataFrame({"b": [0, 1, 2, 3, 4, 5]})
12911291
result = df1.merge(df2, left_on="key", right_index=True, how=how)
12921292
expected = pd.DataFrame(
12931293
[
1294-
[1.0, 0, 1],
1295-
[2.0, 2, 3],
1296-
[3.0, 2, 3],
1297-
[np.nan, 1, 2],
1298-
[np.nan, 3, 4],
1299-
[np.nan, 4, 5],
1294+
[0, 0, 0],
1295+
[1, 1, 1],
1296+
[2, 2, 2],
1297+
[np.nan, 3, 3],
1298+
[np.nan, 4, 4],
1299+
[np.nan, 5, 5],
13001300
],
13011301
columns=["a", "key", "b"],
13021302
)
@@ -2167,3 +2167,63 @@ def test_merge_datetime_upcast_dtype():
21672167
}
21682168
)
21692169
tm.assert_frame_equal(result, expected)
2170+
2171+
2172+
@pytest.mark.parametrize("how", ["left", "right"])
def test_merge_preserves_row_order(how):
    """Check that a left/right merge keeps the row order of the preserved frame.

    The frame listing people is always placed on the side named by ``how``
    (the left operand for ``how="left"``, the right operand for
    ``how="right"``), so the merged rows must come back in that frame's
    original order in both cases.
    """
    # GH 27453
    population = [
        ("Jenn", "Jamaica", 3),
        ("Beth", "Bulgaria", 7),
        ("Carl", "Canada", 30),
    ]
    columns = ["name", "country", "population"]
    population_df = DataFrame(population, columns=columns)

    people = [("Abe", "America"), ("Beth", "Bulgaria"), ("Carl", "Canada")]
    columns = ["name", "country"]
    people_df = DataFrame(people, columns=columns)

    # Rows follow the order of ``people``; "Abe" has no population match.
    expected_data = [
        ("Abe", "America", np.nan),
        ("Beth", "Bulgaria", 7),
        ("Carl", "Canada", 30),
    ]
    expected_cols = ["name", "country", "population"]
    expected = DataFrame(expected_data, columns=expected_cols)

    # Put ``people_df`` on the side whose row order ``how`` must preserve.
    if how == "right":
        left_df, right_df = population_df, people_df
    else:
        left_df, right_df = people_df, population_df

    result = left_df.merge(right_df, on=["name", "country"], how=how)
    tm.assert_frame_equal(result, expected)


def test_left_merge_preserves_row_order():
    """Check that a left merge returns rows in the left frame's order."""
    # GH 27453
    population = [
        ("Jenn", "Jamaica", 3),
        ("Beth", "Bulgaria", 7),
        ("Carl", "Canada", 30),
    ]
    columns = ["name", "country", "population"]
    pop = DataFrame(population, columns=columns)

    people = [("Abe", "America"), ("Beth", "Bulgaria"), ("Carl", "Canada")]
    columns = ["name", "country"]
    ppl = DataFrame(people, columns=columns)

    # Rows follow the order of ``people``; "Abe" has no population match.
    expected_data = [
        ("Abe", "America", np.nan),
        ("Beth", "Bulgaria", 7),
        ("Carl", "Canada", 30),
    ]
    expected_cols = ["name", "country", "population"]
    expected = DataFrame(expected_data, columns=expected_cols)

    result = ppl.merge(pop, on=["name", "country"], how="left")
    tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)