From f57975da709e5632ba5504a4daaa8062df0d0221 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sat, 12 Jan 2019 16:11:47 +0000 Subject: [PATCH 01/15] move tests to create section breaks --- .../tests/indexing/multiindex/test_getitem.py | 102 ++++++++++-------- 1 file changed, 55 insertions(+), 47 deletions(-) diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py index 88e96329105dd..919761b92c725 100644 --- a/pandas/tests/indexing/multiindex/test_getitem.py +++ b/pandas/tests/indexing/multiindex/test_getitem.py @@ -32,6 +32,10 @@ def dataframe_with_duplicate_index(): return DataFrame(data, index=index, columns=columns) +# ---------------------------------------------------------------------------- +# test indexing of Series with multi-level Index +# ---------------------------------------------------------------------------- + @pytest.mark.parametrize('access_method', [lambda s, x: s[:, x], lambda s, x: s.loc[:, x], lambda s, x: s.xs(x, level=1)]) @@ -115,53 +119,6 @@ def test_getitem_duplicates_multiindex_missing_indexers(indexer, is_level1, tm.assert_series_equal(result, expected) -@pytest.mark.parametrize('columns_indexer', [ - ([], slice(None)), - (['foo'], []) -]) -def test_getitem_duplicates_multiindex_empty_indexer(columns_indexer): - # GH 8737 - # empty indexer - multi_index = MultiIndex.from_product((['foo', 'bar', 'baz'], - ['alpha', 'beta'])) - df = DataFrame(np.random.randn(5, 6), index=range(5), columns=multi_index) - df = df.sort_index(level=0, axis=1) - - expected = DataFrame(index=range(5), columns=multi_index.reindex([])[0]) - result = df.loc[:, columns_indexer] - tm.assert_frame_equal(result, expected) - - -def test_getitem_duplicates_multiindex_non_scalar_type_object(): - # regression from < 0.14.0 - # GH 7914 - df = DataFrame([[np.mean, np.median], ['mean', 'median']], - columns=MultiIndex.from_tuples([('functs', 'mean'), - ('functs', 'median')]), - index=['function', 'name']) - result = df.loc['function', ('functs', 'mean')] - expected = np.mean - assert result == expected - - -def test_getitem_simple(multiindex_dataframe_random_data): - df = multiindex_dataframe_random_data.T - expected = df.values[:, 0] - result = df['foo', 'one'].values - tm.assert_almost_equal(result, expected) - - -@pytest.mark.parametrize('indexer,msg', [ - (lambda df: df[('foo', 'four')], r"\('foo', 'four'\)"), - (lambda df: df['foobar'], "'foobar'") -]) -def test_getitem_simple_key_error( - multiindex_dataframe_random_data, indexer, msg): - df = multiindex_dataframe_random_data.T - with pytest.raises(KeyError, match=msg): - indexer(df) - - @pytest.mark.parametrize('indexer', [ lambda s: s[2000, 3], lambda s: s.loc[2000, 3] @@ -227,6 +184,57 @@ def test_series_getitem_corner_generator( tm.assert_series_equal(result, expected) +# ---------------------------------------------------------------------------- +# test indexing of DataFrame with multi-level Index +# ---------------------------------------------------------------------------- + +@pytest.mark.parametrize('columns_indexer', [ + ([], slice(None)), + (['foo'], []) +]) +def test_getitem_duplicates_multiindex_empty_indexer(columns_indexer): + # GH 8737 + # empty indexer + multi_index = MultiIndex.from_product((['foo', 'bar', 'baz'], + ['alpha', 'beta'])) + df = DataFrame(np.random.randn(5, 6), index=range(5), columns=multi_index) + df = df.sort_index(level=0, axis=1) + + expected = DataFrame(index=range(5), columns=multi_index.reindex([])[0]) + result = df.loc[:, columns_indexer] + tm.assert_frame_equal(result, expected) + + +def test_getitem_duplicates_multiindex_non_scalar_type_object(): + # regression from < 0.14.0 + # GH 7914 + df = DataFrame([[np.mean, np.median], ['mean', 'median']], + columns=MultiIndex.from_tuples([('functs', 'mean'), + ('functs', 'median')]), + index=['function', 'name']) + result = df.loc['function', ('functs', 'mean')] + expected = np.mean + assert result == expected + + +def test_getitem_simple(multiindex_dataframe_random_data): + df = multiindex_dataframe_random_data.T + expected = df.values[:, 0] + result = df['foo', 'one'].values + tm.assert_almost_equal(result, expected) + + +@pytest.mark.parametrize('indexer,msg', [ + (lambda df: df[('foo', 'four')], r"\('foo', 'four'\)"), + (lambda df: df['foobar'], "'foobar'") +]) +def test_getitem_simple_key_error( + multiindex_dataframe_random_data, indexer, msg): + df = multiindex_dataframe_random_data.T + with pytest.raises(KeyError, match=msg): + indexer(df) + + def test_frame_getitem_multicolumn_empty_level(): df = DataFrame({'a': ['1', '2', '3'], 'b': ['2', '3', '4']}) df.columns = [['level1 item1', 'level1 item2'], ['', 'level2 item2'], From 986d89fa25fb6208ce5d647468a8c31bdffdfb07 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sat, 12 Jan 2019 16:17:53 +0000 Subject: [PATCH 02/15] move test_getitem_duplicates_multiindex_missing_indexers to test_loc --- .../tests/indexing/multiindex/test_getitem.py | 37 ------------------- pandas/tests/indexing/multiindex/test_loc.py | 37 +++++++++++++++++++ 2 files changed, 37 insertions(+), 37 deletions(-) diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py index 919761b92c725..840c784d42880 100644 --- a/pandas/tests/indexing/multiindex/test_getitem.py +++ b/pandas/tests/indexing/multiindex/test_getitem.py @@ -3,7 +3,6 @@ from pandas.compat import range, u, zip -import pandas as pd from pandas import DataFrame, Index, MultiIndex, Series import pandas.core.common as com from pandas.core.indexing import IndexingError @@ -83,42 +82,6 @@ def test_getitem_duplicates_multiindex(level0_value): tm.assert_series_equal(result, expected) -@pytest.mark.parametrize('indexer, is_level1, expected_error', [ - ([], False, None), # empty ok - (['A'], False, None), - (['A', 'D'], False, None), - (['D'], False, r"\['D'\] not in index"), # not any values found - (pd.IndexSlice[:, ['foo']], True, None), - (pd.IndexSlice[:, ['foo', 'bah']], True, None) -]) -def test_getitem_duplicates_multiindex_missing_indexers(indexer, is_level1, - expected_error): - # GH 7866 - # multi-index slicing with missing indexers - idx = MultiIndex.from_product([['A', 'B', 'C'], - ['foo', 'bar', 'baz']], - names=['one', 'two']) - s = Series(np.arange(9, dtype='int64'), index=idx).sort_index() - - if indexer == []: - expected = s.iloc[[]] - elif is_level1: - expected = Series([0, 3, 6], index=MultiIndex.from_product( - [['A', 'B', 'C'], ['foo']], names=['one', 'two'])).sort_index() - else: - exp_idx = MultiIndex.from_product([['A'], ['foo', 'bar', 'baz']], - names=['one', 'two']) - expected = Series(np.arange(3, dtype='int64'), - index=exp_idx).sort_index() - - if expected_error is not None: - with pytest.raises(KeyError, match=expected_error): - s.loc[indexer] - else: - result = s.loc[indexer] - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize('indexer', [ lambda s: s[2000, 3], lambda s: s.loc[2000, 3] diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index 75995a24a2ad1..78c94532e7965 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -4,6 +4,7 @@ import numpy as np import pytest +import pandas as pd from pandas import DataFrame, Index, MultiIndex, Series from pandas.util import testing as tm @@ -247,3 +248,39 @@ def convert_nested_indexer(indexer_type, keys): index=MultiIndex.from_product(keys)) tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize('indexer, is_level1, expected_error', [ + ([], False, None), # empty ok + (['A'], False, None), + (['A', 'D'], False, None), + (['D'], False, r"\['D'\] not in index"), # not any values found + (pd.IndexSlice[:, ['foo']], True, None), + (pd.IndexSlice[:, ['foo', 'bah']], True, None) +]) +def test_getitem_duplicates_multiindex_missing_indexers(indexer, is_level1, + expected_error): + # GH 7866 + # multi-index slicing with missing indexers + idx = MultiIndex.from_product([['A', 'B', 'C'], + ['foo', 'bar', 'baz']], + names=['one', 'two']) + s = Series(np.arange(9, dtype='int64'), index=idx).sort_index() + + if indexer == []: + expected = s.iloc[[]] + elif is_level1: + expected = Series([0, 3, 6], index=MultiIndex.from_product( + [['A', 'B', 'C'], ['foo']], names=['one', 'two'])).sort_index() + else: + exp_idx = MultiIndex.from_product([['A'], ['foo', 'bar', 'baz']], + names=['one', 'two']) + expected = Series(np.arange(3, dtype='int64'), + index=exp_idx).sort_index() + + if expected_error is not None: + with pytest.raises(KeyError, match=expected_error): + s.loc[indexer] + else: + result = s.loc[indexer] + tm.assert_series_equal(result, expected) From ab60d32bba0cb322e04bdc631ebcd072f03e8bab Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sat, 12 Jan 2019 16:20:32 +0000 Subject: [PATCH 03/15] move test_series_getitem_fancy to test_loc.py --- pandas/tests/indexing/multiindex/test_getitem.py | 14 -------------- pandas/tests/indexing/multiindex/test_loc.py | 14 ++++++++++++++ 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py index 840c784d42880..7b293f94687d7 100644 --- a/pandas/tests/indexing/multiindex/test_getitem.py +++ b/pandas/tests/indexing/multiindex/test_getitem.py @@ -109,20 +109,6 @@ def test_series_getitem_returns_scalar( assert result == expected -@pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning") -@pytest.mark.parametrize('indexer', [ - lambda s: s.loc[[(2000, 3, 10), (2000, 3, 13)]], - lambda s: s.ix[[(2000, 3, 10), (2000, 3, 13)]] -]) -def test_series_getitem_fancy( - multiindex_year_month_day_dataframe_random_data, indexer): - s = multiindex_year_month_day_dataframe_random_data['A'] - expected = s.reindex(s.index[49:51]) - - result = indexer(s) - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize('indexer,error,msg', [ (lambda s: s.__getitem__((2000, 3, 4)), KeyError, '356'), (lambda s: s[(2000, 3, 4)], KeyError, '356'), diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index 78c94532e7965..19055c77164db 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -284,3 +284,17 @@ def test_getitem_duplicates_multiindex_missing_indexers(indexer, is_level1, else: result = s.loc[indexer] tm.assert_series_equal(result, expected) + + +@pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning") +@pytest.mark.parametrize('indexer', [ + lambda s: s.loc[[(2000, 3, 10), (2000, 3, 13)]], + lambda s: s.ix[[(2000, 3, 10), (2000, 3, 13)]] +]) +def test_series_getitem_fancy( + multiindex_year_month_day_dataframe_random_data, indexer): + s = multiindex_year_month_day_dataframe_random_data['A'] + expected = s.reindex(s.index[49:51]) + + result = indexer(s) + tm.assert_series_equal(result, expected) From 046243cdd44befcb2d2fe59665bdc5ec0c9b6f77 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sat, 12 Jan 2019 16:23:40 +0000 Subject: [PATCH 04/15] move test_getitem_duplicates_multiindex_empty_indexer to test_loc.py --- .../tests/indexing/multiindex/test_getitem.py | 19 +------------------ pandas/tests/indexing/multiindex/test_loc.py | 17 +++++++++++++++++ 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py index 7b293f94687d7..23dfab5ed7b0f 100644 --- a/pandas/tests/indexing/multiindex/test_getitem.py +++ b/pandas/tests/indexing/multiindex/test_getitem.py @@ -1,7 +1,7 @@ import numpy as np import pytest -from pandas.compat import range, u, zip +from pandas.compat import u, zip from pandas import DataFrame, Index, MultiIndex, Series import pandas.core.common as com @@ -137,23 +137,6 @@ def test_series_getitem_corner_generator( # test indexing of DataFrame with multi-level Index # ---------------------------------------------------------------------------- -@pytest.mark.parametrize('columns_indexer', [ - ([], slice(None)), - (['foo'], []) -]) -def test_getitem_duplicates_multiindex_empty_indexer(columns_indexer): - # GH 8737 - # empty indexer - multi_index = MultiIndex.from_product((['foo', 'bar', 'baz'], - ['alpha', 'beta'])) - df = DataFrame(np.random.randn(5, 6), index=range(5), columns=multi_index) - df = df.sort_index(level=0, axis=1) - - expected = DataFrame(index=range(5), columns=multi_index.reindex([])[0]) - result = df.loc[:, columns_indexer] - tm.assert_frame_equal(result, expected) - - def test_getitem_duplicates_multiindex_non_scalar_type_object(): # regression from < 0.14.0 # GH 7914 diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index 19055c77164db..7c3027c467592 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -298,3 +298,20 @@ def test_series_getitem_fancy( result = indexer(s) tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize('columns_indexer', [ + ([], slice(None)), + (['foo'], []) +]) +def test_getitem_duplicates_multiindex_empty_indexer(columns_indexer): + # GH 8737 + # empty indexer + multi_index = MultiIndex.from_product((['foo', 'bar', 'baz'], + ['alpha', 'beta'])) + df = DataFrame(np.random.randn(5, 6), index=range(5), columns=multi_index) + df = df.sort_index(level=0, axis=1) + + expected = DataFrame(index=range(5), columns=multi_index.reindex([])[0]) + result = df.loc[:, columns_indexer] + tm.assert_frame_equal(result, expected) From 9c3f4d314f799d507d2e2aa6d58dc70757b80085 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sat, 12 Jan 2019 16:26:09 +0000 Subject: [PATCH 05/15] move test_getitem_duplicates_multiindex_non_scalar_type_object --- pandas/tests/indexing/multiindex/test_getitem.py | 12 ------------ pandas/tests/indexing/multiindex/test_loc.py | 12 ++++++++++++ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py index 23dfab5ed7b0f..a1e1e0d3d0b15 100644 --- a/pandas/tests/indexing/multiindex/test_getitem.py +++ b/pandas/tests/indexing/multiindex/test_getitem.py @@ -137,18 +137,6 @@ def test_series_getitem_corner_generator( # test indexing of DataFrame with multi-level Index # ---------------------------------------------------------------------------- -def test_getitem_duplicates_multiindex_non_scalar_type_object(): - # regression from < 0.14.0 - # GH 7914 - df = DataFrame([[np.mean, np.median], ['mean', 'median']], - columns=MultiIndex.from_tuples([('functs', 'mean'), - ('functs', 'median')]), - index=['function', 'name']) - result = df.loc['function', ('functs', 'mean')] - expected = np.mean - assert result == expected - - def test_getitem_simple(multiindex_dataframe_random_data): df = multiindex_dataframe_random_data.T expected = df.values[:, 0] diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index 7c3027c467592..c446ac78ecc69 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -315,3 +315,15 @@ def test_getitem_duplicates_multiindex_empty_indexer(columns_indexer): expected = DataFrame(index=range(5), columns=multi_index.reindex([])[0]) result = df.loc[:, columns_indexer] tm.assert_frame_equal(result, expected) + + +def test_getitem_duplicates_multiindex_non_scalar_type_object(): + # regression from < 0.14.0 + # GH 7914 + df = DataFrame([[np.mean, np.median], ['mean', 'median']], + columns=MultiIndex.from_tuples([('functs', 'mean'), + ('functs', 'median')]), + index=['function', 'name']) + result = df.loc['function', ('functs', 'mean')] + expected = np.mean + assert result == expected From 1e59f62175cad6548b64a883e450543462c00968 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sat, 12 Jan 2019 16:40:34 +0000 Subject: [PATCH 06/15] more explicit KeyError checks --- .../tests/indexing/multiindex/test_getitem.py | 34 +++++++++---------- 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py index a1e1e0d3d0b15..9e39a216ee3dd 100644 --- a/pandas/tests/indexing/multiindex/test_getitem.py +++ b/pandas/tests/indexing/multiindex/test_getitem.py @@ -68,12 +68,10 @@ def test_getitem_duplicates_multiindex(level0_value): # confirm indexing on missing value raises KeyError if level0_value != 'A': - msg = "'A'" - with pytest.raises(KeyError, match=msg): + with pytest.raises(KeyError, match=r"^'A'$"): df.val['A'] - msg = "'X'" - with pytest.raises(KeyError, match=msg): + with pytest.raises(KeyError, match=r"^'X'$"): df.val['X'] result = df.val[level0_value] @@ -109,9 +107,9 @@ def test_series_getitem_returns_scalar( assert result == expected -@pytest.mark.parametrize('indexer,error,msg', [ - (lambda s: s.__getitem__((2000, 3, 4)), KeyError, '356'), - (lambda s: s[(2000, 3, 4)], KeyError, '356'), +@pytest.mark.parametrize('indexer,expected_error,expected_error_msg', [ + (lambda s: s.__getitem__((2000, 3, 4)), KeyError, r"^356$"), + (lambda s: s[(2000, 3, 4)], KeyError, r"^356$"), (lambda s: s.loc[(2000, 3, 4)], IndexingError, 'Too many indexers'), (lambda s: s.__getitem__(len(s)), IndexError, 'index out of bounds'), (lambda s: s[len(s)], IndexError, 'index out of bounds'), @@ -119,9 +117,10 @@ def test_series_getitem_returns_scalar( 'single positional indexer is out-of-bounds') ]) def test_series_getitem_indexing_errors( - multiindex_year_month_day_dataframe_random_data, indexer, error, msg): + multiindex_year_month_day_dataframe_random_data, indexer, + expected_error, expected_error_msg): s = multiindex_year_month_day_dataframe_random_data['A'] - with pytest.raises(error, match=msg): + with pytest.raises(expected_error, match=expected_error_msg): indexer(s) @@ -144,14 +143,14 @@ def test_getitem_simple(multiindex_dataframe_random_data): tm.assert_almost_equal(result, expected) -@pytest.mark.parametrize('indexer,msg', [ - (lambda df: df[('foo', 'four')], r"\('foo', 'four'\)"), - (lambda df: df['foobar'], "'foobar'") +@pytest.mark.parametrize('indexer,expected_error_msg', [ + (lambda df: df[('foo', 'four')], r"^\('foo', 'four'\)$"), + (lambda df: df['foobar'], r"^'foobar'$") ]) def test_getitem_simple_key_error( - multiindex_dataframe_random_data, indexer, msg): + multiindex_dataframe_random_data, indexer, expected_error_msg): df = multiindex_dataframe_random_data.T - with pytest.raises(KeyError, match=msg): + with pytest.raises(KeyError, match=expected_error_msg): indexer(df) @@ -202,9 +201,8 @@ def test_getitem_int(frame_random_data_integer_multi_index): def test_getitem_int_raises_exception(frame_random_data_integer_multi_index): df = frame_random_data_integer_multi_index - msg = "3" - with pytest.raises(KeyError, match=msg): - df.loc.__getitem__(3) + with pytest.raises(KeyError, match=r"^3$"): + df.loc[3] def test_getitem_iloc(multiindex_dataframe_random_data): @@ -246,7 +244,7 @@ def test_getitem_lowerdim_corner(multiindex_dataframe_random_data): df = multiindex_dataframe_random_data # test setup - check key not in dataframe - with pytest.raises(KeyError, match="11"): + with pytest.raises(KeyError, match=r"^11$"): df.loc[('bar', 'three'), 'B'] # in theory should be inserting in a sorted space???? From 6e81758ba1e800e3c241d6d48bcb1f3467f16b7a Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sat, 12 Jan 2019 16:42:32 +0000 Subject: [PATCH 07/15] move test_getitem_tuple_plus_slice --- pandas/tests/indexing/multiindex/test_getitem.py | 12 ------------ pandas/tests/indexing/multiindex/test_loc.py | 12 ++++++++++++ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py index 9e39a216ee3dd..79557a395b376 100644 --- a/pandas/tests/indexing/multiindex/test_getitem.py +++ b/pandas/tests/indexing/multiindex/test_getitem.py @@ -165,18 +165,6 @@ def test_frame_getitem_multicolumn_empty_level(): tm.assert_frame_equal(result, expected) -def test_getitem_tuple_plus_slice(): - # GH 671 - df = DataFrame({'a': np.arange(10), - 'b': np.arange(10), - 'c': np.random.randn(10), - 'd': np.random.randn(10)} - ).set_index(['a', 'b']) - expected = df.loc[0, 0] - result = df.loc[(0, 0), :] - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize('indexer,expected_slice', [ (lambda df: df['foo'], slice(3)), (lambda df: df['bar'], slice(3, 5)), diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index c446ac78ecc69..b6b2257775195 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -327,3 +327,15 @@ def test_getitem_duplicates_multiindex_non_scalar_type_object(): result = df.loc['function', ('functs', 'mean')] expected = np.mean assert result == expected + + +def test_getitem_tuple_plus_slice(): + # GH 671 + df = DataFrame({'a': np.arange(10), + 'b': np.arange(10), + 'c': np.random.randn(10), + 'd': np.random.randn(10)} + ).set_index(['a', 'b']) + expected = df.loc[0, 0] + result = df.loc[(0, 0), :] + tm.assert_series_equal(result, expected) From 7ebd299c913e4d2de88558a1deb9a6d41907629d Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sat, 12 Jan 2019 16:46:43 +0000 Subject: [PATCH 08/15] move test_getitem_int + raises_exception --- .../tests/indexing/multiindex/test_getitem.py | 22 ------------------- pandas/tests/indexing/multiindex/test_loc.py | 22 +++++++++++++++++++ 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py index 79557a395b376..a398e93f78796 100644 --- a/pandas/tests/indexing/multiindex/test_getitem.py +++ b/pandas/tests/indexing/multiindex/test_getitem.py @@ -9,14 +9,6 @@ from pandas.util import testing as tm -@pytest.fixture -def frame_random_data_integer_multi_index(): - levels = [[0, 1], [0, 1, 2]] - codes = [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]] - index = MultiIndex(levels=levels, codes=codes) - return DataFrame(np.random.randn(6, 2), index=index) - - @pytest.fixture def dataframe_with_duplicate_index(): """Fixture for DataFrame used in tests for gh-4145 and gh-4146""" @@ -179,20 +171,6 @@ def test_getitem_toplevel( tm.assert_frame_equal(result, expected) -def test_getitem_int(frame_random_data_integer_multi_index): - df = frame_random_data_integer_multi_index - result = df.loc[1] - expected = df[-3:] - expected.index = expected.index.droplevel(0) - tm.assert_frame_equal(result, expected) - - -def test_getitem_int_raises_exception(frame_random_data_integer_multi_index): - df = frame_random_data_integer_multi_index - with pytest.raises(KeyError, match=r"^3$"): - df.loc[3] - - def test_getitem_iloc(multiindex_dataframe_random_data): df = multiindex_dataframe_random_data result = df.iloc[2] diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index b6b2257775195..cf21ff1406b76 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -16,6 +16,14 @@ def single_level_multiindex(): codes=[[0, 1, 2, 3]], names=['first']) +@pytest.fixture +def frame_random_data_integer_multi_index(): + levels = [[0, 1], [0, 1, 2]] + codes = [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]] + index = MultiIndex(levels=levels, codes=codes) + return DataFrame(np.random.randn(6, 2), index=index) + + @pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning") class TestMultiIndexLoc(object): @@ -339,3 +347,17 @@ def test_getitem_tuple_plus_slice(): expected = df.loc[0, 0] result = df.loc[(0, 0), :] tm.assert_series_equal(result, expected) + + +def test_getitem_int(frame_random_data_integer_multi_index): + df = frame_random_data_integer_multi_index + result = df.loc[1] + expected = df[-3:] + expected.index = expected.index.droplevel(0) + tm.assert_frame_equal(result, expected) + + +def test_getitem_int_raises_exception(frame_random_data_integer_multi_index): + df = frame_random_data_integer_multi_index + with pytest.raises(KeyError, match=r"^3$"): + df.loc[3] From 1bc97fe52cbb6e59b2efce5b392e876205ade689 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sat, 12 Jan 2019 16:49:15 +0000 Subject: [PATCH 09/15] move test_getitem_iloc --- pandas/tests/indexing/multiindex/test_getitem.py | 7 ------- pandas/tests/indexing/multiindex/test_iloc.py | 7 +++++++ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py index a398e93f78796..ea7c4e717183a 100644 --- a/pandas/tests/indexing/multiindex/test_getitem.py +++ b/pandas/tests/indexing/multiindex/test_getitem.py @@ -171,13 +171,6 @@ def test_getitem_toplevel( tm.assert_frame_equal(result, expected) -def test_getitem_iloc(multiindex_dataframe_random_data): - df = multiindex_dataframe_random_data - result = df.iloc[2] - expected = df.xs(df.index[2]) - tm.assert_series_equal(result, expected) - - def test_frame_setitem_view_direct(multiindex_dataframe_random_data): # this works because we are modifying the underlying array # really a no-no diff --git a/pandas/tests/indexing/multiindex/test_iloc.py b/pandas/tests/indexing/multiindex/test_iloc.py index a1681c1239aa3..bdd505804c82b 100644 --- a/pandas/tests/indexing/multiindex/test_iloc.py +++ b/pandas/tests/indexing/multiindex/test_iloc.py @@ -142,3 +142,10 @@ def test_iloc_setitem_int_multiindex_series(data, indexes, values, expected_k): df['k'] = expected_k expected = df.k tm.assert_series_equal(series, expected) + + +def test_getitem_iloc(multiindex_dataframe_random_data): + df = multiindex_dataframe_random_data + result = df.iloc[2] + expected = df.xs(df.index[2]) + tm.assert_series_equal(result, expected) From 13b6f877a15a4764b1f9b196f92430b9e7f76fdb Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sat, 12 Jan 2019 17:03:50 +0000 Subject: [PATCH 10/15] move test_frame_setitem_* --- .../tests/indexing/multiindex/test_getitem.py | 29 ------------------- .../tests/indexing/multiindex/test_setitem.py | 29 +++++++++++++++++++ 2 files changed, 29 insertions(+), 29 deletions(-) diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py index ea7c4e717183a..96a7b19db7c51 100644 --- a/pandas/tests/indexing/multiindex/test_getitem.py +++ b/pandas/tests/indexing/multiindex/test_getitem.py @@ -4,7 +4,6 @@ from pandas.compat import u, zip from pandas import DataFrame, Index, MultiIndex, Series -import pandas.core.common as com from pandas.core.indexing import IndexingError from pandas.util import testing as tm @@ -171,34 +170,6 @@ def test_getitem_toplevel( tm.assert_frame_equal(result, expected) -def test_frame_setitem_view_direct(multiindex_dataframe_random_data): - # this works because we are modifying the underlying array - # really a no-no - df = multiindex_dataframe_random_data.T - df['foo'].values[:] = 0 - assert (df['foo'].values == 0).all() - - -def test_frame_setitem_copy_raises(multiindex_dataframe_random_data): - # will raise/warn as its chained assignment - df = multiindex_dataframe_random_data.T - msg = "A value is trying to be set on a copy of a slice from a DataFrame" - with pytest.raises(com.SettingWithCopyError, match=msg): - df['foo']['one'] = 2 - - -def test_frame_setitem_copy_no_write(multiindex_dataframe_random_data): - frame = multiindex_dataframe_random_data.T - expected = frame - df = frame.copy() - msg = "A value is trying to be set on a copy of a slice from a DataFrame" - with pytest.raises(com.SettingWithCopyError, match=msg): - df['foo']['one'] = 2 - - result = df - tm.assert_frame_equal(result, expected) - - def test_getitem_lowerdim_corner(multiindex_dataframe_random_data): df = multiindex_dataframe_random_data diff --git a/pandas/tests/indexing/multiindex/test_setitem.py b/pandas/tests/indexing/multiindex/test_setitem.py index d49ca34edd0fd..f8f037dbda46b 100644 --- a/pandas/tests/indexing/multiindex/test_setitem.py +++ b/pandas/tests/indexing/multiindex/test_setitem.py @@ -7,6 +7,7 @@ import pandas as pd from pandas import ( DataFrame, MultiIndex, Series, Timestamp, date_range, isna, notna) +import pandas.core.common as com from pandas.util import testing as tm @@ -408,3 +409,31 @@ def test_astype_assignment_with_dups(self): df['A'] = df['A'].astype(np.float64) tm.assert_index_equal(df.index, index) + + +def test_frame_setitem_view_direct(multiindex_dataframe_random_data): + # this works because we are modifying the underlying array + # really a no-no + df = multiindex_dataframe_random_data.T + df['foo'].values[:] = 0 + assert (df['foo'].values == 0).all() + + +def test_frame_setitem_copy_raises(multiindex_dataframe_random_data): + # will raise/warn as its chained assignment + df = multiindex_dataframe_random_data.T + msg = "A value is trying to be set on a copy of a slice from a DataFrame" + with pytest.raises(com.SettingWithCopyError, match=msg): + df['foo']['one'] = 2 + + +def test_frame_setitem_copy_no_write(multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data.T + expected = frame + df = frame.copy() + msg = "A value is trying to be set on a copy of a slice from a DataFrame" + with pytest.raises(com.SettingWithCopyError, match=msg): + df['foo']['one'] = 2 + + result = df + tm.assert_frame_equal(result, expected) From 74f10b6e796bed4fb674e75bf5325f8b43761f62 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sat, 12 Jan 2019 17:06:24 +0000 Subject: [PATCH 11/15] move test_getitem_lowerdim_corner --- pandas/tests/indexing/multiindex/test_getitem.py | 14 -------------- pandas/tests/indexing/multiindex/test_loc.py | 14 ++++++++++++++ 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py index 96a7b19db7c51..5c2dba732750f 100644 --- a/pandas/tests/indexing/multiindex/test_getitem.py +++ b/pandas/tests/indexing/multiindex/test_getitem.py @@ -170,20 +170,6 @@ def test_getitem_toplevel( tm.assert_frame_equal(result, expected) -def test_getitem_lowerdim_corner(multiindex_dataframe_random_data): - df = multiindex_dataframe_random_data - - # test setup - check key not in dataframe - with pytest.raises(KeyError, match=r"^11$"): - df.loc[('bar', 'three'), 'B'] - - # in theory should be inserting in a sorted space???? - df.loc[('bar', 'three'), 'B'] = 0 - expected = 0 - result = df.sort_index().loc[('bar', 'three'), 'B'] - assert result == expected - - @pytest.mark.parametrize('unicode_strings', [True, False]) def test_mixed_depth_get(unicode_strings): # If unicode_strings is True, the column labels in dataframe diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index cf21ff1406b76..d6e360a630e4f 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -361,3 +361,17 @@ def test_getitem_int_raises_exception(frame_random_data_integer_multi_index): df = frame_random_data_integer_multi_index with pytest.raises(KeyError, match=r"^3$"): df.loc[3] + + +def test_getitem_lowerdim_corner(multiindex_dataframe_random_data): + df = multiindex_dataframe_random_data + + # test setup - check key not in dataframe + with pytest.raises(KeyError, match=r"^11$"): + df.loc[('bar', 'three'), 'B'] + + # in theory should be inserting in a sorted space???? + df.loc[('bar', 'three'), 'B'] = 0 + expected = 0 + result = df.sort_index().loc[('bar', 'three'), 'B'] + assert result == expected From 2503360864102643039ff77abea5cb8462f84550 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sat, 12 Jan 2019 17:12:54 +0000 Subject: [PATCH 12/15] add section break for mi with duplicates --- .../tests/indexing/multiindex/test_getitem.py | 36 ++++++++++--------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py index 5c2dba732750f..6c9a31a36ce34 100644 --- a/pandas/tests/indexing/multiindex/test_getitem.py +++ b/pandas/tests/indexing/multiindex/test_getitem.py @@ -8,20 +8,6 @@ from pandas.util import testing as tm -@pytest.fixture -def dataframe_with_duplicate_index(): - """Fixture for DataFrame used in tests for gh-4145 and gh-4146""" - data = [['a', 'd', 'e', 'c', 'f', 'b'], - [1, 4, 5, 3, 6, 2], - [1, 4, 5, 3, 6, 2]] - index = ['h1', 'h3', 'h5'] - columns = MultiIndex( - levels=[['A', 'B'], ['A1', 'A2', 'B1', 'B2']], - codes=[[0, 0, 0, 1, 1, 1], [0, 3, 3, 0, 1, 2]], - names=['main', 'sub']) - return DataFrame(data, index=index, columns=columns) - - # ---------------------------------------------------------------------------- # test indexing of Series with multi-level Index # ---------------------------------------------------------------------------- @@ -197,9 +183,27 @@ def test_mixed_depth_get(unicode_strings): tm.assert_series_equal(result, expected) +# ---------------------------------------------------------------------------- +# test indexing of DataFrame with multi-level Index with duplicates +# ---------------------------------------------------------------------------- + +@pytest.fixture +def dataframe_with_duplicate_index(): + """Fixture for DataFrame used in tests for gh-4145 and gh-4146""" + data = [['a', 'd', 'e', 'c', 'f', 'b'], + [1, 4, 5, 3, 6, 2], + [1, 4, 5, 3, 6, 2]] + index = ['h1', 'h3', 'h5'] + columns = MultiIndex( + levels=[['A', 'B'], ['A1', 'A2', 'B1', 'B2']], + codes=[[0, 0, 0, 1, 1, 1], [0, 3, 3, 0, 1, 2]], + names=['main', 'sub']) + return DataFrame(data, index=index, columns=columns) + + @pytest.mark.parametrize('indexer', [ - lambda df: df.loc[:, ('A', 'A1')], - lambda df: df[('A', 'A1')] + lambda df: df[('A', 'A1')], + lambda df: df.loc[:, ('A', 'A1')] ]) def test_mi_access(dataframe_with_duplicate_index, indexer): # GH 4145 From c6251e83c5fdbbf72fe3fbbc1710274b15a9e1a6 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sat, 12 Jan 2019 17:18:47 +0000 Subject: [PATCH 13/15] add series/frame to test names --- pandas/tests/indexing/multiindex/test_getitem.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py index 6c9a31a36ce34..d938335621a47 100644 --- a/pandas/tests/indexing/multiindex/test_getitem.py +++ b/pandas/tests/indexing/multiindex/test_getitem.py @@ -31,7 +31,7 @@ def test_series_getitem_multiindex(access_method, level1_value, expected): @pytest.mark.parametrize('level0_value', ['D', 'A']) -def test_getitem_duplicates_multiindex(level0_value): +def test_series_getitem_duplicates_multiindex(level0_value): # GH 5725 the 'A' happens to be a valid Timestamp so the doesn't raise # the appropriate error, only in PY3 of course! @@ -124,7 +124,7 @@ def test_getitem_simple(multiindex_dataframe_random_data): (lambda df: df[('foo', 'four')], r"^\('foo', 'four'\)$"), (lambda df: df['foobar'], r"^'foobar'$") ]) -def test_getitem_simple_key_error( +def test_frame_getitem_simple_key_error( multiindex_dataframe_random_data, indexer, expected_error_msg): df = multiindex_dataframe_random_data.T with pytest.raises(KeyError, match=expected_error_msg): @@ -147,7 +147,7 @@ def test_frame_getitem_multicolumn_empty_level(): (lambda df: df['bar'], slice(3, 5)), (lambda df: df.loc[:, 'bar'], slice(3, 5)) ]) -def test_getitem_toplevel( +def test_frame_getitem_toplevel( multiindex_dataframe_random_data, indexer, expected_slice): df = multiindex_dataframe_random_data.T expected = df.reindex(columns=df.columns[expected_slice]) @@ -157,7 +157,7 @@ def test_getitem_toplevel( @pytest.mark.parametrize('unicode_strings', [True, False]) -def test_mixed_depth_get(unicode_strings): +def test_frame_mixed_depth_get(unicode_strings): # If unicode_strings is True, the column labels in dataframe # construction will use unicode strings in Python 2 (pull request # #17099). @@ -205,7 +205,7 @@ def dataframe_with_duplicate_index(): lambda df: df[('A', 'A1')], lambda df: df.loc[:, ('A', 'A1')] ]) -def test_mi_access(dataframe_with_duplicate_index, indexer): +def test_frame_mi_access(dataframe_with_duplicate_index, indexer): # GH 4145 df = dataframe_with_duplicate_index index = Index(['h1', 'h3', 'h5']) @@ -216,7 +216,7 @@ def test_mi_access(dataframe_with_duplicate_index, indexer): tm.assert_frame_equal(result, expected) -def test_mi_access_returns_series(dataframe_with_duplicate_index): +def test_frame_mi_access_returns_series(dataframe_with_duplicate_index): # GH 4146, not returning a block manager when selecting a unique index # from a duplicate index # as of 4879, this returns a Series (which is similar to what happens @@ -227,7 +227,7 @@ def test_mi_access_returns_series(dataframe_with_duplicate_index): tm.assert_series_equal(result, expected) -def test_mi_access_returns_frame(dataframe_with_duplicate_index): +def test_frame_mi_access_returns_frame(dataframe_with_duplicate_index): # selecting a non_unique from the 2nd level df = dataframe_with_duplicate_index expected = DataFrame([['d', 4, 4], ['e', 5, 5]], From 5ccf94a57360d659c8b6dd664552d447124b2af4 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sat, 12 Jan 2019 18:29:27 +0000 Subject: [PATCH 14/15] fix CI errors --- pandas/tests/indexing/multiindex/test_getitem.py | 6 +++--- pandas/tests/indexing/multiindex/test_loc.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py index d938335621a47..b7fdbee0b7185 100644 --- a/pandas/tests/indexing/multiindex/test_getitem.py +++ b/pandas/tests/indexing/multiindex/test_getitem.py @@ -7,11 +7,11 @@ from pandas.core.indexing import IndexingError from pandas.util import testing as tm - # ---------------------------------------------------------------------------- # test indexing of Series with multi-level Index # ---------------------------------------------------------------------------- + @pytest.mark.parametrize('access_method', [lambda s, x: s[:, x], lambda s, x: s.loc[:, x], lambda s, x: s.xs(x, level=1)]) @@ -85,8 +85,8 @@ def test_series_getitem_returns_scalar( @pytest.mark.parametrize('indexer,expected_error,expected_error_msg', [ - (lambda s: s.__getitem__((2000, 3, 4)), KeyError, r"^356$"), - (lambda s: s[(2000, 3, 4)], KeyError, r"^356$"), + (lambda s: s.__getitem__((2000, 3, 4)), KeyError, r"^356L?$"), + (lambda s: s[(2000, 3, 4)], KeyError, r"^356L?$"), (lambda s: s.loc[(2000, 3, 4)], IndexingError, 'Too many indexers'), (lambda s: s.__getitem__(len(s)), IndexError, 'index out of bounds'), (lambda s: s[len(s)], IndexError, 'index out of bounds'), diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index d6e360a630e4f..ef85fc0d25328 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -367,7 +367,7 @@ def test_getitem_lowerdim_corner(multiindex_dataframe_random_data): df = multiindex_dataframe_random_data # test setup - check key not in dataframe - with pytest.raises(KeyError, match=r"^11$"): + with pytest.raises(KeyError, match=r"^11L?$"): df.loc[('bar', 'three'), 'B'] # in theory should be inserting in a sorted space???? From f57ad2d9f0192e0eba8c0ebaff63c91ef4676404 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sat, 12 Jan 2019 19:37:56 +0000 Subject: [PATCH 15/15] rename tests moved to test_loc.py --- pandas/tests/indexing/multiindex/test_loc.py | 21 ++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index ef85fc0d25328..ea451d40eb5d3 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -266,8 +266,8 @@ def convert_nested_indexer(indexer_type, keys): (pd.IndexSlice[:, ['foo']], True, None), (pd.IndexSlice[:, ['foo', 'bah']], True, None) ]) -def test_getitem_duplicates_multiindex_missing_indexers(indexer, is_level1, - expected_error): +def test_loc_getitem_duplicates_multiindex_missing_indexers(indexer, is_level1, + expected_error): # GH 7866 # multi-index slicing with missing indexers idx = MultiIndex.from_product([['A', 'B', 'C'], @@ -299,7 +299,7 @@ def test_getitem_duplicates_multiindex_missing_indexers(indexer, is_level1, lambda s: s.loc[[(2000, 3, 10), (2000, 3, 13)]], lambda s: s.ix[[(2000, 3, 10), (2000, 3, 13)]] ]) -def test_series_getitem_fancy( +def test_series_loc_getitem_fancy( multiindex_year_month_day_dataframe_random_data, indexer): s = multiindex_year_month_day_dataframe_random_data['A'] expected = s.reindex(s.index[49:51]) @@ -312,7 +312,7 @@ def test_series_getitem_fancy( ([], slice(None)), (['foo'], []) ]) -def test_getitem_duplicates_multiindex_empty_indexer(columns_indexer): +def test_loc_getitem_duplicates_multiindex_empty_indexer(columns_indexer): # GH 8737 # empty indexer multi_index = MultiIndex.from_product((['foo', 'bar', 'baz'], @@ -325,7 +325,7 @@ def test_getitem_duplicates_multiindex_empty_indexer(columns_indexer): tm.assert_frame_equal(result, expected) -def test_getitem_duplicates_multiindex_non_scalar_type_object(): +def test_loc_getitem_duplicates_multiindex_non_scalar_type_object(): # regression from < 0.14.0 # GH 7914 df = DataFrame([[np.mean, np.median], ['mean', 'median']], @@ -337,7 +337,7 @@ def test_getitem_duplicates_multiindex_non_scalar_type_object(): assert result == expected -def test_getitem_tuple_plus_slice(): +def test_loc_getitem_tuple_plus_slice(): # GH 671 df = DataFrame({'a': np.arange(10), 'b': np.arange(10), @@ -349,7 +349,7 @@ def test_getitem_tuple_plus_slice(): tm.assert_series_equal(result, expected) -def test_getitem_int(frame_random_data_integer_multi_index): +def test_loc_getitem_int(frame_random_data_integer_multi_index): df = frame_random_data_integer_multi_index result = df.loc[1] expected = df[-3:] @@ -357,13 +357,14 @@ def test_getitem_int(frame_random_data_integer_multi_index): tm.assert_frame_equal(result, expected) -def test_getitem_int_raises_exception(frame_random_data_integer_multi_index): +def test_loc_getitem_int_raises_exception( + frame_random_data_integer_multi_index): df = frame_random_data_integer_multi_index - with pytest.raises(KeyError, match=r"^3$"): + with pytest.raises(KeyError, match=r"^3L?$"): df.loc[3] -def test_getitem_lowerdim_corner(multiindex_dataframe_random_data): +def test_loc_getitem_lowerdim_corner(multiindex_dataframe_random_data): df = multiindex_dataframe_random_data # test setup - check key not in dataframe