diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 0e30b104bf9d2..ad5d2315f7e33 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -17,7 +17,6 @@ qcut, ) import pandas.util.testing as tm -from pandas.util.testing import assert_equal, assert_frame_equal, assert_series_equal def cartesian_product_for_groupers(result, args, names): @@ -159,7 +158,7 @@ def f(x): exp_idx = CategoricalIndex(levels, categories=cats.categories, ordered=True) expected = expected.reindex(exp_idx) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) grouped = data.groupby(cats, observed=False) desc_result = grouped.describe() @@ -172,7 +171,7 @@ def f(x): ord_labels, ordered=True, categories=["foo", "bar", "baz", "qux"] ) expected = ord_data.groupby(exp_cats, sort=False, observed=False).describe() - assert_frame_equal(desc_result, expected) + tm.assert_frame_equal(desc_result, expected) # GH 10460 expc = Categorical.from_codes(np.arange(4).repeat(8), levels, ordered=True) @@ -206,7 +205,7 @@ def test_level_get_group(observed): ) result = g.get_group("a") - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # GH#21636 flaky on py37; may be related to older numpy, see discussion @@ -232,21 +231,21 @@ def test_apply(ordered): # is coming back as Series([0., 1., 0.], index=["missing", "dense", "values"]) # when we expect Series(0., index=["values"]) result = grouped.apply(lambda x: np.mean(x)) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # we coerce back to ints expected = expected.astype("int") result = grouped.mean() - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = grouped.agg(np.mean) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # but for transform we should still get back the original index idx = MultiIndex.from_arrays([missing, dense], names=["missing", "dense"]) expected = Series(1, index=idx) result = grouped.apply(lambda x: 1) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_observed(observed): @@ -335,7 +334,7 @@ def test_observed(observed): c, i = key result = groups_double_key.get_group(key) expected = df[(df.cat == c) & (df.ints == i)] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # gh-8869 # with as_index @@ -522,7 +521,7 @@ def test_datetime(): expected.index, categories=expected.index, ordered=True ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) grouped = data.groupby(cats, observed=False) desc_result = grouped.describe() @@ -531,7 +530,7 @@ def test_datetime(): ord_labels = cats.take(idx) ord_data = data.take(idx) expected = ord_data.groupby(ord_labels, observed=False).describe() - assert_frame_equal(desc_result, expected) + tm.assert_frame_equal(desc_result, expected) tm.assert_index_equal(desc_result.index, expected.index) tm.assert_index_equal( desc_result.index.get_level_values(0), expected.index.get_level_values(0) @@ -560,7 +559,7 @@ def test_categorical_index(): expected.index = CategoricalIndex( Categorical.from_codes([0, 1, 2, 3], levels, ordered=True), name="cats" ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # with a cat column, should produce a cat index result = df.groupby("cats", observed=False).sum() @@ -568,7 +567,7 @@ def test_categorical_index(): expected.index = CategoricalIndex( Categorical.from_codes([0, 1, 2, 3], levels, ordered=True), name="cats" ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_describe_categorical_columns(): @@ -757,7 +756,7 @@ def test_categorical_no_compress(): exp.index = CategoricalIndex( exp.index, categories=cats.categories, ordered=cats.ordered ) - assert_series_equal(result, exp) + tm.assert_series_equal(result, exp) codes = np.array([0, 0, 0, 1, 1, 1, 3, 3, 3]) cats = Categorical.from_codes(codes, [0, 1, 2, 3], ordered=True) @@ -767,7 +766,7 @@ def test_categorical_no_compress(): exp.index = CategoricalIndex( exp.index, categories=cats.categories, ordered=cats.ordered ) - assert_series_equal(result, exp) + tm.assert_series_equal(result, exp) cats = Categorical( ["a", "a", "a", "b", "b", "b", "c", "c", "c"], @@ -829,12 +828,12 @@ def test_sort2(): col = "range" result_sort = df.groupby(col, sort=True, observed=False).first() - assert_frame_equal(result_sort, expected_sort) + tm.assert_frame_equal(result_sort, expected_sort) # when categories is ordered, group is ordered by category's order expected_sort = result_sort result_sort = df.groupby(col, sort=False, observed=False).first() - assert_frame_equal(result_sort, expected_sort) + tm.assert_frame_equal(result_sort, expected_sort) df["range"] = Categorical(df["range"], ordered=False) index = CategoricalIndex( @@ -857,10 +856,10 @@ def test_sort2(): # this is an unordered categorical, but we allow this #### result_sort = df.groupby(col, sort=True, observed=False).first() - assert_frame_equal(result_sort, expected_sort) + tm.assert_frame_equal(result_sort, expected_sort) result_nosort = df.groupby(col, sort=False, observed=False).first() - assert_frame_equal(result_nosort, expected_nosort) + tm.assert_frame_equal(result_nosort, expected_nosort) def test_sort_datetimelike(): @@ -912,10 +911,14 @@ def test_sort_datetimelike(): ) col = "dt" - assert_frame_equal(result_sort, df.groupby(col, sort=True, observed=False).first()) + tm.assert_frame_equal( + result_sort, df.groupby(col, sort=True, observed=False).first() + ) # when categories is ordered, group is ordered by category's order - assert_frame_equal(result_sort, df.groupby(col, sort=False, observed=False).first()) + tm.assert_frame_equal( + result_sort, df.groupby(col, sort=False, observed=False).first() + ) # ordered = False df["dt"] = Categorical(df["dt"], ordered=False) @@ -942,8 +945,10 @@ def test_sort_datetimelike(): result_nosort.index = CategoricalIndex(index, categories=index, name="dt") col = "dt" - assert_frame_equal(result_sort, df.groupby(col, sort=True, observed=False).first()) - assert_frame_equal( + tm.assert_frame_equal( + result_sort, df.groupby(col, sort=True, observed=False).first() + ) + tm.assert_frame_equal( result_nosort, df.groupby(col, sort=False, observed=False).first() ) @@ -1022,7 +1027,7 @@ def test_groupby_multiindex_categorical_datetime(): names=["key1", "key2"], ) expected = DataFrame({"values": [0, 4, 8, 3, 4, 5, 6, np.nan, 2]}, index=idx) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( @@ -1058,7 +1063,7 @@ def test_groupby_agg_observed_true_single_column(as_index, expected): result = df.groupby(["a", "b"], as_index=as_index, observed=True)["x"].sum() - assert_equal(result, expected) + tm.assert_equal(result, expected) @pytest.mark.parametrize("fill_value", [None, np.nan, pd.NaT]) @@ -1070,7 +1075,7 @@ def test_shift(fill_value): [None, "a", "b", "c"], categories=["a", "b", "c", "d"], ordered=False ) res = ct.shift(1, fill_value=fill_value) - assert_equal(res, expected) + tm.assert_equal(res, expected) @pytest.fixture @@ -1112,7 +1117,7 @@ def test_seriesgroupby_observed_true(df_cat, operation, kwargs): expected = Series(data=[1, 3, 2, 4], index=index, name="C") grouped = df_cat.groupby(["A", "B"], observed=True)["C"] result = getattr(grouped, operation)(sum) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) @pytest.mark.parametrize("operation", ["agg", "apply"]) @@ -1130,7 +1135,7 @@ def test_seriesgroupby_observed_false_or_none(df_cat, observed, operation): expected = Series(data=[2, 4, np.nan, 1, np.nan, 3], index=index, name="C") grouped = df_cat.groupby(["A", "B"], observed=observed)["C"] result = getattr(grouped, operation)(sum) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) @pytest.mark.parametrize( @@ -1185,7 +1190,7 @@ def test_seriesgroupby_observed_apply_dict(df_cat, observed, index, data): result = df_cat.groupby(["A", "B"], observed=observed)["C"].apply( lambda x: OrderedDict([("min", x.min()), ("max", x.max())]) ) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) @pytest.mark.parametrize("code", [([1, 0, 0]), ([0, 0, 0])]) @@ -1195,7 +1200,7 @@ def test_groupby_categorical_axis_1(code): cat = pd.Categorical.from_codes(code, categories=list("abc")) result = df.groupby(cat, axis=1).mean() expected = df.T.groupby(cat, axis=0).mean().T - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_groupby_cat_preserves_structure(observed, ordered_fixture): @@ -1212,7 +1217,7 @@ def test_groupby_cat_preserves_structure(observed, ordered_fixture): .reset_index() ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_get_nonexistent_category(): diff --git a/pandas/tests/groupby/test_counting.py b/pandas/tests/groupby/test_counting.py index 7e5180a5c7b2b..9882f12714d2d 100644 --- a/pandas/tests/groupby/test_counting.py +++ b/pandas/tests/groupby/test_counting.py @@ -4,7 +4,7 @@ import pytest from pandas import DataFrame, MultiIndex, Period, Series, Timedelta, Timestamp -from pandas.util.testing import assert_frame_equal, assert_series_equal +import pandas.util.testing as tm class TestCounting: @@ -15,8 +15,8 @@ def test_cumcount(self): expected = Series([0, 1, 2, 0, 3]) - assert_series_equal(expected, g.cumcount()) - assert_series_equal(expected, sg.cumcount()) + tm.assert_series_equal(expected, g.cumcount()) + tm.assert_series_equal(expected, sg.cumcount()) def test_cumcount_empty(self): ge = DataFrame().groupby(level=0) @@ -25,8 +25,8 @@ def test_cumcount_empty(self): # edge case, as this is usually considered float e = Series(dtype="int64") - assert_series_equal(e, ge.cumcount()) - assert_series_equal(e, se.cumcount()) + tm.assert_series_equal(e, ge.cumcount()) + tm.assert_series_equal(e, se.cumcount()) def test_cumcount_dupe_index(self): df = DataFrame( @@ -37,8 +37,8 @@ def test_cumcount_dupe_index(self): expected = Series([0, 1, 2, 0, 3], index=[0] * 5) - assert_series_equal(expected, g.cumcount()) - assert_series_equal(expected, sg.cumcount()) + tm.assert_series_equal(expected, g.cumcount()) + tm.assert_series_equal(expected, sg.cumcount()) def test_cumcount_mi(self): mi = MultiIndex.from_tuples([[0, 1], [1, 2], [2, 2], [2, 2], [1, 0]]) @@ -48,8 +48,8 @@ def test_cumcount_mi(self): expected = Series([0, 1, 2, 0, 3], index=mi) - assert_series_equal(expected, g.cumcount()) - assert_series_equal(expected, sg.cumcount()) + tm.assert_series_equal(expected, g.cumcount()) + tm.assert_series_equal(expected, sg.cumcount()) def test_cumcount_groupby_not_col(self): df = DataFrame( @@ -60,8 +60,8 @@ def test_cumcount_groupby_not_col(self): expected = Series([0, 1, 2, 0, 3], index=[0] * 5) - assert_series_equal(expected, g.cumcount()) - assert_series_equal(expected, sg.cumcount()) + tm.assert_series_equal(expected, g.cumcount()) + tm.assert_series_equal(expected, sg.cumcount()) def test_ngroup(self): df = DataFrame({"A": list("aaaba")}) @@ -70,8 +70,8 @@ def test_ngroup(self): expected = Series([0, 0, 0, 1, 0]) - assert_series_equal(expected, g.ngroup()) - assert_series_equal(expected, sg.ngroup()) + tm.assert_series_equal(expected, g.ngroup()) + tm.assert_series_equal(expected, sg.ngroup()) def test_ngroup_distinct(self): df = DataFrame({"A": list("abcde")}) @@ -80,8 +80,8 @@ def test_ngroup_distinct(self): expected = Series(range(5), dtype="int64") - assert_series_equal(expected, g.ngroup()) - assert_series_equal(expected, sg.ngroup()) + tm.assert_series_equal(expected, g.ngroup()) + tm.assert_series_equal(expected, sg.ngroup()) def test_ngroup_one_group(self): df = DataFrame({"A": [0] * 5}) @@ -90,8 +90,8 @@ def test_ngroup_one_group(self): expected = Series([0] * 5) - assert_series_equal(expected, g.ngroup()) - assert_series_equal(expected, sg.ngroup()) + tm.assert_series_equal(expected, g.ngroup()) + tm.assert_series_equal(expected, sg.ngroup()) def test_ngroup_empty(self): ge = DataFrame().groupby(level=0) @@ -100,14 +100,14 @@ def test_ngroup_empty(self): # edge case, as this is usually considered float e = Series(dtype="int64") - assert_series_equal(e, ge.ngroup()) - assert_series_equal(e, se.ngroup()) + tm.assert_series_equal(e, ge.ngroup()) + tm.assert_series_equal(e, se.ngroup()) def test_ngroup_series_matches_frame(self): df = DataFrame({"A": list("aaaba")}) s = Series(list("aaaba")) - assert_series_equal(df.groupby(s).ngroup(), s.groupby(s).ngroup()) + tm.assert_series_equal(df.groupby(s).ngroup(), s.groupby(s).ngroup()) def test_ngroup_dupe_index(self): df = DataFrame({"A": list("aaaba")}, index=[0] * 5) @@ -116,8 +116,8 @@ def test_ngroup_dupe_index(self): expected = Series([0, 0, 0, 1, 0], index=[0] * 5) - assert_series_equal(expected, g.ngroup()) - assert_series_equal(expected, sg.ngroup()) + tm.assert_series_equal(expected, g.ngroup()) + tm.assert_series_equal(expected, sg.ngroup()) def test_ngroup_mi(self): mi = MultiIndex.from_tuples([[0, 1], [1, 2], [2, 2], [2, 2], [1, 0]]) @@ -126,8 +126,8 @@ def test_ngroup_mi(self): sg = g.A expected = Series([0, 0, 0, 1, 0], index=mi) - assert_series_equal(expected, g.ngroup()) - assert_series_equal(expected, sg.ngroup()) + tm.assert_series_equal(expected, g.ngroup()) + tm.assert_series_equal(expected, sg.ngroup()) def test_ngroup_groupby_not_col(self): df = DataFrame({"A": list("aaaba")}, index=[0] * 5) @@ -136,8 +136,8 @@ def test_ngroup_groupby_not_col(self): expected = Series([0, 0, 0, 1, 0], index=[0] * 5) - assert_series_equal(expected, g.ngroup()) - assert_series_equal(expected, sg.ngroup()) + tm.assert_series_equal(expected, g.ngroup()) + tm.assert_series_equal(expected, sg.ngroup()) def test_ngroup_descending(self): df = DataFrame(["a", "a", "b", "a", "b"], columns=["A"]) @@ -146,9 +146,9 @@ def test_ngroup_descending(self): ascending = Series([0, 0, 1, 0, 1]) descending = Series([1, 1, 0, 1, 0]) - assert_series_equal(descending, (g.ngroups - 1) - ascending) - assert_series_equal(ascending, g.ngroup(ascending=True)) - assert_series_equal(descending, g.ngroup(ascending=False)) + tm.assert_series_equal(descending, (g.ngroups - 1) - ascending) + tm.assert_series_equal(ascending, g.ngroup(ascending=True)) + tm.assert_series_equal(descending, g.ngroup(ascending=False)) def test_ngroup_matches_cumcount(self): # verify one manually-worked out case works @@ -162,8 +162,8 @@ def test_ngroup_matches_cumcount(self): expected_ngroup = Series([0, 1, 2, 0, 3]) expected_cumcount = Series([0, 0, 0, 1, 0]) - assert_series_equal(g_ngroup, expected_ngroup) - assert_series_equal(g_cumcount, expected_cumcount) + tm.assert_series_equal(g_ngroup, expected_ngroup) + tm.assert_series_equal(g_cumcount, expected_cumcount) def test_ngroup_cumcount_pair(self): # brute force comparison for all small series @@ -175,8 +175,8 @@ def test_ngroup_cumcount_pair(self): ngroupd = [order.index(val) for val in p] cumcounted = [p[:i].count(val) for i, val in enumerate(p)] - assert_series_equal(g.ngroup(), Series(ngroupd)) - assert_series_equal(g.cumcount(), Series(cumcounted)) + tm.assert_series_equal(g.ngroup(), Series(ngroupd)) + tm.assert_series_equal(g.cumcount(), Series(cumcounted)) def test_ngroup_respects_groupby_order(self): np.random.seed(0) @@ -191,8 +191,8 @@ def test_ngroup_respects_groupby_order(self): for j, ind in enumerate(group.index): df.loc[ind, "group_index"] = j - assert_series_equal(Series(df["group_id"].values), g.ngroup()) - assert_series_equal(Series(df["group_index"].values), g.cumcount()) + tm.assert_series_equal(Series(df["group_id"].values), g.ngroup()) + tm.assert_series_equal(Series(df["group_index"].values), g.cumcount()) @pytest.mark.parametrize( "datetimelike", @@ -214,7 +214,7 @@ def test_count_with_datetimelike(self, datetimelike): res = df.groupby("x").count() expected = DataFrame({"y": [2, 1]}, index=["a", "b"]) expected.index.name = "x" - assert_frame_equal(expected, res) + tm.assert_frame_equal(expected, res) def test_count_with_only_nans_in_first_group(self): # GH21956 @@ -222,4 +222,4 @@ def test_count_with_only_nans_in_first_group(self): result = df.groupby(["A", "B"]).C.count() mi = MultiIndex(levels=[[], ["a", "b"]], codes=[[], []], names=["A", "B"]) expected = Series([], index=mi, dtype=np.int64, name="C") - assert_series_equal(result, expected, check_index_type=False) + tm.assert_series_equal(result, expected, check_index_type=False) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 43e2a6f040414..e17181f55fdba 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -12,11 +12,6 @@ from pandas import DataFrame, Index, MultiIndex, Series, Timestamp, date_range, read_csv import pandas.core.common as com import pandas.util.testing as tm -from pandas.util.testing import ( - assert_almost_equal, - assert_frame_equal, - assert_series_equal, -) def test_repr(): @@ -43,17 +38,19 @@ def test_basic(dtype): agged = grouped.aggregate(np.mean) assert agged[1] == 1 - assert_series_equal(agged, grouped.agg(np.mean)) # shorthand - assert_series_equal(agged, grouped.mean()) - assert_series_equal(grouped.agg(np.sum), grouped.sum()) + tm.assert_series_equal(agged, grouped.agg(np.mean)) # shorthand + tm.assert_series_equal(agged, grouped.mean()) + tm.assert_series_equal(grouped.agg(np.sum), grouped.sum()) expected = grouped.apply(lambda x: x * x.sum()) transformed = grouped.transform(lambda x: x * x.sum()) assert transformed[7] == 12 - assert_series_equal(transformed, expected) + tm.assert_series_equal(transformed, expected) value_grouped = data.groupby(data) - assert_series_equal(value_grouped.aggregate(np.mean), agged, check_index_type=False) + tm.assert_series_equal( + value_grouped.aggregate(np.mean), agged, check_index_type=False + ) # complex agg agged = grouped.aggregate([np.mean, np.std]) @@ -78,7 +75,7 @@ def test_groupby_nonobject_dtype(mframe, df_mixed_floats): result = grouped.sum() expected = mframe.groupby(key.astype("O")).sum() - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # GH 3911, mixed frame non-conversion df = df_mixed_floats.copy() @@ -93,7 +90,7 @@ def max_value(group): [np.dtype("object")] * 2 + [np.dtype("float64")] * 2 + [np.dtype("int64")], index=["A", "B", "C", "D", "value"], ) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_groupby_return_type(): @@ -151,7 +148,7 @@ def f(grp): expected = df.groupby("A").first()[["B"]] result = df.groupby("A").apply(f)[["B"]] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def f(grp): if grp.name == "Tiger": @@ -161,7 +158,7 @@ def f(grp): result = df.groupby("A").apply(f)[["B"]] e = expected.copy() e.loc["Tiger"] = np.nan - assert_frame_equal(result, e) + tm.assert_frame_equal(result, e) def f(grp): if grp.name == "Pony": @@ -171,7 +168,7 @@ def f(grp): result = df.groupby("A").apply(f)[["B"]] e = expected.copy() e.loc["Pony"] = np.nan - assert_frame_equal(result, e) + tm.assert_frame_equal(result, e) # 5592 revisited, with datetimes def f(grp): @@ -182,7 +179,7 @@ def f(grp): result = df.groupby("A").apply(f)[["C"]] e = df.groupby("A").first()[["C"]] e.loc["Pony"] = pd.NaT - assert_frame_equal(result, e) + tm.assert_frame_equal(result, e) # scalar outputs def f(grp): @@ -194,7 +191,7 @@ def f(grp): e = df.groupby("A").first()["C"].copy() e.loc["Pony"] = np.nan e.name = None - assert_series_equal(result, e) + tm.assert_series_equal(result, e) def test_pass_args_kwargs(ts, tsframe): @@ -212,29 +209,29 @@ def f(x, q=None, axis=0): agg_expected = ts_grouped.quantile(0.8) trans_expected = ts_grouped.transform(g) - assert_series_equal(apply_result, agg_expected) - assert_series_equal(agg_result, agg_expected) - assert_series_equal(trans_result, trans_expected) + tm.assert_series_equal(apply_result, agg_expected) + tm.assert_series_equal(agg_result, agg_expected) + tm.assert_series_equal(trans_result, trans_expected) agg_result = ts_grouped.agg(f, q=80) apply_result = ts_grouped.apply(f, q=80) trans_result = ts_grouped.transform(f, q=80) - assert_series_equal(agg_result, agg_expected) - assert_series_equal(apply_result, agg_expected) - assert_series_equal(trans_result, trans_expected) + tm.assert_series_equal(agg_result, agg_expected) + tm.assert_series_equal(apply_result, agg_expected) + tm.assert_series_equal(trans_result, trans_expected) # DataFrame df_grouped = tsframe.groupby(lambda x: x.month) agg_result = df_grouped.agg(np.percentile, 80, axis=0) apply_result = df_grouped.apply(DataFrame.quantile, 0.8) expected = df_grouped.quantile(0.8) - assert_frame_equal(apply_result, expected, check_names=False) - assert_frame_equal(agg_result, expected) + tm.assert_frame_equal(apply_result, expected, check_names=False) + tm.assert_frame_equal(agg_result, expected) agg_result = df_grouped.agg(f, q=80) apply_result = df_grouped.apply(DataFrame.quantile, q=0.8) - assert_frame_equal(agg_result, expected) - assert_frame_equal(apply_result, expected, check_names=False) + tm.assert_frame_equal(agg_result, expected) + tm.assert_frame_equal(apply_result, expected, check_names=False) def test_len(): @@ -280,7 +277,7 @@ def test_with_na_groups(dtype): agged = grouped.agg(len) expected = Series([4, 2], index=["bar", "foo"]) - assert_series_equal(agged, expected, check_dtype=False) + tm.assert_series_equal(agged, expected, check_dtype=False) # assert issubclass(agged.dtype.type, np.integer) @@ -291,7 +288,7 @@ def f(x): agged = grouped.agg(f) expected = Series([4, 2], index=["bar", "foo"]) - assert_series_equal(agged, expected, check_dtype=False) + tm.assert_series_equal(agged, expected, check_dtype=False) assert issubclass(agged.dtype.type, np.dtype(dtype).type) @@ -335,7 +332,7 @@ def f3(x): # correct result result1 = df.groupby("a").apply(f1) result2 = df2.groupby("a").apply(f1) - assert_frame_equal(result1, result2) + tm.assert_frame_equal(result1, result2) # should fail (not the same number of levels) msg = "Cannot concat indices that do not have the same number of levels" @@ -356,13 +353,13 @@ def test_attr_wrapper(ts): result = grouped.std() expected = grouped.agg(lambda x: np.std(x, ddof=1)) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) # this is pretty cool result = grouped.describe() expected = {name: gp.describe() for name, gp in grouped} expected = DataFrame(expected).T - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # get attribute result = grouped.dtype @@ -386,7 +383,7 @@ def test_frame_groupby(tsframe): tscopy = tsframe.copy() tscopy["weekday"] = [x.weekday() for x in tscopy.index] stragged = tscopy.groupby("weekday").aggregate(np.mean) - assert_frame_equal(stragged, aggregated, check_names=False) + tm.assert_frame_equal(stragged, aggregated, check_names=False) # transform grouped = tsframe.head(30).groupby(lambda x: x.weekday()) @@ -426,7 +423,7 @@ def test_frame_groupby_columns(tsframe): # transform tf = lambda x: x - x.mean() groupedT = tsframe.T.groupby(mapping, axis=0) - assert_frame_equal(groupedT.transform(tf).T, grouped.transform(tf)) + tm.assert_frame_equal(groupedT.transform(tf).T, grouped.transform(tf)) # iterate for k, v in grouped: @@ -469,7 +466,7 @@ def test_multi_func(df): expected = df.groupby(["A", "B"]).mean() # TODO groupby get drops names - assert_frame_equal( + tm.assert_frame_equal( agged.loc[:, ["C", "D"]], expected.loc[:, ["C", "D"]], check_names=False ) @@ -493,7 +490,7 @@ def test_multi_key_multiple_functions(df): agged = grouped.agg([np.mean, np.std]) expected = DataFrame({"mean": grouped.agg(np.mean), "std": grouped.agg(np.std)}) - assert_frame_equal(agged, expected) + tm.assert_frame_equal(agged, expected) def test_frame_multi_key_function_list(): @@ -554,7 +551,7 @@ def test_frame_multi_key_function_list(): ) assert isinstance(agged.index, MultiIndex) assert isinstance(expected.index, MultiIndex) - assert_frame_equal(agged, expected) + tm.assert_frame_equal(agged, expected) @pytest.mark.parametrize("op", [lambda x: x.sum(), lambda x: x.mean()]) @@ -580,14 +577,14 @@ def test_groupby_multiple_columns(df, op): result_col = op(grouped[col]) pivoted = result1[col] exp = expected[col] - assert_series_equal(result_col, exp) - assert_series_equal(pivoted, exp) + tm.assert_series_equal(result_col, exp) + tm.assert_series_equal(pivoted, exp) # test single series works the same result = data["C"].groupby([data["A"], data["B"]]).mean() expected = data.groupby(["A", "B"]).mean()["C"] - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_groupby_as_index_agg(df): @@ -597,12 +594,12 @@ def test_groupby_as_index_agg(df): result = grouped.agg(np.mean) expected = grouped.mean() - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result2 = grouped.agg(OrderedDict([["C", np.mean], ["D", np.sum]])) expected2 = grouped.mean() expected2["D"] = grouped.sum()["D"] - assert_frame_equal(result2, expected2) + tm.assert_frame_equal(result2, expected2) grouped = df.groupby("A", as_index=True) expected3 = grouped["C"].sum() @@ -610,7 +607,7 @@ def test_groupby_as_index_agg(df): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result3 = grouped["C"].agg({"Q": np.sum}) - assert_frame_equal(result3, expected3) + tm.assert_frame_equal(result3, expected3) # multi-key @@ -618,17 +615,17 @@ def test_groupby_as_index_agg(df): result = grouped.agg(np.mean) expected = grouped.mean() - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result2 = grouped.agg(OrderedDict([["C", np.mean], ["D", np.sum]])) expected2 = grouped.mean() expected2["D"] = grouped.sum()["D"] - assert_frame_equal(result2, expected2) + tm.assert_frame_equal(result2, expected2) expected3 = grouped["C"].sum() expected3 = DataFrame(expected3).rename(columns={"C": "Q"}) result3 = grouped["C"].agg({"Q": np.sum}) - assert_frame_equal(result3, expected3) + tm.assert_frame_equal(result3, expected3) # GH7115 & GH8112 & GH8582 df = DataFrame(np.random.randint(0, 100, (50, 3)), columns=["jim", "joe", "jolie"]) @@ -636,7 +633,7 @@ def test_groupby_as_index_agg(df): gr = df.groupby(ts) gr.nth(0) # invokes set_selection_from_grouper internally - assert_frame_equal(gr.apply(sum), df.groupby(ts).apply(sum)) + tm.assert_frame_equal(gr.apply(sum), df.groupby(ts).apply(sum)) for attr in ["mean", "max", "count", "idxmax", "cumsum", "all"]: gr = df.groupby(ts, as_index=False) @@ -645,7 +642,7 @@ def test_groupby_as_index_agg(df): gr = df.groupby(ts.values, as_index=True) right = getattr(gr, attr)().reset_index(drop=True) - assert_frame_equal(left, right) + tm.assert_frame_equal(left, right) def test_as_index_series_return_frame(df): @@ -655,22 +652,22 @@ def test_as_index_series_return_frame(df): result = grouped["C"].agg(np.sum) expected = grouped.agg(np.sum).loc[:, ["A", "C"]] assert isinstance(result, DataFrame) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result2 = grouped2["C"].agg(np.sum) expected2 = grouped2.agg(np.sum).loc[:, ["A", "B", "C"]] assert isinstance(result2, DataFrame) - assert_frame_equal(result2, expected2) + tm.assert_frame_equal(result2, expected2) result = grouped["C"].sum() expected = grouped.sum().loc[:, ["A", "C"]] assert isinstance(result, DataFrame) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result2 = grouped2["C"].sum() expected2 = grouped2.sum().loc[:, ["A", "B", "C"]] assert isinstance(result2, DataFrame) - assert_frame_equal(result2, expected2) + tm.assert_frame_equal(result2, expected2) def test_as_index_series_column_slice_raises(df): @@ -691,7 +688,7 @@ def test_groupby_as_index_cython(df): expected = data.groupby(["A"]).mean() expected.insert(0, "A", expected.index) expected.index = np.arange(len(expected)) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # multi-key grouped = data.groupby(["A", "B"], as_index=False) @@ -702,7 +699,7 @@ def test_groupby_as_index_cython(df): expected.insert(0, "A", arrays[0]) expected.insert(1, "B", arrays[1]) expected.index = np.arange(len(expected)) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_groupby_as_index_series_scalar(df): @@ -712,7 +709,7 @@ def test_groupby_as_index_series_scalar(df): result = grouped["C"].agg(len) expected = grouped.agg(len).loc[:, ["A", "B", "C"]] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_groupby_as_index_corner(df, ts): @@ -729,7 +726,7 @@ def test_groupby_multiple_key(df): df = tm.makeTimeDataFrame() grouped = df.groupby([lambda x: x.year, lambda x: x.month, lambda x: x.day]) agged = grouped.sum() - assert_almost_equal(df.values, agged.values) + tm.assert_almost_equal(df.values, agged.values) grouped = df.T.groupby( [lambda x: x.year, lambda x: x.month, lambda x: x.day], axis=1 @@ -737,10 +734,10 @@ def test_groupby_multiple_key(df): agged = grouped.agg(lambda x: x.sum()) tm.assert_index_equal(agged.index, df.columns) - assert_almost_equal(df.T.values, agged.values) + tm.assert_almost_equal(df.T.values, agged.values) agged = grouped.agg(lambda x: x.sum()) - assert_almost_equal(df.T.values, agged.values) + tm.assert_almost_equal(df.T.values, agged.values) def test_groupby_multi_corner(df): @@ -752,7 +749,7 @@ def test_groupby_multi_corner(df): expected = df.groupby(["A", "B"]).mean() expected["bad"] = np.nan - assert_frame_equal(agged, expected) + tm.assert_frame_equal(agged, expected) def test_omit_nuisance(df): @@ -760,18 +757,18 @@ def test_omit_nuisance(df): result = grouped.mean() expected = df.loc[:, ["A", "C", "D"]].groupby("A").mean() - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) agged = grouped.agg(np.mean) exp = grouped.mean() - assert_frame_equal(agged, exp) + tm.assert_frame_equal(agged, exp) df = df.loc[:, ["A", "C", "D"]] df["E"] = datetime.now() grouped = df.groupby("A") result = grouped.agg(np.sum) expected = grouped.sum() - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # won't work with axis = 1 grouped = df.groupby({"A": 0, "C": 0, "D": 1, "E": 1}, axis=1) @@ -785,7 +782,7 @@ def test_omit_nuisance_python_multiple(three_group): agged = grouped.agg(np.mean) exp = grouped.mean() - assert_frame_equal(agged, exp) + tm.assert_frame_equal(agged, exp) def test_empty_groups_corner(mframe): @@ -803,12 +800,12 @@ def test_empty_groups_corner(mframe): grouped = df.groupby(["k1", "k2"]) result = grouped.agg(np.mean) expected = grouped.mean() - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) grouped = mframe[3:5].groupby(level=0) agged = grouped.apply(lambda x: x.mean()) agged_A = grouped["A"].apply(np.mean) - assert_series_equal(agged["A"], agged_A) + tm.assert_series_equal(agged["A"], agged_A) assert agged.index.name == "first" @@ -863,8 +860,8 @@ def test_groupby_level_mapper(mframe): expected1 = mframe.groupby(mapped_level1).sum() expected0.index.name, expected1.index.name = "first", "second" - assert_frame_equal(result0, expected0) - assert_frame_equal(result1, expected1) + tm.assert_frame_equal(result0, expected0) + tm.assert_frame_equal(result1, expected1) def test_groupby_level_nonmulti(): @@ -905,10 +902,10 @@ def test_groupby_complex(): expected = Series((1 + 2j, 5 + 10j)) result = a.groupby(level=0).sum() - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) result = a.sum(level=0) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_mutate_groups(): @@ -941,7 +938,7 @@ def f_no_copy(x): grpby_copy = df.groupby("cat1").apply(f_copy) grpby_no_copy = df.groupby("cat1").apply(f_no_copy) - assert_series_equal(grpby_copy, grpby_no_copy) + tm.assert_series_equal(grpby_copy, grpby_no_copy) def test_no_mutate_but_looks_like(): @@ -953,7 +950,7 @@ def test_no_mutate_but_looks_like(): result1 = df.groupby("key", group_keys=True).apply(lambda x: x[:].key) result2 = df.groupby("key", group_keys=True).apply(lambda x: x.key) - assert_series_equal(result1, result2) + tm.assert_series_equal(result1, result2) def test_groupby_series_indexed_differently(): @@ -968,7 +965,7 @@ def test_groupby_series_indexed_differently(): grouped = s1.groupby(s2) agged = grouped.mean() exp = s1.groupby(s2.reindex(s1.index).get).mean() - assert_series_equal(agged, exp) + tm.assert_series_equal(agged, exp) def test_groupby_with_hier_columns(): @@ -1014,7 +1011,7 @@ def test_grouping_ndarray(df): result = grouped.sum() expected = df.groupby("A").sum() - assert_frame_equal( + tm.assert_frame_equal( result, expected, check_names=False ) # Note: no names when grouping by value @@ -1033,7 +1030,7 @@ def test_groupby_wrong_multi_labels(): result = grouped.agg(np.mean) expected = grouped.mean() - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_groupby_series_with_name(df): @@ -1073,7 +1070,7 @@ def test_consistency_name(): expected = df.groupby(["A"]).B.count() result = df.B.groupby(df.A).count() - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_groupby_name_propagation(df): @@ -1100,7 +1097,7 @@ def test_groupby_nonstring_columns(): grouped = df.groupby(0) result = grouped.mean() expected = df.groupby(df[0]).mean() - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_groupby_mixed_type_columns(): @@ -1173,13 +1170,13 @@ def test_groupby_dtype_inference_empty(): result = df.groupby("x").first() exp_index = Index([], name="x", dtype=np.float64) expected = DataFrame({"range": Series([], index=exp_index, dtype="int64")}) - assert_frame_equal(result, expected, by_blocks=True) + tm.assert_frame_equal(result, expected, by_blocks=True) def test_groupby_list_infer_array_like(df): result = df.groupby(list(df["A"])).mean() expected = df.groupby(df["A"]).mean() - assert_frame_equal(result, expected, check_names=False) + tm.assert_frame_equal(result, expected, check_names=False) with pytest.raises(KeyError, match=r"^'foo'$"): df.groupby(list(df["A"][:-1])) @@ -1201,7 +1198,7 @@ def test_groupby_keys_same_size_as_index(): result = df.groupby([pd.Grouper(level=0, freq=freq), "metric"]).mean() expected = df.set_index([df.index, "metric"]) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_groupby_one_row(): @@ -1347,7 +1344,7 @@ def test_dont_clobber_name_column(): ) result = df.groupby("key").apply(lambda x: x) - assert_frame_equal(result, df) + tm.assert_frame_equal(result, df) def test_skip_group_keys(): @@ -1360,7 +1357,7 @@ def test_skip_group_keys(): pieces = [group.sort_values(by="A")[:3] for key, group in grouped] expected = pd.concat(pieces) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) grouped = tsf["A"].groupby(lambda x: x.month, group_keys=False) result = grouped.apply(lambda x: x.sort_values()[:3]) @@ -1368,7 +1365,7 @@ def test_skip_group_keys(): pieces = [group.sort_values()[:3] for key, group in grouped] expected = pd.concat(pieces) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_no_nonsense_name(float_frame): @@ -1402,7 +1399,7 @@ def g(group): expected = df.groupby("A")["C"].apply(g) assert isinstance(result, Series) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) @pytest.mark.parametrize("grouper", ["A", ["A", "B"]]) @@ -1473,9 +1470,9 @@ def test_groupby_sort_multiindex_series(): mseries_result = Series([0, 2, 4], index=index) result = mseries.groupby(level=["a", "b"], sort=False).first() - assert_series_equal(result, mseries_result) + tm.assert_series_equal(result, mseries_result) result = mseries.groupby(level=["a", "b"], sort=True).first() - assert_series_equal(result, mseries_result.sort_index()) + tm.assert_series_equal(result, mseries_result.sort_index()) def test_groupby_reindex_inside_function(): @@ -1507,7 +1504,7 @@ def afunc(data): closure_bad = grouped.agg({"high": agg_before(11, np.max)}) closure_good = grouped.agg({"high": agg_before(11, np.max, True)}) - assert_frame_equal(closure_bad, closure_good) + tm.assert_frame_equal(closure_bad, closure_good) def test_groupby_multiindex_missing_pair(): @@ -1588,26 +1585,26 @@ def test_index_label_overlaps_location(): g = df.groupby(list("ababb")) actual = g.filter(lambda x: len(x) > 2) expected = df.iloc[[1, 3, 4]] - assert_frame_equal(actual, expected) + tm.assert_frame_equal(actual, expected) ser = df[0] g = ser.groupby(list("ababb")) actual = g.filter(lambda x: len(x) > 2) expected = ser.take([1, 3, 4]) - assert_series_equal(actual, expected) + tm.assert_series_equal(actual, expected) # ... and again, with a generic Index of floats df.index = df.index.astype(float) g = df.groupby(list("ababb")) actual = g.filter(lambda x: len(x) > 2) expected = df.iloc[[1, 3, 4]] - assert_frame_equal(actual, expected) + tm.assert_frame_equal(actual, expected) ser = df[0] g = ser.groupby(list("ababb")) actual = g.filter(lambda x: len(x) > 2) expected = ser.take([1, 3, 4]) - assert_series_equal(actual, expected) + tm.assert_series_equal(actual, expected) def test_transform_doesnt_clobber_ints(): @@ -1652,7 +1649,7 @@ def test_groupby_preserves_sort(sort_column, group_column): g = df.groupby(group_column) def test_sort(x): - assert_frame_equal(x, x.sort_values(by=sort_column)) + tm.assert_frame_equal(x, x.sort_values(by=sort_column)) g.apply(test_sort) @@ -1681,7 +1678,7 @@ def test_group_shift_with_null_key(): ) result = g.shift(-1) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_group_shift_with_fill_value(): @@ -1703,7 +1700,7 @@ def test_group_shift_with_fill_value(): ) result = g.shift(-1, fill_value=0)[["Z"]] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_pivot_table_values_key_error(): @@ -1734,7 +1731,7 @@ def test_empty_dataframe_groupby(): expected = DataFrame(columns=["B", "C"], dtype=np.float64) expected.index.name = "A" - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_tuple_warns(): @@ -1816,7 +1813,7 @@ def test_groupby_multiindex_nat(): result = ser.groupby(level=1).mean() expected = pd.Series([3.0, 2.5], index=["a", "b"]) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_groupby_empty_list_raises(): @@ -1841,7 +1838,7 @@ def test_groupby_multiindex_series_keys_len_equal_group_axis(): ei = pd.MultiIndex.from_arrays(index_array, names=index_names) expected = pd.Series([3], index=ei) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_groupby_groups_in_BaseGrouper(): @@ -1869,7 +1866,7 @@ def test_groupby_axis_1(group_name): results = df.groupby(group_name, axis=1).sum() expected = df.T.groupby(group_name).sum().T - assert_frame_equal(results, expected) + tm.assert_frame_equal(results, expected) # test on MI column iterables = [["bar", "baz", "foo"], ["one", "two"]] @@ -1877,7 +1874,7 @@ def test_groupby_axis_1(group_name): df = pd.DataFrame(np.arange(18).reshape(3, 6), index=[0, 1, 0], columns=mi) results = df.groupby(group_name, axis=1).sum() expected = df.T.groupby(group_name).sum().T - assert_frame_equal(results, expected) + tm.assert_frame_equal(results, expected) @pytest.mark.parametrize( @@ -1943,7 +1940,7 @@ def test_shift_bfill_ffill_tz(tz_naive_fixture, op, expected): grouped = df.groupby("id") result = getattr(grouped, op)() expected = DataFrame(expected).assign(time=lambda x: x.time.dt.tz_localize(tz)) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_groupby_only_none_group(): @@ -1953,4 +1950,4 @@ def test_groupby_only_none_group(): actual = df.groupby("g")["x"].transform("sum") expected = pd.Series([np.nan], name="x") - assert_series_equal(actual, expected) + tm.assert_series_equal(actual, expected) diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index ab25d183ae3ff..e1fd8d7da6833 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -15,11 +15,6 @@ ) from pandas.core.groupby.grouper import Grouping import pandas.util.testing as tm -from pandas.util.testing import ( - assert_almost_equal, - assert_frame_equal, - assert_series_equal, -) # selection # -------------------------------- @@ -57,12 +52,12 @@ def test_groupby_duplicated_column_errormsg(self): def test_column_select_via_attr(self, df): result = df.groupby("A").C.sum() expected = df.groupby("A")["C"].sum() - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) df["mean"] = 1.5 result = df.groupby("A").mean() expected = df.groupby("A").agg(np.mean) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_getitem_list_of_columns(self): df = DataFrame( @@ -81,9 +76,9 @@ def test_getitem_list_of_columns(self): expected = df.loc[:, ["A", "C", "D"]].groupby("A").mean() - assert_frame_equal(result, expected) - assert_frame_equal(result2, expected) - assert_frame_equal(result3, expected) + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result2, expected) + tm.assert_frame_equal(result3, expected) def test_getitem_numeric_column_names(self): # GH #13731 @@ -101,9 +96,9 @@ def test_getitem_numeric_column_names(self): expected = df.loc[:, [0, 2, 4]].groupby(0).mean() - assert_frame_equal(result, expected) - assert_frame_equal(result2, expected) - assert_frame_equal(result3, expected) + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result2, expected) + tm.assert_frame_equal(result3, expected) # grouping @@ -153,13 +148,13 @@ def test_grouper_multilevel_freq(self): result = df.groupby( [pd.Grouper(level="foo", freq="W"), pd.Grouper(level="bar", freq="W")] ).sum() - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # Check integer level result = df.groupby( [pd.Grouper(level=0, freq="W"), pd.Grouper(level=1, freq="W")] ).sum() - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_grouper_creation_bug(self): @@ -170,14 +165,14 @@ def test_grouper_creation_bug(self): g = df.groupby(pd.Grouper(key="A")) result = g.sum() - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = g.apply(lambda x: x.sum()) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) g = df.groupby(pd.Grouper(key="A", axis=0)) result = g.sum() - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # GH14334 # pd.Grouper(key=...) may be passed in a list @@ -188,7 +183,7 @@ def test_grouper_creation_bug(self): expected = df.groupby("A").sum() g = df.groupby([pd.Grouper(key="A")]) result = g.sum() - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # Group by two columns # using a combination of strings and Grouper objects @@ -197,17 +192,17 @@ def test_grouper_creation_bug(self): # Group with two Grouper objects g = df.groupby([pd.Grouper(key="A"), pd.Grouper(key="B")]) result = g.sum() - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # Group with a string and a Grouper object g = df.groupby(["A", pd.Grouper(key="B")]) result = g.sum() - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # Group with a Grouper object and a string g = df.groupby([pd.Grouper(key="A"), "B"]) result = g.sum() - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # GH8866 s = Series( @@ -221,12 +216,12 @@ def test_grouper_creation_bug(self): expected = Series( [28], index=Index([Timestamp("2013-01-31")], freq="M", name="three") ) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) # just specifying a level breaks result = s.groupby(pd.Grouper(level="one")).sum() expected = s.groupby(level="one").sum() - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_grouper_column_and_index(self): # GH 14327 @@ -243,24 +238,24 @@ def test_grouper_column_and_index(self): ) result = df_multi.groupby(["B", pd.Grouper(level="inner")]).mean() expected = df_multi.reset_index().groupby(["B", "inner"]).mean() - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # Test the reverse grouping order result = df_multi.groupby([pd.Grouper(level="inner"), "B"]).mean() expected = df_multi.reset_index().groupby(["inner", "B"]).mean() - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # Grouping a single-index frame by a column and the index should # be equivalent to resetting the index and grouping by two columns df_single = df_multi.reset_index("outer") result = df_single.groupby(["B", pd.Grouper(level="inner")]).mean() expected = df_single.reset_index().groupby(["B", "inner"]).mean() - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # Test the reverse grouping order result = df_single.groupby([pd.Grouper(level="inner"), "B"]).mean() expected = df_single.reset_index().groupby(["inner", "B"]).mean() - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_groupby_levels_and_columns(self): # GH9344, GH9049 @@ -294,13 +289,13 @@ def test_groupby_categorical_index_and_columns(self, observed): categories, categories=categories, ordered=True ) expected = DataFrame(data=expected_data, columns=expected_columns) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # test transposed version df = DataFrame(data.T, index=cat_columns) result = df.groupby(axis=0, level=0, observed=observed).sum() expected = DataFrame(data=expected_data.T, index=expected_columns) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_grouper_getting_correct_binner(self): @@ -323,7 +318,7 @@ def test_grouper_getting_correct_binner(self): names=["one", "two"], ), ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_grouper_iter(self, df): assert sorted(df.groupby("A").grouper) == ["bar", "foo"] @@ -347,7 +342,7 @@ def test_groupby_dict_mapping(self): s = Series({"T1": 5}) result = s.groupby({"T1": "T2"}).agg(sum) expected = s.groupby(["T2"]).agg(sum) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) s = Series([1.0, 2.0, 3.0, 4.0], index=list("abcd")) mapping = {"a": 0, "b": 0, "c": 1, "d": 1} @@ -356,9 +351,9 @@ def test_groupby_dict_mapping(self): result2 = s.groupby(mapping).agg(np.mean) expected = s.groupby([0, 0, 1, 1]).mean() expected2 = s.groupby([0, 0, 1, 1]).mean() - assert_series_equal(result, expected) - assert_series_equal(result, result2) - assert_series_equal(result, expected2) + tm.assert_series_equal(result, expected) + tm.assert_series_equal(result, result2) + tm.assert_series_equal(result, expected2) def test_groupby_grouper_f_sanity_checked(self): dates = date_range("01-Jan-2013", periods=12, freq="MS") @@ -390,25 +385,25 @@ def test_multiindex_passthru(self): df.columns = pd.MultiIndex.from_tuples([(0, 1), (1, 1), (2, 1)]) result = df.groupby(axis=1, level=[0, 1]).first() - assert_frame_equal(result, df) + tm.assert_frame_equal(result, df) def test_multiindex_negative_level(self, mframe): # GH 13901 result = mframe.groupby(level=-1).sum() expected = mframe.groupby(level="second").sum() - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = mframe.groupby(level=-2).sum() expected = mframe.groupby(level="first").sum() - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = mframe.groupby(level=[-2, -1]).sum() expected = mframe - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = mframe.groupby(level=[-1, "first"]).sum() expected = mframe.groupby(level=["second", "first"]).sum() - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_multifunc_select_col_integer_cols(self, df): df.columns = np.arange(len(df.columns)) @@ -483,23 +478,23 @@ def test_groupby_level(self, sort, mframe, df): assert result0.index.name == "first" assert result1.index.name == "second" - assert_frame_equal(result0, expected0) - assert_frame_equal(result1, expected1) + tm.assert_frame_equal(result0, expected0) + tm.assert_frame_equal(result1, expected1) assert result0.index.name == frame.index.names[0] assert result1.index.name == frame.index.names[1] # groupby level name result0 = frame.groupby(level="first", sort=sort).sum() result1 = frame.groupby(level="second", sort=sort).sum() - assert_frame_equal(result0, expected0) - assert_frame_equal(result1, expected1) + tm.assert_frame_equal(result0, expected0) + tm.assert_frame_equal(result1, expected1) # axis=1 result0 = frame.T.groupby(level=0, axis=1, sort=sort).sum() result1 = frame.T.groupby(level=1, axis=1, sort=sort).sum() - assert_frame_equal(result0, expected0.T) - assert_frame_equal(result1, expected1.T) + tm.assert_frame_equal(result0, expected0.T) + tm.assert_frame_equal(result1, expected1.T) # raise exception for non-MultiIndex msg = "level > 0 or level < -1 only valid with MultiIndex" @@ -528,7 +523,7 @@ def test_groupby_level_with_nas(self, sort): s = Series(np.arange(8.0), index=index) result = s.groupby(level=0, sort=sort).sum() expected = Series([6.0, 22.0], index=[0, 1]) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) index = MultiIndex( levels=[[1, 0], [0, 1, 2, 3]], @@ -539,7 +534,7 @@ def test_groupby_level_with_nas(self, sort): s = Series(np.arange(8.0), index=index) result = s.groupby(level=0, sort=sort).sum() expected = Series([6.0, 18.0], index=[0.0, 1.0]) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_groupby_args(self, mframe): # PR8618 and issue 8015 @@ -564,12 +559,12 @@ def test_level_preserve_order(self, sort, labels, mframe): # GH 17537 grouped = mframe.groupby(level=0, sort=sort) exp_labels = np.array(labels, np.intp) - assert_almost_equal(grouped.grouper.labels[0], exp_labels) + tm.assert_almost_equal(grouped.grouper.labels[0], exp_labels) def test_grouping_labels(self, mframe): grouped = mframe.groupby(mframe.index.get_level_values(0)) exp_labels = np.array([2, 2, 2, 0, 0, 1, 1, 3, 3, 3], dtype=np.intp) - assert_almost_equal(grouped.grouper.labels[0], exp_labels) + tm.assert_almost_equal(grouped.grouper.labels[0], exp_labels) def test_list_grouper_with_nat(self): # GH 14715 @@ -603,7 +598,7 @@ def test_evaluate_with_empty_groups(self, func, expected): df = pd.DataFrame({1: [], 2: []}) g = df.groupby(1) result = getattr(g[2], func)(lambda x: x) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_groupby_empty(self): # https://github.com/pandas-dev/pandas/issues/27190 @@ -677,8 +672,8 @@ def test_get_group(self): result1 = g.get_group(key) result2 = g.get_group(Timestamp(key).to_pydatetime()) result3 = g.get_group(str(Timestamp(key))) - assert_frame_equal(result1, result2) - assert_frame_equal(result1, result3) + tm.assert_frame_equal(result1, result2) + tm.assert_frame_equal(result1, result3) g = df.groupby(["DATE", "label"]) @@ -686,8 +681,8 @@ def test_get_group(self): result1 = g.get_group(key) result2 = g.get_group((Timestamp(key[0]).to_pydatetime(), key[1])) result3 = g.get_group((str(Timestamp(key[0])), key[1])) - assert_frame_equal(result1, result2) - assert_frame_equal(result1, result3) + tm.assert_frame_equal(result1, result2) + tm.assert_frame_equal(result1, result3) # must pass a same-length tuple with multiple keys msg = "must supply a tuple to get_group with multiple grouping keys" @@ -712,7 +707,7 @@ def test_get_group_empty_bins(self, observed): # IOW '(0, 5]' result = g.get_group(pd.Interval(0, 5)) expected = DataFrame([3, 1], index=[0, 1]) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) msg = r"Interval\(10, 15, closed='right'\)" with pytest.raises(KeyError, match=msg): @@ -724,14 +719,14 @@ def test_get_group_grouped_by_tuple(self): gr = df.groupby("ids") expected = DataFrame({"ids": [(1,), (1,)]}, index=[0, 2]) result = gr.get_group((1,)) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) dt = pd.to_datetime(["2010-01-01", "2010-01-02", "2010-01-01", "2010-01-02"]) df = DataFrame({"ids": [(x,) for x in dt]}) gr = df.groupby("ids") result = gr.get_group(("2010-01-01",)) expected = DataFrame({"ids": [(dt[0],), (dt[0],)]}, index=[0, 2]) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_groupby_with_empty(self): index = pd.DatetimeIndex(()) @@ -812,7 +807,7 @@ def test_multi_iter(self): e1, e2, e3 = expected[i] assert e1 == one assert e2 == two - assert_series_equal(three, e3) + tm.assert_series_equal(three, e3) def test_multi_iter_frame(self, three_group): k1 = np.array(["b", "b", "b", "a", "a", "a"]) @@ -837,7 +832,7 @@ def test_multi_iter_frame(self, three_group): e1, e2, e3 = expected[i] assert e1 == one assert e2 == two - assert_frame_equal(three, e3) + tm.assert_frame_equal(three, e3) # don't iterate through groups with no data df["k1"] = np.array(["b", "b", "b", "a", "a", "a"]) diff --git a/pandas/tests/groupby/test_index_as_string.py b/pandas/tests/groupby/test_index_as_string.py index 71d545e960566..f5c8873ff9417 100644 --- a/pandas/tests/groupby/test_index_as_string.py +++ b/pandas/tests/groupby/test_index_as_string.py @@ -2,7 +2,7 @@ import pytest import pandas as pd -from pandas.util.testing import assert_frame_equal, assert_series_equal +import pandas.util.testing as tm @pytest.fixture(params=[["inner"], ["inner", "outer"]]) @@ -49,7 +49,7 @@ def series(): def test_grouper_index_level_as_string(frame, key_strs, groupers): result = frame.groupby(key_strs).mean() expected = frame.groupby(groupers).mean() - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( @@ -79,4 +79,4 @@ def test_grouper_index_level_as_string_series(series, levels): # Compute and check result result = series.groupby(levels).mean() - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/groupby/test_nth.py b/pandas/tests/groupby/test_nth.py index 2c4b56793580c..f83b284a35377 100644 --- a/pandas/tests/groupby/test_nth.py +++ b/pandas/tests/groupby/test_nth.py @@ -3,7 +3,7 @@ import pandas as pd from pandas import DataFrame, Index, MultiIndex, Series, Timestamp, isna -from pandas.util.testing import assert_frame_equal, assert_series_equal +import pandas.util.testing as tm def test_first_last_nth(df): @@ -13,24 +13,24 @@ def test_first_last_nth(df): expected = df.loc[[1, 0], ["B", "C", "D"]] expected.index = Index(["bar", "foo"], name="A") expected = expected.sort_index() - assert_frame_equal(first, expected) + tm.assert_frame_equal(first, expected) nth = grouped.nth(0) - assert_frame_equal(nth, expected) + tm.assert_frame_equal(nth, expected) last = grouped.last() expected = df.loc[[5, 7], ["B", "C", "D"]] expected.index = Index(["bar", "foo"], name="A") - assert_frame_equal(last, expected) + tm.assert_frame_equal(last, expected) nth = grouped.nth(-1) - assert_frame_equal(nth, expected) + tm.assert_frame_equal(nth, expected) nth = grouped.nth(1) expected = df.loc[[2, 3], ["B", "C", "D"]].copy() expected.index = Index(["foo", "bar"], name="A") expected = expected.sort_index() - assert_frame_equal(nth, expected) + tm.assert_frame_equal(nth, expected) # it works! grouped["B"].first() @@ -47,11 +47,11 @@ def test_first_last_nth(df): g = df.groupby("A") result = g.first() expected = df.iloc[[1, 2]].set_index("A") - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) expected = df.iloc[[1, 2]].set_index("A") result = g.nth(0, dropna="any") - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_first_last_nth_dtypes(df_mixed_floats): @@ -66,19 +66,19 @@ def test_first_last_nth_dtypes(df_mixed_floats): expected = df.loc[[1, 0], ["B", "C", "D", "E", "F"]] expected.index = Index(["bar", "foo"], name="A") expected = expected.sort_index() - assert_frame_equal(first, expected) + tm.assert_frame_equal(first, expected) last = grouped.last() expected = df.loc[[5, 7], ["B", "C", "D", "E", "F"]] expected.index = Index(["bar", "foo"], name="A") expected = expected.sort_index() - assert_frame_equal(last, expected) + tm.assert_frame_equal(last, expected) nth = grouped.nth(1) expected = df.loc[[3, 2], ["B", "C", "D", "E", "F"]] expected.index = Index(["bar", "foo"], name="A") expected = expected.sort_index() - assert_frame_equal(nth, expected) + tm.assert_frame_equal(nth, expected) # GH 2763, first/last shifting dtypes idx = list(range(10)) @@ -93,23 +93,23 @@ def test_nth(): df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=["A", "B"]) g = df.groupby("A") - assert_frame_equal(g.nth(0), df.iloc[[0, 2]].set_index("A")) - assert_frame_equal(g.nth(1), df.iloc[[1]].set_index("A")) - assert_frame_equal(g.nth(2), df.loc[[]].set_index("A")) - assert_frame_equal(g.nth(-1), df.iloc[[1, 2]].set_index("A")) - assert_frame_equal(g.nth(-2), df.iloc[[0]].set_index("A")) - assert_frame_equal(g.nth(-3), df.loc[[]].set_index("A")) - assert_series_equal(g.B.nth(0), df.set_index("A").B.iloc[[0, 2]]) - assert_series_equal(g.B.nth(1), df.set_index("A").B.iloc[[1]]) - assert_frame_equal(g[["B"]].nth(0), df.loc[[0, 2], ["A", "B"]].set_index("A")) + tm.assert_frame_equal(g.nth(0), df.iloc[[0, 2]].set_index("A")) + tm.assert_frame_equal(g.nth(1), df.iloc[[1]].set_index("A")) + tm.assert_frame_equal(g.nth(2), df.loc[[]].set_index("A")) + tm.assert_frame_equal(g.nth(-1), df.iloc[[1, 2]].set_index("A")) + tm.assert_frame_equal(g.nth(-2), df.iloc[[0]].set_index("A")) + tm.assert_frame_equal(g.nth(-3), df.loc[[]].set_index("A")) + tm.assert_series_equal(g.B.nth(0), df.set_index("A").B.iloc[[0, 2]]) + tm.assert_series_equal(g.B.nth(1), df.set_index("A").B.iloc[[1]]) + tm.assert_frame_equal(g[["B"]].nth(0), df.loc[[0, 2], ["A", "B"]].set_index("A")) exp = df.set_index("A") - assert_frame_equal(g.nth(0, dropna="any"), exp.iloc[[1, 2]]) - assert_frame_equal(g.nth(-1, dropna="any"), exp.iloc[[1, 2]]) + tm.assert_frame_equal(g.nth(0, dropna="any"), exp.iloc[[1, 2]]) + tm.assert_frame_equal(g.nth(-1, dropna="any"), exp.iloc[[1, 2]]) exp["B"] = np.nan - assert_frame_equal(g.nth(7, dropna="any"), exp.iloc[[1, 2]]) - assert_frame_equal(g.nth(2, dropna="any"), exp.iloc[[1, 2]]) + tm.assert_frame_equal(g.nth(7, dropna="any"), exp.iloc[[1, 2]]) + tm.assert_frame_equal(g.nth(2, dropna="any"), exp.iloc[[1, 2]]) # out of bounds, regression from 0.13.1 # GH 6621 @@ -136,11 +136,11 @@ def test_nth(): result = df.groupby(level=0, as_index=False).nth(2) expected = df.iloc[[-1]] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = df.groupby(level=0, as_index=False).nth(3) expected = df.loc[[]] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # GH 7559 # from the vbench @@ -149,7 +149,7 @@ def test_nth(): g = df[0] expected = s.groupby(g).first() expected2 = s.groupby(g).apply(lambda x: x.iloc[0]) - assert_series_equal(expected2, expected, check_names=False) + tm.assert_series_equal(expected2, expected, check_names=False) assert expected.name == 1 assert expected2.name == 1 @@ -163,7 +163,7 @@ def test_nth(): # related GH 7287 expected = s.groupby(g, sort=False).first() result = s.groupby(g, sort=False).nth(0, dropna="all") - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) with pytest.raises(ValueError, match="For a DataFrame groupby"): s.groupby(g, sort=False).nth(0, dropna=True) @@ -173,20 +173,20 @@ def test_nth(): g = df.groupby("A") result = g.B.nth(0, dropna="all") expected = g.B.first() - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) # test multiple nth values df = DataFrame([[1, np.nan], [1, 3], [1, 4], [5, 6], [5, 7]], columns=["A", "B"]) g = df.groupby("A") - assert_frame_equal(g.nth(0), df.iloc[[0, 3]].set_index("A")) - assert_frame_equal(g.nth([0]), df.iloc[[0, 3]].set_index("A")) - assert_frame_equal(g.nth([0, 1]), df.iloc[[0, 1, 3, 4]].set_index("A")) - assert_frame_equal(g.nth([0, -1]), df.iloc[[0, 2, 3, 4]].set_index("A")) - assert_frame_equal(g.nth([0, 1, 2]), df.iloc[[0, 1, 2, 3, 4]].set_index("A")) - assert_frame_equal(g.nth([0, 1, -1]), df.iloc[[0, 1, 2, 3, 4]].set_index("A")) - assert_frame_equal(g.nth([2]), df.iloc[[2]].set_index("A")) - assert_frame_equal(g.nth([3, 4]), df.loc[[]].set_index("A")) + tm.assert_frame_equal(g.nth(0), df.iloc[[0, 3]].set_index("A")) + tm.assert_frame_equal(g.nth([0]), df.iloc[[0, 3]].set_index("A")) + tm.assert_frame_equal(g.nth([0, 1]), df.iloc[[0, 1, 3, 4]].set_index("A")) + tm.assert_frame_equal(g.nth([0, -1]), df.iloc[[0, 2, 3, 4]].set_index("A")) + tm.assert_frame_equal(g.nth([0, 1, 2]), df.iloc[[0, 1, 2, 3, 4]].set_index("A")) + tm.assert_frame_equal(g.nth([0, 1, -1]), df.iloc[[0, 1, 2, 3, 4]].set_index("A")) + tm.assert_frame_equal(g.nth([2]), df.iloc[[2]].set_index("A")) + tm.assert_frame_equal(g.nth([3, 4]), df.loc[[]].set_index("A")) business_dates = pd.date_range(start="4/1/2014", end="6/30/2014", freq="B") df = DataFrame(1, index=business_dates, columns=["a", "b"]) @@ -210,7 +210,7 @@ def test_nth(): ] ) expected = DataFrame(1, columns=["a", "b"], index=expected_dates) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_nth_multi_index(three_group): @@ -219,7 +219,7 @@ def test_nth_multi_index(three_group): grouped = three_group.groupby(["A", "B"]) result = grouped.nth(0) expected = grouped.first() - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( @@ -281,18 +281,18 @@ def test_first_last_tz(data, expected_first, expected_last): result = df.groupby("id", as_index=False).first() expected = DataFrame(expected_first) cols = ["id", "time", "foo"] - assert_frame_equal(result[cols], expected[cols]) + tm.assert_frame_equal(result[cols], expected[cols]) result = df.groupby("id", as_index=False)["time"].first() - assert_frame_equal(result, expected[["id", "time"]]) + tm.assert_frame_equal(result, expected[["id", "time"]]) result = df.groupby("id", as_index=False).last() expected = DataFrame(expected_last) cols = ["id", "time", "foo"] - assert_frame_equal(result[cols], expected[cols]) + tm.assert_frame_equal(result[cols], expected[cols]) result = df.groupby("id", as_index=False)["time"].last() - assert_frame_equal(result, expected[["id", "time"]]) + tm.assert_frame_equal(result, expected[["id", "time"]]) @pytest.mark.parametrize( @@ -322,7 +322,7 @@ def test_first_last_tz_multi_column(method, ts, alpha): }, index=pd.Index([1, 2], name="group"), ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_nth_multi_index_as_expected(): @@ -380,7 +380,7 @@ def test_nth_multi_index_as_expected(): names=["A", "B"], ), ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_groupby_head_tail(): @@ -389,49 +389,49 @@ def test_groupby_head_tail(): g_not_as = df.groupby("A", as_index=False) # as_index= False, much easier - assert_frame_equal(df.loc[[0, 2]], g_not_as.head(1)) - assert_frame_equal(df.loc[[1, 2]], g_not_as.tail(1)) + tm.assert_frame_equal(df.loc[[0, 2]], g_not_as.head(1)) + tm.assert_frame_equal(df.loc[[1, 2]], g_not_as.tail(1)) empty_not_as = DataFrame( columns=df.columns, index=pd.Index([], dtype=df.index.dtype) ) empty_not_as["A"] = empty_not_as["A"].astype(df.A.dtype) empty_not_as["B"] = empty_not_as["B"].astype(df.B.dtype) - assert_frame_equal(empty_not_as, g_not_as.head(0)) - assert_frame_equal(empty_not_as, g_not_as.tail(0)) - assert_frame_equal(empty_not_as, g_not_as.head(-1)) - assert_frame_equal(empty_not_as, g_not_as.tail(-1)) + tm.assert_frame_equal(empty_not_as, g_not_as.head(0)) + tm.assert_frame_equal(empty_not_as, g_not_as.tail(0)) + tm.assert_frame_equal(empty_not_as, g_not_as.head(-1)) + tm.assert_frame_equal(empty_not_as, g_not_as.tail(-1)) - assert_frame_equal(df, g_not_as.head(7)) # contains all - assert_frame_equal(df, g_not_as.tail(7)) + tm.assert_frame_equal(df, g_not_as.head(7)) # contains all + tm.assert_frame_equal(df, g_not_as.tail(7)) # as_index=True, (used to be different) df_as = df - assert_frame_equal(df_as.loc[[0, 2]], g_as.head(1)) - assert_frame_equal(df_as.loc[[1, 2]], g_as.tail(1)) + tm.assert_frame_equal(df_as.loc[[0, 2]], g_as.head(1)) + tm.assert_frame_equal(df_as.loc[[1, 2]], g_as.tail(1)) empty_as = DataFrame(index=df_as.index[:0], columns=df.columns) empty_as["A"] = empty_not_as["A"].astype(df.A.dtype) empty_as["B"] = empty_not_as["B"].astype(df.B.dtype) - assert_frame_equal(empty_as, g_as.head(0)) - assert_frame_equal(empty_as, g_as.tail(0)) - assert_frame_equal(empty_as, g_as.head(-1)) - assert_frame_equal(empty_as, g_as.tail(-1)) + tm.assert_frame_equal(empty_as, g_as.head(0)) + tm.assert_frame_equal(empty_as, g_as.tail(0)) + tm.assert_frame_equal(empty_as, g_as.head(-1)) + tm.assert_frame_equal(empty_as, g_as.tail(-1)) - assert_frame_equal(df_as, g_as.head(7)) # contains all - assert_frame_equal(df_as, g_as.tail(7)) + tm.assert_frame_equal(df_as, g_as.head(7)) # contains all + tm.assert_frame_equal(df_as, g_as.tail(7)) # test with selection - assert_frame_equal(g_as[[]].head(1), df_as.loc[[0, 2], []]) - assert_frame_equal(g_as[["A"]].head(1), df_as.loc[[0, 2], ["A"]]) - assert_frame_equal(g_as[["B"]].head(1), df_as.loc[[0, 2], ["B"]]) - assert_frame_equal(g_as[["A", "B"]].head(1), df_as.loc[[0, 2]]) + tm.assert_frame_equal(g_as[[]].head(1), df_as.loc[[0, 2], []]) + tm.assert_frame_equal(g_as[["A"]].head(1), df_as.loc[[0, 2], ["A"]]) + tm.assert_frame_equal(g_as[["B"]].head(1), df_as.loc[[0, 2], ["B"]]) + tm.assert_frame_equal(g_as[["A", "B"]].head(1), df_as.loc[[0, 2]]) - assert_frame_equal(g_not_as[[]].head(1), df_as.loc[[0, 2], []]) - assert_frame_equal(g_not_as[["A"]].head(1), df_as.loc[[0, 2], ["A"]]) - assert_frame_equal(g_not_as[["B"]].head(1), df_as.loc[[0, 2], ["B"]]) - assert_frame_equal(g_not_as[["A", "B"]].head(1), df_as.loc[[0, 2]]) + tm.assert_frame_equal(g_not_as[[]].head(1), df_as.loc[[0, 2], []]) + tm.assert_frame_equal(g_not_as[["A"]].head(1), df_as.loc[[0, 2], ["A"]]) + tm.assert_frame_equal(g_not_as[["B"]].head(1), df_as.loc[[0, 2], ["B"]]) + tm.assert_frame_equal(g_not_as[["A", "B"]].head(1), df_as.loc[[0, 2]]) def test_group_selection_cache(): @@ -442,26 +442,26 @@ def test_group_selection_cache(): g = df.groupby("A") result1 = g.head(n=2) result2 = g.nth(0) - assert_frame_equal(result1, df) - assert_frame_equal(result2, expected) + tm.assert_frame_equal(result1, df) + tm.assert_frame_equal(result2, expected) g = df.groupby("A") result1 = g.tail(n=2) result2 = g.nth(0) - assert_frame_equal(result1, df) - assert_frame_equal(result2, expected) + tm.assert_frame_equal(result1, df) + tm.assert_frame_equal(result2, expected) g = df.groupby("A") result1 = g.nth(0) result2 = g.head(n=2) - assert_frame_equal(result1, expected) - assert_frame_equal(result2, df) + tm.assert_frame_equal(result1, expected) + tm.assert_frame_equal(result2, df) g = df.groupby("A") result1 = g.nth(0) result2 = g.tail(n=2) - assert_frame_equal(result1, expected) - assert_frame_equal(result2, df) + tm.assert_frame_equal(result1, expected) + tm.assert_frame_equal(result2, df) def test_nth_empty(): @@ -469,13 +469,13 @@ def test_nth_empty(): df = DataFrame(index=[0], columns=["a", "b", "c"]) result = df.groupby("a").nth(10) expected = DataFrame(index=Index([], name="a"), columns=["b", "c"]) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = df.groupby(["a", "b"]).nth(10) expected = DataFrame( index=MultiIndex([[], []], [[], []], names=["a", "b"]), columns=["c"] ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_nth_column_order(): @@ -489,13 +489,13 @@ def test_nth_column_order(): expected = DataFrame( [["b", 100.0], ["c", 200.0]], columns=["C", "B"], index=Index([1, 2], name="A") ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = df.groupby("A").nth(-1, dropna="any") expected = DataFrame( [["a", 50.0], ["d", 150.0]], columns=["C", "B"], index=Index([1, 2], name="A") ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("dropna", [None, "any", "all"]) @@ -510,4 +510,4 @@ def test_nth_nan_in_grouper(dropna): [[2, 3], [6, 7]], columns=list("bc"), index=Index(["abc", "def"], name="a") ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py index 7acddec002d98..109382d97440e 100644 --- a/pandas/tests/groupby/test_timegrouper.py +++ b/pandas/tests/groupby/test_timegrouper.py @@ -11,8 +11,7 @@ from pandas import DataFrame, Index, MultiIndex, Series, Timestamp, date_range from pandas.core.groupby.grouper import Grouper from pandas.core.groupby.ops import BinGrouper -from pandas.util import testing as tm -from pandas.util.testing import assert_frame_equal, assert_series_equal +import pandas.util.testing as tm class TestGroupBy: @@ -50,14 +49,14 @@ def test_groupby_with_timegrouper(self): expected.iloc[[0, 6, 18], 0] = np.array([24, 6, 9], dtype="int64") result1 = df.resample("5D").sum() - assert_frame_equal(result1, expected) + tm.assert_frame_equal(result1, expected) df_sorted = df.sort_index() result2 = df_sorted.groupby(pd.Grouper(freq="5D")).sum() - assert_frame_equal(result2, expected) + tm.assert_frame_equal(result2, expected) result3 = df.groupby(pd.Grouper(freq="5D")).sum() - assert_frame_equal(result3, expected) + tm.assert_frame_equal(result3, expected) @pytest.mark.parametrize("should_sort", [True, False]) def test_groupby_with_timegrouper_methods(self, should_sort): @@ -131,7 +130,7 @@ def test_timegrouper_with_reg_groups(self): ).set_index(["Date", "Buyer"]) result = df.groupby([pd.Grouper(freq="A"), "Buyer"]).sum() - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) expected = DataFrame( { @@ -146,7 +145,7 @@ def test_timegrouper_with_reg_groups(self): } ).set_index(["Date", "Buyer"]) result = df.groupby([pd.Grouper(freq="6MS"), "Buyer"]).sum() - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) df_original = DataFrame( { @@ -184,7 +183,7 @@ def test_timegrouper_with_reg_groups(self): ).set_index(["Date", "Buyer"]) result = df.groupby([pd.Grouper(freq="1D"), "Buyer"]).sum() - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = df.groupby([pd.Grouper(freq="1M"), "Buyer"]).sum() expected = DataFrame( @@ -198,12 +197,12 @@ def test_timegrouper_with_reg_groups(self): ], } ).set_index(["Date", "Buyer"]) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # passing the name df = df.reset_index() result = df.groupby([pd.Grouper(freq="1M", key="Date"), "Buyer"]).sum() - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) with pytest.raises(KeyError, match="'The grouper name foo is not found'"): df.groupby([pd.Grouper(freq="1M", key="foo"), "Buyer"]).sum() @@ -211,9 +210,9 @@ def test_timegrouper_with_reg_groups(self): # passing the level df = df.set_index("Date") result = df.groupby([pd.Grouper(freq="1M", level="Date"), "Buyer"]).sum() - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = df.groupby([pd.Grouper(freq="1M", level=0), "Buyer"]).sum() - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) with pytest.raises(ValueError): df.groupby([pd.Grouper(freq="1M", level="foo"), "Buyer"]).sum() @@ -233,7 +232,7 @@ def test_timegrouper_with_reg_groups(self): ], } ).set_index(["Date", "Buyer"]) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # error as we have both a level and a name! with pytest.raises(ValueError): @@ -246,19 +245,19 @@ def test_timegrouper_with_reg_groups(self): {"Quantity": [31], "Date": [datetime(2013, 10, 31, 0, 0)]} ).set_index("Date") result = df.groupby(pd.Grouper(freq="1M")).sum() - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = df.groupby([pd.Grouper(freq="1M")]).sum() - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) expected = DataFrame( {"Quantity": [31], "Date": [datetime(2013, 11, 30, 0, 0)]} ).set_index("Date") result = df.groupby(pd.Grouper(freq="1M", key="Date")).sum() - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = df.groupby([pd.Grouper(freq="1M", key="Date")]).sum() - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("freq", ["D", "M", "A", "Q-APR"]) def test_timegrouper_with_reg_groups_freq(self, freq): @@ -316,10 +315,10 @@ def test_timegrouper_with_reg_groups_freq(self, freq): .groupby([pd.Grouper(freq=freq), "user_id"])["whole_cost"] .sum() ) - assert_series_equal(result1, expected) + tm.assert_series_equal(result1, expected) result2 = df.groupby([pd.Grouper(freq=freq), "user_id"])["whole_cost"].sum() - assert_series_equal(result2, expected) + tm.assert_series_equal(result2, expected) def test_timegrouper_get_group(self): # GH 6914 @@ -353,7 +352,7 @@ def test_timegrouper_get_group(self): for t, expected in zip(dt_list, expected_list): dt = pd.Timestamp(t) result = grouped.get_group(dt) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # multiple grouping expected_list = [ @@ -368,7 +367,7 @@ def test_timegrouper_get_group(self): for (b, t), expected in zip(g_list, expected_list): dt = pd.Timestamp(t) result = grouped.get_group((b, dt)) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # with index df_original = df_original.set_index("Date") @@ -385,7 +384,7 @@ def test_timegrouper_get_group(self): for t, expected in zip(dt_list, expected_list): dt = pd.Timestamp(t) result = grouped.get_group(dt) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_timegrouper_apply_return_type_series(self): # Using `apply` with the `TimeGrouper` should give the @@ -400,7 +399,7 @@ def sumfunc_series(x): expected = df.groupby(pd.Grouper(key="date")).apply(sumfunc_series) result = df_dt.groupby(pd.Grouper(freq="M", key="date")).apply(sumfunc_series) - assert_frame_equal( + tm.assert_frame_equal( result.reset_index(drop=True), expected.reset_index(drop=True) ) @@ -417,7 +416,7 @@ def sumfunc_value(x): expected = df.groupby(pd.Grouper(key="date")).apply(sumfunc_value) result = df_dt.groupby(Grouper(freq="M", key="date")).apply(sumfunc_value) - assert_series_equal( + tm.assert_series_equal( result.reset_index(drop=True), expected.reset_index(drop=True) ) @@ -493,7 +492,7 @@ def test_groupby_groups_datetimeindex_tz(self): ) result = df.groupby(["datetime", "label"]).sum() - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # by level didx = pd.DatetimeIndex(dates, tz="Asia/Tokyo") @@ -513,7 +512,7 @@ def test_groupby_groups_datetimeindex_tz(self): ) result = df.groupby(level=0).sum() - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_frame_datetime64_handling_groupby(self): # it works! @@ -550,7 +549,7 @@ def test_groupby_multi_timezone(self): name="date", dtype=object, ) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) tz = "America/Chicago" res_values = df.groupby("tz").date.get_group(tz) @@ -561,7 +560,7 @@ def test_groupby_multi_timezone(self): name="date", ) expected = pd.to_datetime(exp_values).dt.tz_localize(tz) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_groupby_groups_periods(self): dates = [ @@ -602,7 +601,7 @@ def test_groupby_groups_periods(self): ) result = df.groupby(["period", "label"]).sum() - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # by level didx = pd.PeriodIndex(dates, freq="H") @@ -622,7 +621,7 @@ def test_groupby_groups_periods(self): ) result = df.groupby(level=0).sum() - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_groupby_first_datetime64(self): df = DataFrame([(1, 1351036800000000000), (2, 1351036800000000000)]) @@ -644,7 +643,7 @@ def test_groupby_max_datetime64(self): df = DataFrame(dict(A=Timestamp("20130101"), B=np.arange(5))) expected = df.groupby("A")["A"].apply(lambda x: x.max()) result = df.groupby("A")["A"].max() - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_groupby_datetime64_32_bit(self): # GH 6410 / numpy 4328 @@ -653,7 +652,7 @@ def test_groupby_datetime64_32_bit(self): df = DataFrame({"A": range(2), "B": [pd.Timestamp("2000-01-1")] * 2}) result = df.groupby("A")["B"].transform(min) expected = Series([pd.Timestamp("2000-01-1")] * 2, name="B") - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_groupby_with_timezone_selection(self): # GH 11616 @@ -714,10 +713,10 @@ def test_first_last_max_min_on_time_data(self): grouped_test = df_test.groupby("group") grouped_ref = df_ref.groupby("group") - assert_frame_equal(grouped_ref.max(), grouped_test.max()) - assert_frame_equal(grouped_ref.min(), grouped_test.min()) - assert_frame_equal(grouped_ref.first(), grouped_test.first()) - assert_frame_equal(grouped_ref.last(), grouped_test.last()) + tm.assert_frame_equal(grouped_ref.max(), grouped_test.max()) + tm.assert_frame_equal(grouped_ref.min(), grouped_test.min()) + tm.assert_frame_equal(grouped_ref.first(), grouped_test.first()) + tm.assert_frame_equal(grouped_ref.last(), grouped_test.last()) def test_nunique_with_timegrouper_and_nat(self): # GH 17575 @@ -755,4 +754,4 @@ def test_scalar_call_versus_list_call(self): grouped = data_frame.groupby([grouper]) expected = grouped.count() - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index d3972e6ba9008..db44a4a57230c 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -19,8 +19,7 @@ date_range, ) from pandas.core.groupby.groupby import DataError -from pandas.util import testing as tm -from pandas.util.testing import assert_frame_equal, assert_series_equal +import pandas.util.testing as tm def assert_fp_equal(a, b): @@ -54,7 +53,7 @@ def test_transform(): .mean() ) result = df.groupby(key).transform(lambda x: x - x.mean()).groupby(key).mean() - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def demean(arr): return arr - arr.mean() @@ -67,7 +66,7 @@ def demean(arr): key = ["one", "two", "one", "two", "one"] result = people.groupby(key).transform(demean).groupby(key).mean() expected = people.groupby(key).apply(demean).groupby(key).mean() - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # GH 8430 df = tm.makeTimeDataFrame() @@ -91,10 +90,10 @@ def test_transform_fast(): expected = pd.Series(values, index=df.index, name="val") result = grp.transform(np.mean) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) result = grp.transform("mean") - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) # GH 12737 df = pd.DataFrame( @@ -118,18 +117,18 @@ def test_transform_fast(): {"f": [1.1, 2.1, 2.1, 4.5], "d": dates, "i": [1, 2, 2, 4]}, columns=["f", "i", "d"], ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # selection result = df.groupby("grouping")[["f", "i"]].transform("first") expected = expected[["f", "i"]] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # dup columns df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=["g", "a", "a"]) result = df.groupby("g").transform("first") expected = df.drop("g", axis=1) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_transform_broadcast(tsframe, ts): @@ -179,26 +178,26 @@ def test_transform_axis(tsframe): grouped = ts.groupby(lambda x: x.weekday()) result = ts - grouped.transform("mean") expected = grouped.apply(lambda x: x - x.mean()) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) ts = ts.T grouped = ts.groupby(lambda x: x.weekday(), axis=1) result = ts - grouped.transform("mean") expected = grouped.apply(lambda x: (x.T - x.mean(1)).T) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # non-monotonic ts = tso.iloc[[1, 0] + list(range(2, len(base)))] grouped = ts.groupby(lambda x: x.weekday()) result = ts - grouped.transform("mean") expected = grouped.apply(lambda x: x - x.mean()) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) ts = ts.T grouped = ts.groupby(lambda x: x.weekday(), axis=1) result = ts - grouped.transform("mean") expected = grouped.apply(lambda x: (x.T - x.mean(1)).T) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_transform_dtype(): @@ -207,7 +206,7 @@ def test_transform_dtype(): df = DataFrame([[1, 3], [2, 3]]) result = df.groupby(1).transform("mean") expected = DataFrame([[1.5], [1.5]]) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_transform_bug(): @@ -216,7 +215,7 @@ def test_transform_bug(): df = DataFrame(dict(A=Timestamp("20130101"), B=np.arange(5))) result = df.groupby("A")["B"].transform(lambda x: x.rank(ascending=False)) expected = Series(np.arange(5, 0, step=-1), name="B") - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_transform_numeric_to_boolean(): @@ -226,11 +225,11 @@ def test_transform_numeric_to_boolean(): df = pd.DataFrame({"A": [1.1, 2.2], "B": [1, 2]}) result = df.groupby("B").A.transform(lambda x: True) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) df = pd.DataFrame({"A": [1, 2], "B": [1, 2]}) result = df.groupby("B").A.transform(lambda x: True) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_transform_datetime_to_timedelta(): @@ -245,11 +244,11 @@ def test_transform_datetime_to_timedelta(): df.groupby("A")["A"].transform(lambda x: x.max() - x.min() + base_time) - base_time ) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) # this does date math and causes the transform to return timedelta result = df.groupby("A")["A"].transform(lambda x: x.max() - x.min()) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_transform_datetime_to_numeric(): @@ -261,7 +260,7 @@ def test_transform_datetime_to_numeric(): ) expected = Series([-0.5, 0.5], name="b") - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) # convert dt to int df = DataFrame({"a": 1, "b": date_range("2015-01-01", periods=2, freq="D")}) @@ -270,7 +269,7 @@ def test_transform_datetime_to_numeric(): ) expected = Series([0, 1], name="b") - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_transform_casting(): @@ -315,7 +314,7 @@ def test_dispatch_transform(tsframe): filled = grouped.fillna(method="pad") fillit = lambda x: x.fillna(method="pad") expected = df.groupby(lambda x: x.month).transform(fillit) - assert_frame_equal(filled, expected) + tm.assert_frame_equal(filled, expected) def test_transform_select_columns(df): @@ -325,7 +324,7 @@ def test_transform_select_columns(df): selection = df[["C", "D"]] expected = selection.groupby(df["A"]).transform(f) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_transform_exclude_nuisance(df): @@ -339,17 +338,17 @@ def test_transform_exclude_nuisance(df): expected = DataFrame(expected) result = df.groupby("A").transform(np.mean) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_transform_function_aliases(df): result = df.groupby("A").transform("mean") expected = df.groupby("A").transform(np.mean) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = df.groupby("A")["C"].transform("mean") expected = df.groupby("A")["C"].transform(np.mean) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_series_fast_transform_date(): @@ -365,7 +364,7 @@ def test_series_fast_transform_date(): pd.Timestamp("2014-1-4"), ] expected = pd.Series(dates, name="d") - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_transform_length(): @@ -383,7 +382,7 @@ def nsum(x): df.groupby("col1")["col2"].transform(nsum), ] for result in results: - assert_series_equal(result, expected, check_names=False) + tm.assert_series_equal(result, expected, check_names=False) def test_transform_coercion(): @@ -396,7 +395,7 @@ def test_transform_coercion(): expected = g.transform(np.mean) result = g.transform(lambda x: np.mean(x)) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_groupby_transform_with_int(): @@ -417,14 +416,14 @@ def test_groupby_transform_with_int(): expected = DataFrame( dict(B=np.nan, C=Series([-1, 0, 1, -1, 0, 1], dtype="float64")) ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # int case df = DataFrame(dict(A=[1, 1, 1, 2, 2, 2], B=1, C=[1, 2, 3, 1, 2, 3], D="foo")) with np.errstate(all="ignore"): result = df.groupby("A").transform(lambda x: (x - x.mean()) / x.std()) expected = DataFrame(dict(B=np.nan, C=[-1, 0, 1, -1, 0, 1])) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # int that needs float conversion s = Series([2, 3, 4, 10, 5, -1]) @@ -437,12 +436,12 @@ def test_groupby_transform_with_int(): s2 = s.iloc[3:6] s2 = (s2 - s2.mean()) / s2.std() expected = DataFrame(dict(B=np.nan, C=concat([s1, s2]))) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # int downcasting result = df.groupby("A").transform(lambda x: x * 2 / 2) expected = DataFrame(dict(B=1, C=[2, 3, 4, 10, 5, -1])) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_groupby_transform_with_nan_group(): @@ -452,7 +451,7 @@ def test_groupby_transform_with_nan_group(): expected = pd.Series( [1.0, 1.0, 2.0, 3.0, np.nan, 6.0, 6.0, 9.0, 9.0, 9.0], name="a" ) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_transform_mixed_type(): @@ -479,7 +478,7 @@ def f(group): with pd.option_context("mode.chained_assignment", None): for key, group in grouped: res = f(group) - assert_frame_equal(res, result.loc[key]) + tm.assert_frame_equal(res, result.loc[key]) def _check_cython_group_transform_cumulative(pd_op, np_op, dtype): @@ -838,11 +837,11 @@ def interweave(list_obj): if as_series: result = getattr(df.groupby("key")["val"], fill_method)(limit=limit) exp = Series(_exp_vals, name="val") - assert_series_equal(result, exp) + tm.assert_series_equal(result, exp) else: result = getattr(df.groupby("key"), fill_method)(limit=limit) exp = DataFrame({"val": _exp_vals}) - assert_frame_equal(result, exp) + tm.assert_frame_equal(result, exp) @pytest.mark.parametrize("fill_method", ["ffill", "bfill"]) @@ -989,7 +988,7 @@ def test_transform_absent_categories(func): df = DataFrame(dict(x=Categorical(x_vals, x_cats), y=y)) result = getattr(df.y.groupby(df.x), func)() expected = df.y - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) @pytest.mark.parametrize("func", ["ffill", "bfill", "shift"]) @@ -1000,7 +999,7 @@ def test_ffill_not_in_axis(func, key, val): result = getattr(df.groupby(**{key: val}), func)() expected = df - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_transform_invalid_name_raises(): @@ -1073,4 +1072,4 @@ def test_transform_lambda_with_datetimetz(): ], name="time", ) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected)