From bf1e011eacdc1c2e8012cd0526a6572eb3ad0a53 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 6 Oct 2022 20:00:08 -0700 Subject: [PATCH 1/2] CLN: test_nanops/take.py --- pandas/tests/test_nanops.py | 257 ++++++++++++++++++------------------ pandas/tests/test_take.py | 28 ---- 2 files changed, 129 insertions(+), 156 deletions(-) diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index f46d5c8e2590e..efe75e3c043e0 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -299,18 +299,18 @@ def test_nanmean(self, skipna): nanops.nanmean, np.mean, skipna, allow_obj=False, allow_date=False ) - def test_nanmean_overflow(self): + @pytest.mark.parametrize("val", [2**55, -(2**55), 20150515061816532]) + def test_nanmean_overflow(self, val): # GH 10155 # In the previous implementation mean can overflow for int dtypes, it # is now consistent with numpy - for a in [2**55, -(2**55), 20150515061816532]: - s = Series(a, index=range(500), dtype=np.int64) - result = s.mean() - np_result = s.values.mean() - assert result == a - assert result == np_result - assert result.dtype == np.float64 + s = Series(val, index=range(500), dtype=np.int64) + result = s.mean() + np_result = s.values.mean() + assert result == val + assert result == np_result + assert result.dtype == np.float64 @pytest.mark.parametrize( "dtype", @@ -623,7 +623,19 @@ def test_nancov(self): targ1 = np.cov(self.arr_float_1d.flat, self.arr_float1_1d.flat)[0, 1] self.check_nancorr_nancov_1d(nanops.nancov, targ0, targ1) - def check_nancomp(self, checkfun, targ0): + @pytest.mark.parametrize( + "op,nanop", + [ + (operator.eq, nanops.naneq), + (operator.ne, nanops.nanne), + (operator.gt, nanops.nangt), + (operator.ge, nanops.nange), + (operator.lt, nanops.nanlt), + (operator.le, nanops.nanle), + ], + ) + def test_nan_comparison(self, op, nanop): + targ0 = op(self.arr_float, self.arr_float1) arr_float = self.arr_float arr_float1 = self.arr_float1 arr_nan = self.arr_nan @@ -633,18 +645,18 @@ def check_nancomp(self, checkfun, targ0): arr_nan_float1 = self.arr_nan_float1 while targ0.ndim: - res0 = checkfun(arr_float, arr_float1) + res0 = nanop(arr_float, arr_float1) tm.assert_almost_equal(targ0, res0) if targ0.ndim > 1: targ1 = np.vstack([targ0, arr_nan]) else: targ1 = np.hstack([targ0, arr_nan]) - res1 = checkfun(arr_float_nan, arr_float1_nan) + res1 = nanop(arr_float_nan, arr_float1_nan) tm.assert_numpy_array_equal(targ1, res1, check_dtype=False) targ2 = arr_nan_nan - res2 = checkfun(arr_float_nan, arr_nan_float1) + res2 = nanop(arr_float_nan, arr_nan_float1) tm.assert_numpy_array_equal(targ2, res2, check_dtype=False) # Lower dimension for next step in the loop @@ -658,48 +670,38 @@ def check_nancomp(self, checkfun, targ0): targ0 = np.take(targ0, 0, axis=-1) @pytest.mark.parametrize( - "op,nanop", + "arr, correct", [ - (operator.eq, nanops.naneq), - (operator.ne, nanops.nanne), - (operator.gt, nanops.nangt), - (operator.ge, nanops.nange), - (operator.lt, nanops.nanlt), - (operator.le, nanops.nanle), + ("arr_complex", False), + ("arr_int", False), + ("arr_bool", False), + ("arr_str", False), + ("arr_utf", False), + ("arr_complex", False), + ("arr_complex_nan", False), + ("arr_nan_nanj", False), + ("arr_nan_infj", True), + ("arr_complex_nan_infj", True), ], ) - def test_nan_comparison(self, op, nanop): - targ0 = op(self.arr_float, self.arr_float1) - self.check_nancomp(nanop, targ0) - - def check_bool(self, func, value, correct): - while getattr(value, "ndim", True): - res0 = func(value) + def test__has_infs_non_float(self, arr, correct): + val = getattr(self, arr) + while getattr(val, "ndim", True): + res0 = nanops._has_infs(val) if correct: assert res0 else: assert not res0 - if not hasattr(value, "ndim"): + if not hasattr(val, "ndim"): break # Reduce dimension for next step in the loop - value = np.take(value, 0, axis=-1) + val = np.take(val, 0, axis=-1) - def test__has_infs(self): - pairs = [ - ("arr_complex", False), - ("arr_int", False), - ("arr_bool", False), - ("arr_str", False), - ("arr_utf", False), - ("arr_complex", False), - ("arr_complex_nan", False), - ("arr_nan_nanj", False), - ("arr_nan_infj", True), - ("arr_complex_nan_infj", True), - ] - pairs_float = [ + @pytest.mark.parametrize( + "arr, correct", + [ ("arr_float", False), ("arr_nan", False), ("arr_float_nan", False), @@ -709,17 +711,25 @@ def test__has_infs(self): ("arr_nan_inf", True), ("arr_float_nan_inf", True), ("arr_nan_nan_inf", True), - ] + ], + ) + @pytest.mark.parametrize("astype", [None, "f4", "f2"]) + def test__has_infs_floats(self, arr, correct, astype): + val = getattr(self, arr) + if astype is not None: + val = val.astype(astype) + while getattr(val, "ndim", True): + res0 = nanops._has_infs(val) + if correct: + assert res0 + else: + assert not res0 - for arr, correct in pairs: - val = getattr(self, arr) - self.check_bool(nanops._has_infs, val, correct) + if not hasattr(val, "ndim"): + break - for arr, correct in pairs_float: - val = getattr(self, arr) - self.check_bool(nanops._has_infs, val, correct) - self.check_bool(nanops._has_infs, val.astype("f4"), correct) - self.check_bool(nanops._has_infs, val.astype("f2"), correct) + # Reduce dimension for next step in the loop + val = np.take(val, 0, axis=-1) def test__bn_ok_dtype(self): assert nanops._bn_ok_dtype(self.arr_float.dtype, "test") @@ -784,46 +794,47 @@ def test_non_convertable_values(self): class TestNanvarFixedValues: # xref GH10242 + # Samples from a normal distribution. + @pytest.fixture + def variance(self): + return 3.0 - def setup_method(self): - # Samples from a normal distribution. - self.variance = variance = 3.0 - self.samples = self.prng.normal(scale=variance**0.5, size=100000) + @pytest.fixture + def samples(self, variance): + return self.prng.normal(scale=variance**0.5, size=100000) - def test_nanvar_all_finite(self): - samples = self.samples + def test_nanvar_all_finite(self, samples, variance): actual_variance = nanops.nanvar(samples) - tm.assert_almost_equal(actual_variance, self.variance, rtol=1e-2) + tm.assert_almost_equal(actual_variance, variance, rtol=1e-2) - def test_nanvar_nans(self): - samples = np.nan * np.ones(2 * self.samples.shape[0]) - samples[::2] = self.samples + def test_nanvar_nans(self, samples, variance): + samples_test = np.nan * np.ones(2 * samples.shape[0]) + samples_test[::2] = samples - actual_variance = nanops.nanvar(samples, skipna=True) - tm.assert_almost_equal(actual_variance, self.variance, rtol=1e-2) + actual_variance = nanops.nanvar(samples_test, skipna=True) + tm.assert_almost_equal(actual_variance, variance, rtol=1e-2) - actual_variance = nanops.nanvar(samples, skipna=False) + actual_variance = nanops.nanvar(samples_test, skipna=False) tm.assert_almost_equal(actual_variance, np.nan, rtol=1e-2) - def test_nanstd_nans(self): - samples = np.nan * np.ones(2 * self.samples.shape[0]) - samples[::2] = self.samples + def test_nanstd_nans(self, samples, variance): + samples_test = np.nan * np.ones(2 * samples.shape[0]) + samples_test[::2] = samples - actual_std = nanops.nanstd(samples, skipna=True) - tm.assert_almost_equal(actual_std, self.variance**0.5, rtol=1e-2) + actual_std = nanops.nanstd(samples_test, skipna=True) + tm.assert_almost_equal(actual_std, variance**0.5, rtol=1e-2) - actual_std = nanops.nanvar(samples, skipna=False) + actual_std = nanops.nanvar(samples_test, skipna=False) tm.assert_almost_equal(actual_std, np.nan, rtol=1e-2) - def test_nanvar_axis(self): + def test_nanvar_axis(self, samples, variance): # Generate some sample data. - samples_norm = self.samples - samples_unif = self.prng.uniform(size=samples_norm.shape[0]) - samples = np.vstack([samples_norm, samples_unif]) + samples_unif = self.prng.uniform(size=samples.shape[0]) + samples = np.vstack([samples, samples_unif]) actual_variance = nanops.nanvar(samples, axis=1) tm.assert_almost_equal( - actual_variance, np.array([self.variance, 1.0 / 12]), rtol=1e-2 + actual_variance, np.array([variance, 1.0 / 12]), rtol=1e-2 ) def test_nanvar_ddof(self): @@ -901,18 +912,21 @@ def prng(self): class TestNanskewFixedValues: # xref GH 11974 + # Test data + skewness value (computed with scipy.stats.skew) + @pytest.fixture + def samples(self): + return np.sin(np.linspace(0, 1, 200)) - def setup_method(self): - # Test data + skewness value (computed with scipy.stats.skew) - self.samples = np.sin(np.linspace(0, 1, 200)) - self.actual_skew = -0.1875895205961754 + @pytest.fixture + def actual_skew(self): + return -0.1875895205961754 - def test_constant_series(self): + @pytest.mark.parametrize("val", [3075.2, 3075.3, 3075.5]) + def test_constant_series(self, val): # xref GH 11974 - for val in [3075.2, 3075.3, 3075.5]: - data = val * np.ones(300) - skew = nanops.nanskew(data) - assert skew == 0.0 + data = val * np.ones(300) + skew = nanops.nanskew(data) + assert skew == 0.0 def test_all_finite(self): alpha, beta = 0.3, 0.1 @@ -923,24 +937,24 @@ def test_all_finite(self): right_tailed = self.prng.beta(alpha, beta, size=100) assert nanops.nanskew(right_tailed) > 0 - def test_ground_truth(self): - skew = nanops.nanskew(self.samples) - tm.assert_almost_equal(skew, self.actual_skew) + def test_ground_truth(self, samples, actual_skew): + skew = nanops.nanskew(samples) + tm.assert_almost_equal(skew, actual_skew) - def test_axis(self): - samples = np.vstack([self.samples, np.nan * np.ones(len(self.samples))]) + def test_axis(self, samples, actual_skew): + samples = np.vstack([samples, np.nan * np.ones(len(samples))]) skew = nanops.nanskew(samples, axis=1) - tm.assert_almost_equal(skew, np.array([self.actual_skew, np.nan])) + tm.assert_almost_equal(skew, np.array([actual_skew, np.nan])) - def test_nans(self): - samples = np.hstack([self.samples, np.nan]) + def test_nans(self, samples): + samples = np.hstack([samples, np.nan]) skew = nanops.nanskew(samples, skipna=False) assert np.isnan(skew) - def test_nans_skipna(self): - samples = np.hstack([self.samples, np.nan]) + def test_nans_skipna(self, samples, actual_skew): + samples = np.hstack([samples, np.nan]) skew = nanops.nanskew(samples, skipna=True) - tm.assert_almost_equal(skew, self.actual_skew) + tm.assert_almost_equal(skew, actual_skew) @property def prng(self): @@ -950,11 +964,14 @@ def prng(self): class TestNankurtFixedValues: # xref GH 11974 + # Test data + kurtosis value (computed with scipy.stats.kurtosis) + @pytest.fixture + def samples(self): + return np.sin(np.linspace(0, 1, 200)) - def setup_method(self): - # Test data + kurtosis value (computed with scipy.stats.kurtosis) - self.samples = np.sin(np.linspace(0, 1, 200)) - self.actual_kurt = -1.2058303433799713 + @pytest.fixture + def actual_kurt(self): + return -1.2058303433799713 @pytest.mark.parametrize("val", [3075.2, 3075.3, 3075.5]) def test_constant_series(self, val): @@ -972,24 +989,24 @@ def test_all_finite(self): right_tailed = self.prng.beta(alpha, beta, size=100) assert nanops.nankurt(right_tailed) > 0 - def test_ground_truth(self): - kurt = nanops.nankurt(self.samples) - tm.assert_almost_equal(kurt, self.actual_kurt) + def test_ground_truth(self, samples, actual_kurt): + kurt = nanops.nankurt(samples) + tm.assert_almost_equal(kurt, actual_kurt) - def test_axis(self): - samples = np.vstack([self.samples, np.nan * np.ones(len(self.samples))]) + def test_axis(self, samples, actual_kurt): + samples = np.vstack([samples, np.nan * np.ones(len(samples))]) kurt = nanops.nankurt(samples, axis=1) - tm.assert_almost_equal(kurt, np.array([self.actual_kurt, np.nan])) + tm.assert_almost_equal(kurt, np.array([actual_kurt, np.nan])) - def test_nans(self): - samples = np.hstack([self.samples, np.nan]) + def test_nans(self, samples): + samples = np.hstack([samples, np.nan]) kurt = nanops.nankurt(samples, skipna=False) assert np.isnan(kurt) - def test_nans_skipna(self): - samples = np.hstack([self.samples, np.nan]) + def test_nans_skipna(self, samples, actual_kurt): + samples = np.hstack([samples, np.nan]) kurt = nanops.nankurt(samples, skipna=True) - tm.assert_almost_equal(kurt, self.actual_kurt) + tm.assert_almost_equal(kurt, actual_kurt) @property def prng(self): @@ -1123,22 +1140,6 @@ def test_check_below_min_count__large_shape(min_count, expected_result): @pytest.mark.parametrize("func", ["nanmean", "nansum"]) -@pytest.mark.parametrize( - "dtype", - [ - np.uint8, - np.uint16, - np.uint32, - np.uint64, - np.int8, - np.int16, - np.int32, - np.int64, - np.float16, - np.float32, - np.float64, - ], -) -def test_check_bottleneck_disallow(dtype, func): +def test_check_bottleneck_disallow(any_real_numpy_dtype, func): # GH 42878 bottleneck sometimes produces unreliable results for mean and sum - assert not nanops._bn_ok_dtype(dtype, func) + assert not nanops._bn_ok_dtype(np.dtype(any_real_numpy_dtype).type, func) diff --git a/pandas/tests/test_take.py b/pandas/tests/test_take.py index f0737f7dc4cce..114e28ea4e2a1 100644 --- a/pandas/tests/test_take.py +++ b/pandas/tests/test_take.py @@ -10,34 +10,6 @@ import pandas.core.algorithms as algos -@pytest.fixture(params=[True, False]) -def writeable(request): - return request.param - - -# Check that take_nd works both with writeable arrays -# (in which case fast typed memory-views implementation) -# and read-only arrays alike. -@pytest.fixture( - params=[ - (np.float64, True), - (np.float32, True), - (np.uint64, False), - (np.uint32, False), - (np.uint16, False), - (np.uint8, False), - (np.int64, False), - (np.int32, False), - (np.int16, False), - (np.int8, False), - (np.object_, True), - (np.bool_, False), - ] -) -def dtype_can_hold_na(request): - return request.param - - @pytest.fixture( params=[ (np.int8, np.int16(127), np.int8), From 092f5ff6c192c67a86a20d2e4529458f1aa41ec0 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 7 Oct 2022 11:34:45 -0700 Subject: [PATCH 2/2] s -> ser --- pandas/tests/test_nanops.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index efe75e3c043e0..005ef6747da95 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -305,9 +305,9 @@ def test_nanmean_overflow(self, val): # In the previous implementation mean can overflow for int dtypes, it # is now consistent with numpy - s = Series(val, index=range(500), dtype=np.int64) - result = s.mean() - np_result = s.values.mean() + ser = Series(val, index=range(500), dtype=np.int64) + result = ser.mean() + np_result = ser.values.mean() assert result == val assert result == np_result assert result.dtype == np.float64 @@ -328,11 +328,11 @@ def test_returned_dtype(self, dtype): # no float128 available return - s = Series(range(10), dtype=dtype) + ser = Series(range(10), dtype=dtype) group_a = ["mean", "std", "var", "skew", "kurt"] group_b = ["min", "max"] for method in group_a + group_b: - result = getattr(s, method)() + result = getattr(ser, method)() if is_integer_dtype(dtype) and method in group_a: assert result.dtype == np.float64 else: @@ -1103,10 +1103,10 @@ def test_numpy_ops(numpy_op, expected): ) def test_nanops_independent_of_mask_param(operation): # GH22764 - s = Series([1, 2, np.nan, 3, np.nan, 4]) - mask = s.isna() - median_expected = operation(s) - median_result = operation(s, mask=mask) + ser = Series([1, 2, np.nan, 3, np.nan, 4]) + mask = ser.isna() + median_expected = operation(ser) + median_result = operation(ser, mask=mask) assert median_expected == median_result