-
-
Notifications
You must be signed in to change notification settings - Fork 18.7k
Closed
Labels
Description
A couple of classes of failures:
2020-05-27T03:46:00.4640450Z ______________ test_multiple_agg_funcs[rolling-2-expected_vals0] ______________
2020-05-27T03:46:00.4640874Z [gw0] win32 -- Python 3.7.7 D:\a\1\s\test_venv\Scripts\python.exe
2020-05-27T03:46:00.4641216Z
2020-05-27T03:46:00.4641446Z func = 'rolling', window_size = 2
2020-05-27T03:46:00.4641974Z expected_vals = [[nan, nan, nan, nan], [15.0, 20.0, 25.0, 20.0], [25.0, 30.0, 35.0, 30.0], [nan, nan, nan, nan], [20.0, 30.0, 35.0, 30.0], [35.0, 40.0, 60.0, 40.0], ...]
2020-05-27T03:46:00.4642426Z
2020-05-27T03:46:00.4642646Z @pytest.mark.parametrize(
2020-05-27T03:46:00.4642970Z "func,window_size,expected_vals",
2020-05-27T03:46:00.4643468Z [
2020-05-27T03:46:00.4643722Z (
2020-05-27T03:46:00.4643997Z "rolling",
2020-05-27T03:46:00.4644235Z 2,
2020-05-27T03:46:00.4644500Z [
2020-05-27T03:46:00.4644818Z [np.nan, np.nan, np.nan, np.nan],
2020-05-27T03:46:00.4645140Z [15.0, 20.0, 25.0, 20.0],
2020-05-27T03:46:00.4645482Z [25.0, 30.0, 35.0, 30.0],
2020-05-27T03:46:00.4645799Z [np.nan, np.nan, np.nan, np.nan],
2020-05-27T03:46:00.4646157Z [20.0, 30.0, 35.0, 30.0],
2020-05-27T03:46:00.4646503Z [35.0, 40.0, 60.0, 40.0],
2020-05-27T03:46:00.4646811Z [60.0, 80.0, 85.0, 80],
2020-05-27T03:46:00.4647113Z ],
2020-05-27T03:46:00.4647372Z ),
2020-05-27T03:46:00.4647589Z (
2020-05-27T03:46:00.4647855Z "expanding",
2020-05-27T03:46:00.4648104Z None,
2020-05-27T03:46:00.4648376Z [
2020-05-27T03:46:00.4648680Z [10.0, 10.0, 20.0, 20.0],
2020-05-27T03:46:00.4648983Z [15.0, 20.0, 25.0, 20.0],
2020-05-27T03:46:00.4649326Z [20.0, 30.0, 30.0, 20.0],
2020-05-27T03:46:00.4649670Z [10.0, 10.0, 30.0, 30.0],
2020-05-27T03:46:00.4649972Z [20.0, 30.0, 35.0, 30.0],
2020-05-27T03:46:00.4650322Z [26.666667, 40.0, 50.0, 30.0],
2020-05-27T03:46:00.4650683Z [40.0, 80.0, 60.0, 30.0],
2020-05-27T03:46:00.4650946Z ],
2020-05-27T03:46:00.4651208Z ),
2020-05-27T03:46:00.4651414Z ],
2020-05-27T03:46:00.4651646Z )
2020-05-27T03:46:00.4651997Z def test_multiple_agg_funcs(func, window_size, expected_vals):
2020-05-27T03:46:00.4652328Z # GH 15072
2020-05-27T03:46:00.4652605Z df = pd.DataFrame(
2020-05-27T03:46:00.4652839Z [
2020-05-27T03:46:00.4653114Z ["A", 10, 20],
2020-05-27T03:46:00.4653414Z ["A", 20, 30],
2020-05-27T03:46:00.4653669Z ["A", 30, 40],
2020-05-27T03:46:00.4653966Z ["B", 10, 30],
2020-05-27T03:46:00.4654266Z ["B", 30, 40],
2020-05-27T03:46:00.4654520Z ["B", 40, 80],
2020-05-27T03:46:00.4654822Z ["B", 80, 90],
2020-05-27T03:46:00.4655056Z ],
2020-05-27T03:46:00.4655354Z columns=["stock", "low", "high"],
2020-05-27T03:46:00.4655642Z )
2020-05-27T03:46:00.4655833Z
2020-05-27T03:46:00.4656116Z f = getattr(df.groupby("stock"), func)
2020-05-27T03:46:00.4656545Z if window_size:
2020-05-27T03:46:00.4656817Z window = f(window_size)
2020-05-27T03:46:00.4657104Z else:
2020-05-27T03:46:00.4657337Z window = f()
2020-05-27T03:46:00.4657591Z
2020-05-27T03:46:00.4657872Z index = pd.MultiIndex.from_tuples(
2020-05-27T03:46:00.4658235Z [("A", 0), ("A", 1), ("A", 2), ("B", 3), ("B", 4), ("B", 5), ("B", 6)],
2020-05-27T03:46:00.4658625Z names=["stock", None],
2020-05-27T03:46:00.4658902Z )
2020-05-27T03:46:00.4659155Z columns = pd.MultiIndex.from_tuples(
2020-05-27T03:46:00.4659548Z [("low", "mean"), ("low", "max"), ("high", "mean"), ("high", "min")]
2020-05-27T03:46:00.4659860Z )
2020-05-27T03:46:00.4660195Z expected = pd.DataFrame(expected_vals, index=index, columns=columns)
2020-05-27T03:46:00.4660526Z
2020-05-27T03:46:00.4660750Z result = window.agg(
2020-05-27T03:46:00.4661122Z > OrderedDict((("low", ["mean", "max"]), ("high", ["mean", "min"])))
2020-05-27T03:46:00.4661464Z )
2020-05-27T03:46:00.4661630Z
2020-05-27T03:46:00.4661965Z test_venv\lib\site-packages\pandas\tests\window\test_api.py:346:
2020-05-27T03:46:00.4662381Z _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2020-05-27T03:46:00.4662906Z test_venv\lib\site-packages\pandas\core\window\rolling.py:2033: in aggregate
2020-05-27T03:46:00.4663339Z return super().aggregate(func, *args, **kwargs)
2020-05-27T03:46:00.4663730Z test_venv\lib\site-packages\pandas\core\window\rolling.py:603: in aggregate
2020-05-27T03:46:00.4664169Z result, how = self._aggregate(func, *args, **kwargs)
2020-05-27T03:46:00.4664599Z test_venv\lib\site-packages\pandas\core\base.py:417: in _aggregate
2020-05-27T03:46:00.4664941Z result = _agg(arg, _agg_1dim)
2020-05-27T03:46:00.4665315Z test_venv\lib\site-packages\pandas\core\base.py:384: in _agg
2020-05-27T03:46:00.4665658Z result[fname] = func(fname, agg_how)
2020-05-27T03:46:00.4666056Z test_venv\lib\site-packages\pandas\core\base.py:368: in _agg_1dim
2020-05-27T03:46:00.4666433Z return colg.aggregate(how)
2020-05-27T03:46:00.4666793Z test_venv\lib\site-packages\pandas\core\window\rolling.py:2033: in aggregate
2020-05-27T03:46:00.4667225Z return super().aggregate(func, *args, **kwargs)
2020-05-27T03:46:00.4667657Z test_venv\lib\site-packages\pandas\core\window\rolling.py:603: in aggregate
2020-05-27T03:46:00.4668054Z result, how = self._aggregate(func, *args, **kwargs)
2020-05-27T03:46:00.4668471Z test_venv\lib\site-packages\pandas\core\base.py:475: in _aggregate
2020-05-27T03:46:00.4668902Z return self._aggregate_multiple_funcs(arg, _axis=_axis), None
2020-05-27T03:46:00.4669286Z _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2020-05-27T03:46:00.4669590Z
2020-05-27T03:46:00.4669874Z self = RollingGroupby [window=2,center=False,axis=0], arg = ['mean', 'max']
2020-05-27T03:46:00.4670218Z _axis = 0
2020-05-27T03:46:00.4670378Z
2020-05-27T03:46:00.4670690Z def _aggregate_multiple_funcs(self, arg, _axis):
2020-05-27T03:46:00.4671087Z from pandas.core.reshape.concat import concat
2020-05-27T03:46:00.4671345Z
2020-05-27T03:46:00.4671605Z if _axis != 0:
2020-05-27T03:46:00.4671974Z raise NotImplementedError("axis other than 0 is not supported")
2020-05-27T03:46:00.4672267Z
2020-05-27T03:46:00.4672562Z if self._selected_obj.ndim == 1:
2020-05-27T03:46:00.4672862Z obj = self._selected_obj
2020-05-27T03:46:00.4673151Z else:
2020-05-27T03:46:00.4673452Z obj = self._obj_with_exclusions
2020-05-27T03:46:00.4673697Z
2020-05-27T03:46:00.4673941Z results = []
2020-05-27T03:46:00.4674209Z keys = []
2020-05-27T03:46:00.4674412Z
2020-05-27T03:46:00.4674664Z # degenerate case
2020-05-27T03:46:00.4674921Z if obj.ndim == 1:
2020-05-27T03:46:00.4675282Z for a in arg:
2020-05-27T03:46:00.4675640Z colg = self._gotitem(obj.name, ndim=1, subset=obj)
2020-05-27T03:46:00.4675954Z try:
2020-05-27T03:46:00.4676277Z new_res = colg.aggregate(a)
2020-05-27T03:46:00.4676528Z
2020-05-27T03:46:00.4676805Z except TypeError:
2020-05-27T03:46:00.4677111Z pass
2020-05-27T03:46:00.4677359Z else:
2020-05-27T03:46:00.4677673Z results.append(new_res)
2020-05-27T03:46:00.4677958Z
2020-05-27T03:46:00.4678210Z # make sure we find a good name
2020-05-27T03:46:00.4678584Z name = com.get_callable_name(a) or a
2020-05-27T03:46:00.4678891Z keys.append(name)
2020-05-27T03:46:00.4679164Z
2020-05-27T03:46:00.4679405Z # multiples
2020-05-27T03:46:00.4679630Z else:
2020-05-27T03:46:00.4679957Z for index, col in enumerate(obj):
2020-05-27T03:46:00.4680401Z colg = self._gotitem(col, ndim=1, subset=obj.iloc[:, index])
2020-05-27T03:46:00.4680756Z try:
2020-05-27T03:46:00.4681075Z new_res = colg.aggregate(arg)
2020-05-27T03:46:00.4681407Z except (TypeError, DataError):
2020-05-27T03:46:00.4681737Z pass
2020-05-27T03:46:00.4682119Z except ValueError as err:
2020-05-27T03:46:00.4682422Z # cannot aggregate
2020-05-27T03:46:00.4682818Z if "Must produce aggregated value" in str(err):
2020-05-27T03:46:00.4683257Z # raised directly in _aggregate_named
2020-05-27T03:46:00.4683562Z pass
2020-05-27T03:46:00.4683915Z elif "no results" in str(err):
2020-05-27T03:46:00.4684337Z # raised directly in _aggregate_multiple_funcs
2020-05-27T03:46:00.4684653Z pass
2020-05-27T03:46:00.4684954Z else:
2020-05-27T03:46:00.4685220Z raise
2020-05-27T03:46:00.4685512Z else:
2020-05-27T03:46:00.4685823Z results.append(new_res)
2020-05-27T03:46:00.4686109Z keys.append(col)
2020-05-27T03:46:00.4686378Z
2020-05-27T03:46:00.4686628Z # if we are empty
2020-05-27T03:46:00.4686893Z if not len(results):
2020-05-27T03:46:00.4687219Z > raise ValueError("no results")
2020-05-27T03:46:00.4687514Z E ValueError: no results
2020-05-27T03:46:00.4687767Z
2020-05-27T03:46:00.4688056Z test_venv\lib\site-packages\pandas\core\base.py:540: ValueError
2020-05-27T03:46:00.4900327Z ________________ test_rolling_apply_args_kwargs[args_kwargs0] _________________
2020-05-27T03:46:00.4910069Z [gw0] win32 -- Python 3.7.7 D:\a\1\s\test_venv\Scripts\python.exe
2020-05-27T03:46:00.4913683Z
2020-05-27T03:46:00.4916587Z args_kwargs = [None, {'par': 10}]
2020-05-27T03:46:00.4920984Z
2020-05-27T03:46:00.4932595Z @pytest.mark.parametrize("args_kwargs", [[None, {"par": 10}], [(10,), None]])
2020-05-27T03:46:00.4934841Z def test_rolling_apply_args_kwargs(args_kwargs):
2020-05-27T03:46:00.4935692Z # GH 33433
2020-05-27T03:46:00.4936381Z def foo(x, par):
2020-05-27T03:46:00.4937029Z return np.sum(x + par)
2020-05-27T03:46:00.4937700Z
2020-05-27T03:46:00.4938401Z df = DataFrame({"gr": [1, 1], "a": [1, 2]})
2020-05-27T03:46:00.4939052Z
2020-05-27T03:46:00.4939885Z idx = Index(["gr", "a"])
2020-05-27T03:46:00.4940561Z expected = DataFrame([[11.0, 11.0], [11.0, 12.0]], columns=idx)
2020-05-27T03:46:00.4941155Z
2020-05-27T03:46:00.4941805Z result = df.rolling(1).apply(foo, args=args_kwargs[0], kwargs=args_kwargs[1])
2020-05-27T03:46:00.4942523Z tm.assert_frame_equal(result, expected)
2020-05-27T03:46:00.4943122Z
2020-05-27T03:46:00.4943684Z result = df.rolling(1).apply(foo, args=(10,))
2020-05-27T03:46:00.4945113Z
2020-05-27T03:46:00.4945760Z midx = MultiIndex.from_tuples([(1, 0), (1, 1)], names=["gr", None])
2020-05-27T03:46:00.4946452Z expected = Series([11.0, 12.0], index=midx, name="a")
2020-05-27T03:46:00.4947078Z
2020-05-27T03:46:00.4947685Z gb_rolling = df.groupby("gr")["a"].rolling(1)
2020-05-27T03:46:00.4948255Z
2020-05-27T03:46:00.4949192Z > result = gb_rolling.apply(foo, args=args_kwargs[0], kwargs=args_kwargs[1])
2020-05-27T03:46:00.4949774Z
2020-05-27T03:46:00.4950430Z test_venv\lib\site-packages\pandas\tests\window\test_apply.py:165:
2020-05-27T03:46:00.4951199Z _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2020-05-27T03:46:00.4951938Z test_venv\lib\site-packages\pandas\core\window\rolling.py:2067: in apply
2020-05-27T03:46:00.4952625Z kwargs=kwargs,
2020-05-27T03:46:00.4953353Z test_venv\lib\site-packages\pandas\core\window\rolling.py:1391: in apply
2020-05-27T03:46:00.4954036Z kwargs=kwargs,
2020-05-27T03:46:00.4954689Z test_venv\lib\site-packages\pandas\core\window\rolling.py:2196: in _apply
2020-05-27T03:46:00.4955365Z **kwargs,
2020-05-27T03:46:00.4956044Z test_venv\lib\site-packages\pandas\core\window\rolling.py:589: in _apply
2020-05-27T03:46:00.4956693Z result = calc(values)
2020-05-27T03:46:00.4957386Z test_venv\lib\site-packages\pandas\core\window\rolling.py:575: in calc
2020-05-27T03:46:00.4958198Z closed=self.closed,
2020-05-27T03:46:00.4958846Z _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
2020-05-27T03:46:00.4959456Z
2020-05-27T03:46:00.4960053Z self = <pandas.core.window.indexers.GroupbyRollingIndexer object at 0x3A34FA30>
2020-05-27T03:46:00.4960788Z num_values = 2, min_periods = None, center = False, closed = None
2020-05-27T03:46:00.4961381Z
2020-05-27T03:46:00.4961909Z @Appender(get_window_bounds_doc)
2020-05-27T03:46:00.4962523Z def get_window_bounds(
2020-05-27T03:46:00.4963100Z self,
2020-05-27T03:46:00.4963652Z num_values: int = 0,
2020-05-27T03:46:00.4964290Z min_periods: Optional[int] = None,
2020-05-27T03:46:00.4964946Z center: Optional[bool] = None,
2020-05-27T03:46:00.4965552Z closed: Optional[str] = None,
2020-05-27T03:46:00.4966208Z ) -> Tuple[np.ndarray, np.ndarray]:
2020-05-27T03:46:00.4966908Z # 1) For each group, get the indices that belong to the group
2020-05-27T03:46:00.4967605Z # 2) Use the indices to calculate the start & end bounds of the window
2020-05-27T03:46:00.4968322Z # 3) Append the window bounds in group order
2020-05-27T03:46:00.4968991Z start_arrays = []
2020-05-27T03:46:00.4969583Z end_arrays = []
2020-05-27T03:46:00.4970143Z window_indicies_start = 0
2020-05-27T03:46:00.4970823Z for key, indicies in self.groupby_indicies.items():
2020-05-27T03:46:00.4971539Z if self.index_array is not None:
2020-05-27T03:46:00.4972196Z index_array = self.index_array.take(indicies)
2020-05-27T03:46:00.4972843Z else:
2020-05-27T03:46:00.4973467Z index_array = self.index_array
2020-05-27T03:46:00.4974075Z indexer = self.rolling_indexer(
2020-05-27T03:46:00.4974759Z index_array=index_array, window_size=self.window_size,
2020-05-27T03:46:00.4975407Z )
2020-05-27T03:46:00.4975981Z start, end = indexer.get_window_bounds(
2020-05-27T03:46:00.4976661Z len(indicies), min_periods, center, closed
2020-05-27T03:46:00.4977281Z )
2020-05-27T03:46:00.4977908Z # Cannot use groupby_indicies as they might not be monotonic with the object
2020-05-27T03:46:00.4978599Z # we're rolling over
2020-05-27T03:46:00.4979224Z window_indicies = np.arange(
2020-05-27T03:46:00.4979816Z window_indicies_start,
2020-05-27T03:46:00.4980472Z window_indicies_start + len(indicies),
2020-05-27T03:46:00.4981202Z dtype=np.int64,
2020-05-27T03:46:00.4981747Z )
2020-05-27T03:46:00.4982364Z window_indicies_start += len(indicies)
2020-05-27T03:46:00.4983051Z # Extend as we'll be slicing window like [start, end)
2020-05-27T03:46:00.4983789Z window_indicies = np.append(window_indicies, [window_indicies[-1] + 1])
2020-05-27T03:46:00.4984532Z > start_arrays.append(window_indicies.take(start))
2020-05-27T03:46:00.4985326Z E TypeError: Cannot cast array data from dtype('int64') to dtype('int32') according to the rule 'safe'
2020-05-27T03:46:00.4986013Z