From dfd85f2b1ad98da412debd0eb84d66e7f7304206 Mon Sep 17 00:00:00 2001 From: AS Date: Fri, 6 Mar 2020 19:35:59 +0100 Subject: [PATCH 1/6] quick fix, no docs, no tests --- xarray/core/dataarray.py | 3 ++- xarray/core/dataset.py | 3 ++- xarray/core/variable.py | 5 +++-- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 4e80ef222c2..5f7eeb71053 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -2939,6 +2939,7 @@ def quantile( dim: Union[Hashable, Sequence[Hashable], None] = None, interpolation: str = "linear", keep_attrs: bool = None, + skipna: bool = True, ) -> "DataArray": """Compute the qth quantile of the data along the specified dimension. @@ -3015,7 +3016,7 @@ def quantile( """ ds = self._to_temp_dataset().quantile( - q, dim=dim, keep_attrs=keep_attrs, interpolation=interpolation + q, dim=dim, keep_attrs=keep_attrs, interpolation=interpolation, skipna=skipna ) return self._from_temp_dataset(ds) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 52940e98b27..f562390d2ff 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -5140,7 +5140,7 @@ def sortby(self, variables, ascending=True): return aligned_self.isel(**indices) def quantile( - self, q, dim=None, interpolation="linear", numeric_only=False, keep_attrs=None + self, q, dim=None, interpolation="linear", numeric_only=False, keep_attrs=None, skipna=skipna ): """Compute the qth quantile of the data along the specified dimension. @@ -5258,6 +5258,7 @@ def quantile( dim=reduce_dims, interpolation=interpolation, keep_attrs=keep_attrs, + skipna=skipna ) else: diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 62f9fde6a2e..39d20d3ebf8 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1678,7 +1678,7 @@ def no_conflicts(self, other, equiv=duck_array_ops.array_notnull_equiv): """ return self.broadcast_equals(other, equiv=equiv) - def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): + def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None, skipna=True): """Compute the qth quantile of the data along the specified dimension. Returns the qth quantiles(s) of the array elements. @@ -1724,6 +1724,7 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): """ from .computation import apply_ufunc + _quantile_func = np.nanquantile if skipna else np.quantile if keep_attrs is None: keep_attrs = _get_keep_attrs(default=False) @@ -1739,7 +1740,7 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): def _wrapper(npa, **kwargs): # move quantile axis to end. required for apply_ufunc - return np.moveaxis(np.nanquantile(npa, **kwargs), 0, -1) + return np.moveaxis(_quantile_func(npa, **kwargs), 0, -1) axis = np.arange(-1, -1 * len(dim) - 1, -1) result = apply_ufunc( From efa17edb22a93d05dd549e6ae284edbab15cb717 Mon Sep 17 00:00:00 2001 From: AS Date: Sat, 7 Mar 2020 10:52:32 +0100 Subject: [PATCH 2/6] added tests --- xarray/core/dataset.py | 10 ++++++++-- xarray/tests/test_dataarray.py | 8 +++++--- xarray/tests/test_dataset.py | 24 ++++++++++++++++++++---- xarray/tests/test_variable.py | 8 +++++--- 4 files changed, 38 insertions(+), 12 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index f562390d2ff..ec0794a61cb 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -5140,7 +5140,13 @@ def sortby(self, variables, ascending=True): return aligned_self.isel(**indices) def quantile( - self, q, dim=None, interpolation="linear", numeric_only=False, keep_attrs=None, skipna=skipna + self, + q, + dim=None, + interpolation="linear", + numeric_only=False, + keep_attrs=None, + skipna=True, ): """Compute the qth quantile of the data along the specified dimension. @@ -5258,7 +5264,7 @@ def quantile( dim=reduce_dims, interpolation=interpolation, keep_attrs=keep_attrs, - skipna=skipna + skipna=skipna, ) else: diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 0a622d279ba..997b0d9255c 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -2362,13 +2362,15 @@ def test_reduce_out(self): with pytest.raises(TypeError): orig.mean(out=np.ones(orig.shape)) + @pytest.mark.parametrize("skipna", [True, False]) @pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]]) @pytest.mark.parametrize( "axis, dim", zip([None, 0, [0], [0, 1]], [None, "x", ["x"], ["x", "y"]]) ) - def test_quantile(self, q, axis, dim): - actual = DataArray(self.va).quantile(q, dim=dim, keep_attrs=True) - expected = np.nanpercentile(self.dv.values, np.array(q) * 100, axis=axis) + def test_quantile(self, q, axis, dim, skipna): + actual = DataArray(self.va).quantile(q, dim=dim, keep_attrs=True, skipna=skipna) + _percentile_func = np.nanpercentile if skipna else np.percentile + expected = _percentile_func(self.dv.values, np.array(q) * 100, axis=axis) np.testing.assert_allclose(actual.values, expected) if is_scalar(q): assert "quantile" not in actual.dims diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 7bcf9379ae8..753bfec14b8 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -4688,12 +4688,13 @@ def test_reduce_keepdims(self): ) assert_identical(expected, actual) + @pytest.mark.parametrize("skipna", [True, False]) @pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]]) - def test_quantile(self, q): + def test_quantile(self, q, skipna): ds = create_test_data(seed=123) for dim in [None, "dim1", ["dim1"]]: - ds_quantile = ds.quantile(q, dim=dim) + ds_quantile = ds.quantile(q, dim=dim, skipna=skipna) if is_scalar(q): assert "quantile" not in ds_quantile.dims else: @@ -4701,12 +4702,27 @@ def test_quantile(self, q): for var, dar in ds.data_vars.items(): assert var in ds_quantile - assert_identical(ds_quantile[var], dar.quantile(q, dim=dim)) + assert_identical( + ds_quantile[var], dar.quantile(q, dim=dim, skipna=skipna) + ) dim = ["dim1", "dim2"] - ds_quantile = ds.quantile(q, dim=dim) + ds_quantile = ds.quantile(q, dim=dim, skipna=skipna) assert "dim3" in ds_quantile.dims assert all(d not in ds_quantile.dims for d in dim) + @pytest.mark.parametrize("skipna", [True, False]) + def test_quantile_skipna(self, skipna): + q = 0.1 + dim = "time" + ds = Dataset({"a": ([dim], np.arange(0, 11))}) + ds = ds.where(ds >= 1) + + result = ds.quantile(q=q, dim=dim, skipna=skipna) + + value = 1.9 if skipna else np.nan + expected = Dataset({"a": value}, coords={"quantile": q}) + assert_identical(result, expected) + @requires_bottleneck def test_rank(self): ds = create_test_data(seed=1234) diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index c86ecd0121f..c600f7a77d0 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -1511,14 +1511,16 @@ def test_reduce(self): with pytest.warns(DeprecationWarning, match="allow_lazy is deprecated"): v.mean(dim="x", allow_lazy=False) + @pytest.mark.parametrize("skipna", [True, False]) @pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]]) @pytest.mark.parametrize( "axis, dim", zip([None, 0, [0], [0, 1]], [None, "x", ["x"], ["x", "y"]]) ) - def test_quantile(self, q, axis, dim): + def test_quantile(self, q, axis, dim, skipna): v = Variable(["x", "y"], self.d) - actual = v.quantile(q, dim=dim) - expected = np.nanpercentile(self.d, np.array(q) * 100, axis=axis) + actual = v.quantile(q, dim=dim, skipna=skipna) + _percentile_func = np.nanpercentile if skipna else np.percentile + expected = _percentile_func(self.d, np.array(q) * 100, axis=axis) np.testing.assert_allclose(actual.values, expected) @requires_dask From 432e6c761fa8f8a8e27ba42cafe7292efcc60d4f Mon Sep 17 00:00:00 2001 From: AS Date: Sat, 7 Mar 2020 11:04:39 +0100 Subject: [PATCH 3/6] docstrings --- xarray/core/dataarray.py | 10 ++++++++-- xarray/core/dataset.py | 4 +++- xarray/core/groupby.py | 9 +++++++-- 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 5f7eeb71053..7fcb42bf9d2 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -2967,6 +2967,8 @@ def quantile( If True, the dataset's attributes (`attrs`) will be copied from the original object to the new one. If False (default), the new object will be returned without attributes. + skipna : bool, optional + Whether to skip missing values when aggregating. Returns ------- @@ -2979,7 +2981,7 @@ def quantile( See Also -------- - numpy.nanquantile, pandas.Series.quantile, Dataset.quantile + numpy.nanquantile, numpy.quantile, pandas.Series.quantile, Dataset.quantile Examples -------- @@ -3016,7 +3018,11 @@ def quantile( """ ds = self._to_temp_dataset().quantile( - q, dim=dim, keep_attrs=keep_attrs, interpolation=interpolation, skipna=skipna + q, + dim=dim, + keep_attrs=keep_attrs, + interpolation=interpolation, + skipna=skipna, ) return self._from_temp_dataset(ds) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index ec0794a61cb..f286236dd45 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -5177,6 +5177,8 @@ def quantile( object will be returned without attributes. numeric_only : bool, optional If True, only apply ``func`` to variables with a numeric dtype. + skipna : bool, optional + Whether to skip missing values when aggregating. Returns ------- @@ -5189,7 +5191,7 @@ def quantile( See Also -------- - numpy.nanquantile, pandas.Series.quantile, DataArray.quantile + numpy.nanquantile, numpy.quantile, pandas.Series.quantile, DataArray.quantile Examples -------- diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index f2a9ebac6eb..4223d9dc255 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -558,7 +558,9 @@ def fillna(self, value): out = ops.fillna(self, value) return out - def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): + def quantile( + self, q, dim=None, interpolation="linear", keep_attrs=None, skipna=True + ): """Compute the qth quantile over each array in the groups and concatenate them together into a new array. @@ -582,6 +584,8 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): * higher: ``j``. * nearest: ``i`` or ``j``, whichever is nearest. * midpoint: ``(i + j) / 2``. + skipna : bool, optional + Whether to skip missing values when aggregating. Returns ------- @@ -595,7 +599,7 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): See Also -------- - numpy.nanquantile, pandas.Series.quantile, Dataset.quantile, + numpy.nanquantile, numpy.quantile, pandas.Series.quantile, Dataset.quantile, DataArray.quantile Examples @@ -656,6 +660,7 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): dim=dim, interpolation=interpolation, keep_attrs=keep_attrs, + skipna=skipna, ) return out From b1c6f3c045e7b530024ad620934f31636886741c Mon Sep 17 00:00:00 2001 From: AS Date: Sat, 7 Mar 2020 11:14:05 +0100 Subject: [PATCH 4/6] added whatsnew --- doc/whats-new.rst | 3 +++ xarray/core/variable.py | 5 ++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 99ee66fad67..b1110004a7c 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -43,6 +43,9 @@ New Features in 0.14.1) is now on by default. To disable, use ``xarray.set_options(display_style="text")``. By `Julia Signell `_. +- implement ``skipna`` in :py:meth:`Dataset.quantile`, :py:meth:`DataArray.quantile`, + :py:meth:`Dataset.groupby.quantile`, :py:meth:`DataArray.groupby.quantile` + (:issue:`3843`, :pull:`3844`) `Aaron Spring `_. Bug fixes diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 39d20d3ebf8..435edb6f014 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1678,7 +1678,9 @@ def no_conflicts(self, other, equiv=duck_array_ops.array_notnull_equiv): """ return self.broadcast_equals(other, equiv=equiv) - def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None, skipna=True): + def quantile( + self, q, dim=None, interpolation="linear", keep_attrs=None, skipna=True + ): """Compute the qth quantile of the data along the specified dimension. Returns the qth quantiles(s) of the array elements. @@ -1724,6 +1726,7 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None, skipna= """ from .computation import apply_ufunc + _quantile_func = np.nanquantile if skipna else np.quantile if keep_attrs is None: From 6ca5385c0a1f73796f1ec435f0f84d73af95064f Mon Sep 17 00:00:00 2001 From: Aaron Spring Date: Sun, 8 Mar 2020 16:43:40 +0100 Subject: [PATCH 5/6] Update doc/whats-new.rst Co-Authored-By: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> --- doc/whats-new.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index d8cc63467e3..4e851a1f5e6 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -46,9 +46,10 @@ New Features - :py:meth:`Dataset.where` and :py:meth:`DataArray.where` accept a lambda as a first argument, which is then called on the input; replicating pandas' behavior. By `Maximilian Roos `_. -- implement ``skipna`` in :py:meth:`Dataset.quantile`, :py:meth:`DataArray.quantile`, +- Implement ``skipna`` in :py:meth:`Dataset.quantile`, :py:meth:`DataArray.quantile`, :py:meth:`Dataset.groupby.quantile`, :py:meth:`DataArray.groupby.quantile` - (:issue:`3843`, :pull:`3844`) `Aaron Spring `_. + (:issue:`3843`, :pull:`3844`) + By `Aaron Spring `_. Bug fixes From 71e33046afb298e93d1a7dfe2ee3795e3e2ffeb6 Mon Sep 17 00:00:00 2001 From: Aaron Spring Date: Sun, 8 Mar 2020 16:43:51 +0100 Subject: [PATCH 6/6] Update doc/whats-new.rst Co-Authored-By: keewis --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 4e851a1f5e6..2c30db99bcd 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -47,7 +47,7 @@ New Features first argument, which is then called on the input; replicating pandas' behavior. By `Maximilian Roos `_. - Implement ``skipna`` in :py:meth:`Dataset.quantile`, :py:meth:`DataArray.quantile`, - :py:meth:`Dataset.groupby.quantile`, :py:meth:`DataArray.groupby.quantile` + :py:meth:`core.groupby.DatasetGroupBy.quantile`, :py:meth:`core.groupby.DataArrayGroupBy.quantile` (:issue:`3843`, :pull:`3844`) By `Aaron Spring `_.