From 06243c6754b51d6fd199665eea32b600c4676c9b Mon Sep 17 00:00:00 2001 From: Maximiliano Greco Date: Sat, 31 Mar 2018 17:20:34 +0200 Subject: [PATCH 1/7] BUG: Fixed first last valid index (#20499) Removed old methods from Series and DF Added new methods into NDFrame Created new convenient method _find_first_valid --- pandas/core/frame.py | 25 ------------------------- pandas/core/generic.py | 31 +++++++++++++++++++++++++++++++ pandas/core/series.py | 26 -------------------------- 3 files changed, 31 insertions(+), 51 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9626079660771..35f3a7c20e270 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5015,31 +5015,6 @@ def update(self, other, join='left', overwrite=True, filter_func=None, self[col] = expressions.where(mask, this, that) - # ---------------------------------------------------------------------- - # Misc methods - - def _get_valid_indices(self): - is_valid = self.count(1) > 0 - return self.index[is_valid] - - @Appender(_shared_docs['valid_index'] % { - 'position': 'first', 'klass': 'DataFrame'}) - def first_valid_index(self): - if len(self) == 0: - return None - - valid_indices = self._get_valid_indices() - return valid_indices[0] if len(valid_indices) else None - - @Appender(_shared_docs['valid_index'] % { - 'position': 'last', 'klass': 'DataFrame'}) - def last_valid_index(self): - if len(self) == 0: - return None - - valid_indices = self._get_valid_indices() - return valid_indices[-1] if len(valid_indices) else None - # ---------------------------------------------------------------------- # Data reshaping diff --git a/pandas/core/generic.py b/pandas/core/generic.py index d5cd22732f0a9..ca00624216cda 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8763,6 +8763,37 @@ def transform(self, func, *args, **kwargs): scalar : type of index """ + def _find_first_valid(self, direction=1): + if len(self) == 0: # early stop + return None + is_valid = 
~self.isna() + + if self.ndim == 2: + is_valid = is_valid.any(1) # reduce axis 1 + + if direction == 1: + i = is_valid.idxmax() + if not is_valid[i]: + return None + else: + return i + elif direction == -1: + i = is_valid.values[::-1].argmax() + if not is_valid.iat[len(self) - i - 1]: + return None + else: + return self.index[len(self) - i - 1] + + @Appender(_shared_docs['valid_index'] % {'position': 'first', + 'klass': 'NDFrame'}) + def first_valid_index(self): + return self._find_first_valid(1) + + @Appender(_shared_docs['valid_index'] % {'position': 'last', + 'klass': 'NDFrame'}) + def last_valid_index(self): + return self._find_first_valid(-1) + def _doc_parms(cls): """Return a tuple of the doc parms.""" diff --git a/pandas/core/series.py b/pandas/core/series.py index f3630dc43fbd1..808ac5e721fc8 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3887,32 +3887,6 @@ def valid(self, inplace=False, **kwargs): "Use .dropna instead.", FutureWarning, stacklevel=2) return self.dropna(inplace=inplace, **kwargs) - @Appender(generic._shared_docs['valid_index'] % { - 'position': 'first', 'klass': 'Series'}) - def first_valid_index(self): - if len(self) == 0: - return None - - mask = isna(self._values) - i = mask.argmin() - if mask[i]: - return None - else: - return self.index[i] - - @Appender(generic._shared_docs['valid_index'] % { - 'position': 'last', 'klass': 'Series'}) - def last_valid_index(self): - if len(self) == 0: - return None - - mask = isna(self._values[::-1]) - i = mask.argmin() - if mask[i]: - return None - else: - return self.index[len(self) - i - 1] - # ---------------------------------------------------------------------- # Time series-oriented methods From 02f87c06e1650ff0a78822e4318f713ef73051f0 Mon Sep 17 00:00:00 2001 From: Maximiliano Greco Date: Sat, 31 Mar 2018 17:22:48 +0200 Subject: [PATCH 2/7] TST: Added DF test cases for first last valid index (#20499) --- pandas/tests/frame/test_timeseries.py | 10 ++++++++++ 1 file changed, 10 
insertions(+) diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index ceb6c942c81b1..c65225cab3d71 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -530,6 +530,16 @@ def test_first_last_valid(self): assert frame.last_valid_index() is None assert frame.first_valid_index() is None + # GH20499: it preserves freq with holes + frame.index = date_range("20110101", periods=N, freq="B") + frame.iloc[1] = 1 + frame.iloc[-2] = 1 + assert frame.first_valid_index() == frame.index[1] + assert frame.last_valid_index() == frame.index[-2] + assert frame.first_valid_index().freq == frame.index.freq + assert frame.last_valid_index().freq == frame.index.freq + + def test_at_time_frame(self): rng = date_range('1/1/2000', '1/5/2000', freq='5min') ts = DataFrame(np.random.randn(len(rng), 2), index=rng) From de5de6e8f96ac4cace92f15e65515ffbf76be2fb Mon Sep 17 00:00:00 2001 From: Maximiliano Greco Date: Sat, 31 Mar 2018 17:23:21 +0200 Subject: [PATCH 3/7] TST: Added Series test cases for first last valid index (#20499) --- pandas/tests/series/test_timeseries.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index baf2619c7b022..c9eae8986d8bd 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -432,6 +432,16 @@ def test_first_last_valid(self): assert empty.last_valid_index() is None assert empty.first_valid_index() is None + # GH20499: it preserves freq with holes + ts.index = date_range("20110101", periods=len(ts), freq="B") + ts.iloc[1] = 1 + ts.iloc[-2] = 1 + assert ts.first_valid_index() == ts.index[1] + assert ts.last_valid_index() == ts.index[-2] + assert ts.first_valid_index().freq == ts.index.freq + assert ts.last_valid_index().freq == ts.index.freq + + def test_mpl_compat_hack(self): result = self.ts[:, np.newaxis] expected = self.ts.values[:, 
np.newaxis] From 4408e95bce0f51b4c00113ab7b2e519b767102f3 Mon Sep 17 00:00:00 2001 From: Maximiliano Greco Date: Sat, 31 Mar 2018 17:30:52 +0200 Subject: [PATCH 4/7] CLN: Removed blank lines. --- pandas/tests/frame/test_timeseries.py | 1 - pandas/tests/series/test_timeseries.py | 1 - 2 files changed, 2 deletions(-) diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index c65225cab3d71..277c3c9bc5c23 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -539,7 +539,6 @@ def test_first_last_valid(self): assert frame.first_valid_index().freq == frame.index.freq assert frame.last_valid_index().freq == frame.index.freq - def test_at_time_frame(self): rng = date_range('1/1/2000', '1/5/2000', freq='5min') ts = DataFrame(np.random.randn(len(rng), 2), index=rng) diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index c9eae8986d8bd..8e537b137baaf 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -441,7 +441,6 @@ def test_first_last_valid(self): assert ts.first_valid_index().freq == ts.index.freq assert ts.last_valid_index().freq == ts.index.freq - def test_mpl_compat_hack(self): result = self.ts[:, np.newaxis] expected = self.ts.values[:, np.newaxis] From 881b6b23035f770d24384eb089f7eabc56903bee Mon Sep 17 00:00:00 2001 From: Maximiliano Greco Date: Sat, 31 Mar 2018 18:20:16 +0200 Subject: [PATCH 5/7] CLN: Added doc and fixed variable names. 
--- pandas/core/generic.py | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index ca00624216cda..97521e356277a 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8763,7 +8763,18 @@ def transform(self, func, *args, **kwargs): scalar : type of index """ - def _find_first_valid(self, direction=1): + def _find_valid_index(self, how): + """Retrieves the index of the first valid value. + + Parameters + ---------- + how : {'first', 'last'} + Use this parameter to change between the first or last valid index. + + Returns + ------- + idx_first_valid : type of index + """ if len(self) == 0: # early stop return None is_valid = ~self.isna() @@ -8771,28 +8782,32 @@ def _find_first_valid(self, direction=1): if self.ndim == 2: is_valid = is_valid.any(1) # reduce axis 1 - if direction == 1: + if how == 'first': + # First valid value case i = is_valid.idxmax() if not is_valid[i]: return None else: return i - elif direction == -1: + elif how == 'last': + # Last valid value case i = is_valid.values[::-1].argmax() if not is_valid.iat[len(self) - i - 1]: return None else: return self.index[len(self) - i - 1] + else: + raise ValueError @Appender(_shared_docs['valid_index'] % {'position': 'first', 'klass': 'NDFrame'}) def first_valid_index(self): - return self._find_first_valid(1) + return self._find_valid_index('first') @Appender(_shared_docs['valid_index'] % {'position': 'last', 'klass': 'NDFrame'}) def last_valid_index(self): - return self._find_first_valid(-1) + return self._find_valid_index('last') def _doc_parms(cls): From d4d30973d9ea72f6019ded2f9dd1f9741eb29570 Mon Sep 17 00:00:00 2001 From: Maximiliano Greco Date: Sat, 31 Mar 2018 22:14:25 +0200 Subject: [PATCH 6/7] DOC: Add whatsnew note --- doc/source/whatsnew/v0.23.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 
ce63cb2473bc4..1f5948649c5e2 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -1061,7 +1061,7 @@ Indexing - Bug in :meth:`DataFrame.drop_duplicates` where no ``KeyError`` is raised when passing in columns that don't exist on the ``DataFrame`` (issue:`19726`) - Bug in ``Index`` subclasses constructors that ignore unexpected keyword arguments (:issue:`19348`) - Bug in :meth:`Index.difference` when taking difference of an ``Index`` with itself (:issue:`20040`) - +- Bug in :meth:`DataFrame.first_valid_index` and :meth:`DataFrame.last_valid_index` in presence of entire rows of NaNs in the middle of values (:issue:`20499`). MultiIndex ^^^^^^^^^^ From 48611fae135e5210008f20cca316dddbac6a6298 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 1 Apr 2018 09:38:48 -0400 Subject: [PATCH 7/7] add assert --- pandas/core/generic.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 97521e356277a..1931875799c73 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8775,6 +8775,8 @@ def _find_valid_index(self, how): ------- idx_first_valid : type of index """ + assert how in ['first', 'last'] + if len(self) == 0: # early stop return None is_valid = ~self.isna() @@ -8787,17 +8789,14 @@ def _find_valid_index(self, how): i = is_valid.idxmax() if not is_valid[i]: return None - else: - return i + return i + elif how == 'last': # Last valid value case i = is_valid.values[::-1].argmax() if not is_valid.iat[len(self) - i - 1]: return None - else: - return self.index[len(self) - i - 1] - else: - raise ValueError + return self.index[len(self) - i - 1] @Appender(_shared_docs['valid_index'] % {'position': 'first', 'klass': 'NDFrame'})