From 9569eb3ed1105583fda8174460799f7ba7b43250 Mon Sep 17 00:00:00 2001 From: Nikos Karagiannakis Date: Sun, 18 Mar 2018 16:15:20 +0000 Subject: [PATCH 1/7] TST: Added test for construction Series with dtype=str --- pandas/tests/series/test_constructors.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index e0bfe41645a3f..ca8fd1322fb2f 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -110,6 +110,11 @@ def test_constructor_empty(self, input_class): empty2 = Series(input_class(), index=lrange(10), dtype='float64') assert_series_equal(empty, empty2) + # GH 19853 : with empty string, index and dtype str + empty = Series('', dtype='str', index=range(3)) + assert empty.all() == '' + assert (empty == Series('', index=range(3))).all() + @pytest.mark.parametrize('input_arg', [np.nan, float('nan')]) def test_constructor_nan(self, input_arg): empty = Series(dtype='float64', index=lrange(10)) From a188cf702a6e45bcc5c3cb6f4dafcac568532c6d Mon Sep 17 00:00:00 2001 From: Nikos Karagiannakis Date: Sun, 18 Mar 2018 16:15:46 +0000 Subject: [PATCH 2/7] BUG: Handles case where data is scalar --- pandas/core/series.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index e4801242073a2..8851152406eb1 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4059,9 +4059,14 @@ def _try_cast(arr, take_fast_path): if issubclass(subarr.dtype.type, compat.string_types): # GH 16605 # If not empty convert the data to dtype - if not isna(data).all(): - data = np.array(data, dtype=dtype, copy=False) - - subarr = np.array(data, dtype=object, copy=copy) + try: + all_elements_na = isna(data).all() + except AttributeError: + # GH 19853: If data is a scalar, subarr has already the result + pass + else: + if not all_elements_na: + data = np.array(data, dtype=dtype, 
copy=False) + subarr = np.array(data, dtype=object, copy=copy) return subarr From 1844e6e09f905616561e3b091202c879de2565d6 Mon Sep 17 00:00:00 2001 From: Nikos Karagiannakis Date: Sun, 18 Mar 2018 16:17:15 +0000 Subject: [PATCH 3/7] DOC: added changes to whatsnew/v0.23.0.txt --- doc/source/whatsnew/v0.23.0.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 4179277291478..6a6c3b2583cb1 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -714,6 +714,7 @@ Other API Changes - ``pd.to_datetime('today')`` now returns a datetime, consistent with ``pd.Timestamp('today')``; previously ``pd.to_datetime('today')`` returned a ``.normalized()`` datetime (:issue:`19935`) - :func:`Series.str.replace` now takes an optional `regex` keyword which, when set to ``False``, uses literal string replacement rather than regex replacement (:issue:`16808`) - :func:`DatetimeIndex.strftime` and :func:`PeriodIndex.strftime` now return an ``Index`` instead of a numpy array to be consistent with similar accessors (:issue:`20127`) +``Series`` construction with a ``string``, ``dtype=str`` specified, and ``index`` specified will now return an ``object`` dtyped ``Series``, previously this would raise an AttributeError (:issue:`19853`) .. 
_whatsnew_0230.deprecations: From 55f9998f3c11d33279639bea3a6f34f37b000166 Mon Sep 17 00:00:00 2001 From: Nikos Karagiannakis Date: Mon, 19 Mar 2018 22:59:48 +0000 Subject: [PATCH 4/7] BUG: simplification --- pandas/core/series.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 8851152406eb1..dd8eff2a4fa7f 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4059,13 +4059,9 @@ def _try_cast(arr, take_fast_path): if issubclass(subarr.dtype.type, compat.string_types): # GH 16605 # If not empty convert the data to dtype - try: - all_elements_na = isna(data).all() - except AttributeError: - # GH 19853: If data is a scalar, subarr has already the result - pass - else: - if not all_elements_na: + # GH 19853: If data is a scalar, subarr has already the result + if not np.isscalar(data): + if not np.all(isna(data)): data = np.array(data, dtype=dtype, copy=False) subarr = np.array(data, dtype=object, copy=copy) From d6aac9080d3e7f773fcf9979cfff5b6c948f761e Mon Sep 17 00:00:00 2001 From: Nikos Karagiannakis Date: Mon, 19 Mar 2018 23:00:19 +0000 Subject: [PATCH 5/7] TST: better testing --- pandas/tests/series/test_constructors.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index ca8fd1322fb2f..82b5b1c10fa2d 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -111,9 +111,9 @@ def test_constructor_empty(self, input_class): assert_series_equal(empty, empty2) # GH 19853 : with empty string, index and dtype str - empty = Series('', dtype='str', index=range(3)) - assert empty.all() == '' - assert (empty == Series('', index=range(3))).all() + empty = Series('', dtype=str, index=range(3)) + empty2 = Series('', index=range(3)) + assert_series_equal(empty, empty2) @pytest.mark.parametrize('input_arg', [np.nan, float('nan')]) def 
test_constructor_nan(self, input_arg): From ba2d2c02b14f1f75aaafe76ba2fe30b2ea0e1dee Mon Sep 17 00:00:00 2001 From: Nikos Karagiannakis Date: Mon, 19 Mar 2018 23:00:47 +0000 Subject: [PATCH 6/7] DOC: better documentation --- doc/source/whatsnew/v0.23.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 6a6c3b2583cb1..2a88b4feaa029 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -714,7 +714,6 @@ Other API Changes - ``pd.to_datetime('today')`` now returns a datetime, consistent with ``pd.Timestamp('today')``; previously ``pd.to_datetime('today')`` returned a ``.normalized()`` datetime (:issue:`19935`) - :func:`Series.str.replace` now takes an optional `regex` keyword which, when set to ``False``, uses literal string replacement rather than regex replacement (:issue:`16808`) - :func:`DatetimeIndex.strftime` and :func:`PeriodIndex.strftime` now return an ``Index`` instead of a numpy array to be consistent with similar accessors (:issue:`20127`) -``Series`` construction with a ``string``, ``dtype=str`` specified, and ``index`` specified will now return an ``object`` dtyped ``Series``, previously this would raise an AttributeError (:issue:`19853`) .. 
_whatsnew_0230.deprecations: @@ -1033,6 +1032,7 @@ Reshaping - Bug in :class:`Series` constructor with ``Categorical`` where a ```ValueError`` is not raised when an index of different length is given (:issue:`19342`) - Bug in :meth:`DataFrame.astype` where column metadata is lost when converting to categorical or a dictionary of dtypes (:issue:`19920`) - Bug in :func:`cut` and :func:`qcut` where timezone information was dropped (:issue:`19872`) +- Bug in :class:`Series` constructor where passing a scalar string together with ``dtype=str`` and an ``index`` raised an ``AttributeError`` (:issue:`19853`) Other ^^^^^ From 41d35cac535d81c0ebd0fa488b2a004758c57b7d Mon Sep 17 00:00:00 2001 From: Nikos Karagiannakis Date: Tue, 20 Mar 2018 02:36:09 +0000 Subject: [PATCH 7/7] BUG: use is_scalar instead of np.isscalar --- pandas/core/series.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index dd8eff2a4fa7f..bcf97832e40ce 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4060,7 +4060,7 @@ def _try_cast(arr, take_fast_path): # GH 16605 # If not empty convert the data to dtype # GH 19853: If data is a scalar, subarr has already the result - if not np.isscalar(data): + if not is_scalar(data): if not np.all(isna(data)): data = np.array(data, dtype=dtype, copy=False) subarr = np.array(data, dtype=object, copy=copy)