diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt index 0579a80aad28e..1a19033ac61ee 100644 --- a/doc/source/whatsnew/v0.22.0.txt +++ b/doc/source/whatsnew/v0.22.0.txt @@ -277,6 +277,7 @@ Conversion - Fixed a bug where ``FY5253`` date offsets could incorrectly raise an ``AssertionError`` in arithmetic operatons (:issue:`14774`) - Bug in :meth:`Index.astype` with a categorical dtype where the resultant index is not converted to a :class:`CategoricalIndex` for all types of index (:issue:`18630`) - Bug in :meth:`Series.astype` and ``Categorical.astype()`` where an existing categorical data does not get updated (:issue:`10696`, :issue:`18593`) +- Bug in :class:`Series` constructor with an int or float list where specifying ``dtype=str``, ``dtype='str'`` or ``dtype='U'`` failed to convert the data elements to strings (:issue:`16605`) Indexing diff --git a/pandas/core/series.py b/pandas/core/series.py index a3e7be1bfb35a..a1d296deae319 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3277,6 +3277,11 @@ def _try_cast(arr, take_fast_path): # This is to prevent mixed-type Series getting all casted to # NumPy string type, e.g. NaN --> '-1#IND'. 
if issubclass(subarr.dtype.type, compat.string_types): + # GH 16605 + # Cast data to dtype unless it is empty or entirely NA + if not isna(data).all(): + data = np.array(data, dtype=dtype, copy=False) + subarr = np.array(data, dtype=object, copy=copy) return subarr diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index 610b9f7bdbf6c..21c028e634bc0 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -676,6 +676,25 @@ def test_arg_for_errors_in_astype(self): df.astype(np.int8, errors='ignore') + @pytest.mark.parametrize('input_vals', [ + ([1, 2]), + ([1.0, 2.0, np.nan]), + (['1', '2']), + (list(pd.date_range('1/1/2011', periods=2, freq='H'))), + (list(pd.date_range('1/1/2011', periods=2, freq='H', + tz='US/Eastern'))), + ([pd.Interval(left=0, right=5)]), + ]) + def test_constructor_list_str(self, input_vals): + # GH 16605 + # Ensure that data elements are converted to strings when + # dtype is str, 'str', or 'U' + + for dtype in ['str', str, 'U']: + result = DataFrame({'A': input_vals}, dtype=dtype) + expected = DataFrame({'A': input_vals}).astype({'A': dtype}) + assert_frame_equal(result, expected) + class TestDataFrameDatetimeWithTZ(TestData): diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index f9842514ed5e5..08416fe34efcc 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -142,6 +142,25 @@ def test_constructor_list_like(self): result = Series(obj, index=[0, 1, 2]) assert_series_equal(result, expected) + @pytest.mark.parametrize('input_vals', [ + ([1, 2]), + ([1.0, 2.0, np.nan]), + (['1', '2']), + (list(pd.date_range('1/1/2011', periods=2, freq='H'))), + (list(pd.date_range('1/1/2011', periods=2, freq='H', + tz='US/Eastern'))), + ([pd.Interval(left=0, right=5)]), + ]) + def test_constructor_list_str(self, input_vals): + # GH 16605 + # Ensure that data elements from a list are converted to strings + # when 
dtype is str, 'str', or 'U' + + for dtype in ['str', str, 'U']: + result = Series(input_vals, dtype=dtype) + expected = Series(input_vals).astype(dtype) + assert_series_equal(result, expected) + def test_constructor_generator(self): gen = (i for i in range(10))