Skip to content

BUG: Convert data elements when dtype=str in Series constructor with … #18795

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Dec 21, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.22.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,7 @@ Conversion
- Fixed a bug where ``FY5253`` date offsets could incorrectly raise an ``AssertionError`` in arithmetic operations (:issue:`14774`)
- Bug in :meth:`Index.astype` with a categorical dtype where the resultant index is not converted to a :class:`CategoricalIndex` for all types of index (:issue:`18630`)
- Bug in :meth:`Series.astype` and ``Categorical.astype()`` where existing categorical data does not get updated (:issue:`10696`, :issue:`18593`)
- Bug in :class:`Series` constructor with an int or float list where specifying ``dtype=str``, ``dtype='str'`` or ``dtype='U'`` failed to convert the data elements to strings (:issue:`16605`)


Indexing
Expand Down
5 changes: 5 additions & 0 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -3277,6 +3277,11 @@ def _try_cast(arr, take_fast_path):
# This is to prevent mixed-type Series getting all casted to
# NumPy string type, e.g. NaN --> '-1#IND'.
if issubclass(subarr.dtype.type, compat.string_types):
# GH 16605
# If not empty convert the data to dtype
if not isna(data).all():
data = np.array(data, dtype=dtype, copy=False)

subarr = np.array(data, dtype=object, copy=copy)

return subarr
19 changes: 19 additions & 0 deletions pandas/tests/frame/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -676,6 +676,25 @@ def test_arg_for_errors_in_astype(self):

df.astype(np.int8, errors='ignore')

@pytest.mark.parametrize('input_vals', [
    [1, 2],
    [1.0, 2.0, np.nan],
    ['1', '2'],
    list(pd.date_range('1/1/2011', periods=2, freq='H')),
    list(pd.date_range('1/1/2011', periods=2, freq='H',
                       tz='US/Eastern')),
    [pd.Interval(left=0, right=5)],
])
def test_constructor_list_str(self, input_vals):
    # GH 16605
    # Building a DataFrame from a list column with a string dtype
    # ('str', str or 'U') must match building it without a dtype and
    # then casting column 'A' with astype.
    string_dtypes = ('str', str, 'U')

    for str_dtype in string_dtypes:
        expected = DataFrame({'A': input_vals}).astype({'A': str_dtype})
        result = DataFrame({'A': input_vals}, dtype=str_dtype)
        assert_frame_equal(result, expected)


class TestDataFrameDatetimeWithTZ(TestData):

Expand Down
19 changes: 19 additions & 0 deletions pandas/tests/series/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,25 @@ def test_constructor_list_like(self):
result = Series(obj, index=[0, 1, 2])
assert_series_equal(result, expected)

@pytest.mark.parametrize('input_vals', [
    [1, 2],
    [1.0, 2.0, np.nan],
    ['1', '2'],
    list(pd.date_range('1/1/2011', periods=2, freq='H')),
    list(pd.date_range('1/1/2011', periods=2, freq='H',
                       tz='US/Eastern')),
    [pd.Interval(left=0, right=5)],
])
def test_constructor_list_str(self, input_vals):
    # GH 16605
    # Building a Series from a list with a string dtype ('str', str or
    # 'U') must match building it without a dtype and then casting
    # with astype.
    string_dtypes = ('str', str, 'U')

    for str_dtype in string_dtypes:
        expected = Series(input_vals).astype(str_dtype)
        result = Series(input_vals, dtype=str_dtype)
        assert_series_equal(result, expected)

def test_constructor_generator(self):
gen = (i for i in range(10))

Expand Down