diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index cd917924880f1..39b5a810e3a8f 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -568,6 +568,7 @@ I/O - Bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`) - Bug in :meth:`HDFStore.get` was failing to save data of dtype datetime64[s] correctly (:issue:`59004`) - Bug in :meth:`read_csv` causing segmentation fault when ``encoding_errors`` is not a string. (:issue:`59059`) +- Bug in :meth:`read_csv` not respecting the ``dtype`` specified for a column when that column is used as the index via ``index_col``. (:issue:`59077`) - Bug in :meth:`read_csv` raising ``TypeError`` when ``index_col`` is specified and ``na_values`` is a dict containing the key ``None``. (:issue:`57547`) - Bug in :meth:`read_csv` raising ``TypeError`` when ``nrows`` and ``iterator`` are specified without specifying a ``chunksize``. (:issue:`59079`) - Bug in :meth:`read_excel` raising ``ValueError`` when passing array of boolean values when ``dtype="boolean"``. 
(:issue:`58159`) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 7d43498d4267b..ffd76b4c86670 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -6379,9 +6379,11 @@ def _transform_index(self, func, *, level=None) -> Index: """ if isinstance(self, ABCMultiIndex): values = [ - self.get_level_values(i).map(func) - if i == level or level is None - else self.get_level_values(i) + ( + self.get_level_values(i).map(func) + if i == level or level is None + else self.get_level_values(i) + ) for i in range(self.nlevels) ] return type(self).from_arrays(values) @@ -7478,7 +7480,14 @@ def ensure_index_from_sequences(sequences, names=None) -> Index: if len(sequences) == 1: if names is not None: names = names[0] - return Index(maybe_sequence_to_range(sequences[0]), name=names) + data = sequences[0] + conv_data = maybe_sequence_to_range(data) + dtype = ( + data.dtype + if isinstance(data, np.ndarray) and isinstance(conv_data, range) + else None + ) + return Index(conv_data, dtype=dtype, name=names) else: # TODO: Apply maybe_sequence_to_range to sequences? 
return MultiIndex.from_arrays(sequences, names=names) diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py index 05fe963e9b2b7..e3002afd46c5d 100644 --- a/pandas/io/parsers/python_parser.py +++ b/pandas/io/parsers/python_parser.py @@ -309,13 +309,17 @@ def read( data, columns = self._exclude_implicit_index(alldata) conv_data = self._convert_data(data) - conv_data = self._do_date_conversions(columns, conv_data) + date_data = self._do_date_conversions(columns, conv_data) + + if not self._implicit_index: + # propagate index dtype + alldata = list(conv_data.values()) # type: ignore[arg-type] index, result_columns = self._make_index( - conv_data, alldata, columns, indexnamerow + date_data, alldata, columns, indexnamerow ) - return index, result_columns, conv_data + return index, result_columns, date_data def _exclude_implicit_index( self, diff --git a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py index d45368dece6d2..452bd19ecbd3b 100644 --- a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py +++ b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py @@ -59,18 +59,25 @@ def test_dtype_all_columns(all_parsers, dtype, check_orig): def test_dtype_per_column(all_parsers): parser = all_parsers data = """\ -one,two -1,2.5 -2,3.5 -3,4.5 -4,5.5""" +one,two,three +1,2.5,11 +2,3.5,12 +3,4.5,13 +4,5.5,14""" expected = DataFrame( - [[1, "2.5"], [2, "3.5"], [3, "4.5"], [4, "5.5"]], columns=["one", "two"] + [[1, "2.5", 11], [2, "3.5", 12], [3, "4.5", 13], [4, "5.5", 14]], + columns=["one", "two", "three"], ) expected["one"] = expected["one"].astype(np.float64) expected["two"] = expected["two"].astype(object) + expected["three"] = expected["three"].astype(np.uint32) + expected.set_index("three", inplace=True) - result = parser.read_csv(StringIO(data), dtype={"one": np.float64, 1: str}) + result = parser.read_csv( + StringIO(data), + dtype={"one": np.float64, 1: str, "three": np.uint32}, + 
index_col="three", + ) tm.assert_frame_equal(result, expected)