From fb0ca17115b7fc404a8d849149dab4e71f77e330 Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 11 Mar 2022 14:12:24 +0100 Subject: [PATCH 1/3] Regression in read csv causing segfault for invalid file input --- doc/source/whatsnew/v1.4.2.rst | 1 + pandas/io/parsers/readers.py | 4 ++++ pandas/tests/io/parser/test_c_parser_only.py | 7 +++++++ 3 files changed, 12 insertions(+) diff --git a/doc/source/whatsnew/v1.4.2.rst b/doc/source/whatsnew/v1.4.2.rst index badda6a73d1c8..daa3a838530f0 100644 --- a/doc/source/whatsnew/v1.4.2.rst +++ b/doc/source/whatsnew/v1.4.2.rst @@ -15,6 +15,7 @@ including other versions of pandas. Fixed regressions ~~~~~~~~~~~~~~~~~ - Fixed regression in :meth:`DataFrame.drop` and :meth:`Series.drop` when :class:`Index` had extension dtype and duplicates (:issue:`45860`) +- Fixed regression in :func:`read_csv` killing python process when invalid file input was given for ``engine="c"` (:issue:`45957`) - Fixed memory performance regression in :meth:`Series.fillna` when called on a :class:`DataFrame` column with ``inplace=True`` (:issue:`46149`) - Provided an alternative solution for passing custom Excel formats in :meth:`.Styler.to_excel`, which was a regression based on stricter CSS validation. 
Examples available in the documentation for :meth:`.Styler.format` (:issue:`46152`) - diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index 7684fa32fbd66..e0db62b4fd077 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -1710,6 +1710,10 @@ def _make_engine( assert self.handles is not None f = self.handles.handle + elif not engine == "python": + msg = f"Invalid file path or buffer object type: {type(f)}" + raise ValueError(msg) + try: return mapping[engine](f, **self.options) except Exception: diff --git a/pandas/tests/io/parser/test_c_parser_only.py b/pandas/tests/io/parser/test_c_parser_only.py index 83cccdb37b343..60d9362e39c18 100644 --- a/pandas/tests/io/parser/test_c_parser_only.py +++ b/pandas/tests/io/parser/test_c_parser_only.py @@ -680,3 +680,10 @@ def test_float_precision_options(c_parser_only): with pytest.raises(ValueError, match=msg): parser.read_csv(StringIO(s), float_precision="junk") + + +def test_invalid_file_inputs(c_parser_only): + # GH#45957 + parser = c_parser_only + with pytest.raises(ValueError, match="Invalid"): + parser.read_csv([]) From ae2a83895e749af3e3c884272669870e555228bb Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 11 Mar 2022 16:14:20 +0100 Subject: [PATCH 2/3] Fix docs --- doc/source/whatsnew/v1.4.2.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.4.2.rst b/doc/source/whatsnew/v1.4.2.rst index daa3a838530f0..239f02b0b6bcf 100644 --- a/doc/source/whatsnew/v1.4.2.rst +++ b/doc/source/whatsnew/v1.4.2.rst @@ -15,7 +15,7 @@ including other versions of pandas. 
Fixed regressions ~~~~~~~~~~~~~~~~~ - Fixed regression in :meth:`DataFrame.drop` and :meth:`Series.drop` when :class:`Index` had extension dtype and duplicates (:issue:`45860`) -- Fixed regression in :func:`read_csv` killing python process when invalid file input was given for ``engine="c"` (:issue:`45957`) +- Fixed regression in :func:`read_csv` killing python process when invalid file input was given for ``engine="c"`` (:issue:`45957`) - Fixed memory performance regression in :meth:`Series.fillna` when called on a :class:`DataFrame` column with ``inplace=True`` (:issue:`46149`) - Provided an alternative solution for passing custom Excel formats in :meth:`.Styler.to_excel`, which was a regression based on stricter CSS validation. Examples available in the documentation for :meth:`.Styler.format` (:issue:`46152`) - From 7770f76153e9e52c625b3e7c562bd9c85f33dcf1 Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 11 Mar 2022 21:16:39 +0100 Subject: [PATCH 3/3] Move test --- pandas/io/parsers/readers.py | 2 +- pandas/tests/io/parser/test_c_parser_only.py | 7 ------- pandas/tests/io/parser/test_unsupported.py | 10 ++++++++++ 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index e0db62b4fd077..95b03c9844219 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -1710,7 +1710,7 @@ def _make_engine( assert self.handles is not None f = self.handles.handle - elif not engine == "python": + elif engine != "python": msg = f"Invalid file path or buffer object type: {type(f)}" raise ValueError(msg) diff --git a/pandas/tests/io/parser/test_c_parser_only.py b/pandas/tests/io/parser/test_c_parser_only.py index 60d9362e39c18..83cccdb37b343 100644 --- a/pandas/tests/io/parser/test_c_parser_only.py +++ b/pandas/tests/io/parser/test_c_parser_only.py @@ -680,10 +680,3 @@ def test_float_precision_options(c_parser_only): with pytest.raises(ValueError, match=msg): parser.read_csv(StringIO(s), 
float_precision="junk") - - -def test_invalid_file_inputs(c_parser_only): - # GH#45957 - parser = c_parser_only - with pytest.raises(ValueError, match="Invalid"): - parser.read_csv([]) diff --git a/pandas/tests/io/parser/test_unsupported.py b/pandas/tests/io/parser/test_unsupported.py index 7937f47e8bff5..f346fad7acecf 100644 --- a/pandas/tests/io/parser/test_unsupported.py +++ b/pandas/tests/io/parser/test_unsupported.py @@ -189,3 +189,13 @@ def test_close_file_handle_on_invalid_usecols(all_parsers): parser.read_csv(fname, usecols=["col1", "col2", "col3"]) # unlink fails on windows if file handles still point to it os.unlink(fname) + + +def test_invalid_file_inputs(all_parsers): + # GH#45957 + parser = all_parsers + if parser.engine == "python": + pytest.skip("Python engine supports lists.") + + with pytest.raises(ValueError, match="Invalid"): + parser.read_csv([])