From bda02573c956b6a888f227f7e58649ce6d133070 Mon Sep 17 00:00:00 2001
From: Johan Kahrstrom
Date: Wed, 27 May 2020 18:05:33 +0100
Subject: [PATCH 1/2] BUG: Fixes GH34411

_query_iterator now yields a single empty pd.DataFrame if chunksize is
set and the result set is empty

Added release note
---
 doc/source/whatsnew/v1.2.0.rst |  1 +
 pandas/io/sql.py               | 22 ++++++++++++++++++++++
 pandas/tests/io/test_sql.py    |  6 ++++++
 3 files changed, 29 insertions(+)

diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
index b07351d05defb..1389fb4ea4d31 100644
--- a/doc/source/whatsnew/v1.2.0.rst
+++ b/doc/source/whatsnew/v1.2.0.rst
@@ -292,6 +292,7 @@ I/O
 - :meth:`to_csv` passes compression arguments for `'gzip'` always to `gzip.GzipFile` (:issue:`28103`)
 - :meth:`to_csv` did not support zip compression for binary file object not having a filename (:issue: `35058`)
 - :meth:`to_csv` and :meth:`read_csv` did not honor `compression` and `encoding` for path-like objects that are internally converted to file-like objects (:issue:`35677`, :issue:`26124`, and :issue:`32392`)
+- :meth:`read_sql` returned an empty generator if `chunksize` was non-zero and the query returned no results. Now returns a generator with a single empty dataframe (:issue:`34411`)
 
 Plotting
 ^^^^^^^^
diff --git a/pandas/io/sql.py b/pandas/io/sql.py
index 51888e5021d80..5a72f06afe5a6 100644
--- a/pandas/io/sql.py
+++ b/pandas/io/sql.py
@@ -834,11 +834,17 @@ def _query_iterator(
         self, result, chunksize, columns, coerce_float=True, parse_dates=None
     ):
         """Return generator through chunked result set."""
+        has_read_data = False
         while True:
             data = result.fetchmany(chunksize)
             if not data:
+                if not has_read_data:
+                    yield DataFrame.from_records(
+                        [], columns=columns, coerce_float=coerce_float
+                    )
                 break
             else:
+                has_read_data = True
                 self.frame = DataFrame.from_records(
                     data, columns=columns, coerce_float=coerce_float
                 )
@@ -1228,11 +1234,21 @@ def _query_iterator(
         result, chunksize, columns, index_col=None, coerce_float=True, parse_dates=None
     ):
         """Return generator through chunked result set"""
+        has_read_data = False
         while True:
             data = result.fetchmany(chunksize)
             if not data:
+                if not has_read_data:
+                    yield _wrap_result(
+                        [],
+                        columns,
+                        index_col=index_col,
+                        coerce_float=coerce_float,
+                        parse_dates=parse_dates,
+                    )
                 break
             else:
+                has_read_data = True
                 yield _wrap_result(
                     data,
                     columns,
@@ -1698,14 +1714,20 @@ def _query_iterator(
         cursor, chunksize, columns, index_col=None, coerce_float=True, parse_dates=None
     ):
         """Return generator through chunked result set"""
+        has_read_data = False
         while True:
             data = cursor.fetchmany(chunksize)
             if type(data) == tuple:
                 data = list(data)
             if not data:
                 cursor.close()
+                if not has_read_data:
+                    yield DataFrame.from_records(
+                        [], columns=columns, coerce_float=coerce_float
+                    )
                 break
             else:
+                has_read_data = True
                 yield _wrap_result(
                     data,
                     columns,
diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py
index a7e3162ed7b73..8098f7b18b34c 100644
--- a/pandas/tests/io/test_sql.py
+++ b/pandas/tests/io/test_sql.py
@@ -612,6 +612,12 @@ def test_read_sql_view(self):
         iris_frame = sql.read_sql_query("SELECT * FROM iris_view", self.conn)
         self._check_iris_loaded_frame(iris_frame)
 
+    def test_read_sql_with_chunksize_no_result(self):
+        query = "SELECT * FROM iris_view WHERE SepalLength < 0.0"
+        with_batch = sql.read_sql_query(query, self.conn, chunksize=5)
+        without_batch = sql.read_sql_query(query, self.conn)
+        tm.assert_frame_equal(pd.concat(with_batch), without_batch)
+
     def test_to_sql(self):
         sql.to_sql(self.test_frame1, "test_frame1", self.conn)
         assert sql.has_table("test_frame1", self.conn)

From de5f395692bc1006a19e28ececb4ffd9e3b1b038 Mon Sep 17 00:00:00 2001
From: Johan Kahrstrom
Date: Thu, 14 Jan 2021 17:54:15 +0000
Subject: [PATCH 2/2] Moved read_sql chunksize fix (issue 34411) release note
 to 1.3
---
 doc/source/whatsnew/v1.2.0.rst | 1 -
 doc/source/whatsnew/v1.3.0.rst | 1 +
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
index 88f8c7c663444..8e9361125513b 100644
--- a/doc/source/whatsnew/v1.2.0.rst
+++ b/doc/source/whatsnew/v1.2.0.rst
@@ -731,7 +731,6 @@ I/O
 - :func:`read_csv` was closing user-provided binary file handles when ``engine="c"`` and an ``encoding`` was requested (:issue:`36980`)
 - Bug in :meth:`DataFrame.to_hdf` was not dropping missing rows with ``dropna=True`` (:issue:`35719`)
 - Bug in :func:`read_html` was raising a ``TypeError`` when supplying a ``pathlib.Path`` argument to the ``io`` parameter (:issue:`37705`)
-- :meth:`read_sql` returned an empty generator if ``chunksize`` was non-zero and the query returned no results. Now returns a generator with a single empty dataframe (:issue:`34411`)
 - :meth:`DataFrame.to_excel`, :meth:`Series.to_excel`, :meth:`DataFrame.to_markdown`, and :meth:`Series.to_markdown` now support writing to fsspec URLs such as S3 and Google Cloud Storage (:issue:`33987`)
 - Bug in :func:`read_fwf` with ``skip_blank_lines=True`` was not skipping blank lines (:issue:`37758`)
 - Parse missing values using :func:`read_json` with ``dtype=False`` to ``NaN`` instead of ``None`` (:issue:`28501`)
diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index 196d2f2d968a7..0cb46a5164674 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -281,6 +281,7 @@ I/O
 - :func:`read_excel` now respects :func:`set_option` (:issue:`34252`)
 - Bug in :func:`read_csv` not switching ``true_values`` and ``false_values`` for nullable ``boolean`` dtype (:issue:`34655`)
 - Bug in :func:`read_json` when ``orient="split"`` does not maintain numeric string index (:issue:`28556`)
+- :meth:`read_sql` returned an empty generator if ``chunksize`` was non-zero and the query returned no results. Now returns a generator with a single empty dataframe (:issue:`34411`)
 
 Period
 ^^^^^^
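
Not part of the patch: a minimal sketch of the behaviour the fix enables, using a throwaway
in-memory SQLite table whose name and column ("iris", "SepalLength") are illustrative only.
With the change, read_sql_query with a non-zero chunksize yields a single empty DataFrame
when the query matches no rows, so concatenating the chunks no longer raises
"No objects to concatenate" and matches the non-chunked call.

    # Illustrative example, assuming the patched pandas; not taken from the patch itself.
    import sqlite3

    import pandas as pd

    conn = sqlite3.connect(":memory:")
    pd.DataFrame({"SepalLength": [5.1, 4.9]}).to_sql("iris", conn, index=False)

    query = "SELECT * FROM iris WHERE SepalLength < 0.0"  # matches no rows

    # The chunked generator now yields one empty DataFrame instead of nothing,
    # so pd.concat over the chunks works just like the non-chunked read.
    chunks = pd.read_sql_query(query, conn, chunksize=5)
    result = pd.concat(chunks)
    print(result.empty, list(result.columns))  # True ['SepalLength']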