From 275380e3d76e96d91c76116acea7f5bc614b7935 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 19 May 2023 14:09:13 -0700 Subject: [PATCH 1/3] BUG: read_sql reading duplicate tz aware columns --- doc/source/whatsnew/v2.1.0.rst | 1 + pandas/io/sql.py | 4 ++-- pandas/tests/io/test_sql.py | 42 ++++++++++++++++++++++++++++++++++ 3 files changed, 45 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index c511626f060cb..710ffacee90d9 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -393,6 +393,7 @@ I/O - Bug in :func:`read_hdf` not properly closing store after a ``IndexError`` is raised (:issue:`52781`) - Bug in :func:`read_html`, style elements were read into DataFrames (:issue:`52197`) - Bug in :func:`read_html`, tail texts were removed together with elements containing ``display:none`` style (:issue:`51629`) +- Bug in :func:`read_sql` when reading multiple timezone aware columns with the same column name (:issue:`44421`) - Bug when writing and reading empty Stata dta files where dtype information was lost (:issue:`46240`) Period diff --git a/pandas/io/sql.py b/pandas/io/sql.py index ebb994f92d8ad..37f1dd4269937 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -131,13 +131,13 @@ def _parse_date_columns(data_frame, parse_dates): # we want to coerce datetime64_tz dtypes for now to UTC # we could in theory do a 'nice' conversion from a FixedOffset tz # GH11216 - for col_name, df_col in data_frame.items(): + for i, (col_name, df_col) in enumerate(data_frame.items()): if isinstance(df_col.dtype, DatetimeTZDtype) or col_name in parse_dates: try: fmt = parse_dates[col_name] except TypeError: fmt = None - data_frame[col_name] = _handle_date_column(df_col, format=fmt) + data_frame.iloc[:, i] = _handle_date_column(df_col, format=fmt) return data_frame diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 77e7e6f8d6c41..b7f1cb89ffe04 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -817,6 +817,48 @@ def psql_insert_copy(table, conn, keys, data_iter): tm.assert_frame_equal(result, expected) +@pytest.mark.db +@pytest.mark.parametrize("conn", postgresql_connectable) +def test_self_join_date_columns(conn, request): + # GH 44421 + conn = request.getfixturevalue(conn) + from sqlalchemy.engine import Engine + from sqlalchemy.sql import text + + create_table = text( + """ + CREATE TABLE person + ( + id serial constraint person_pkey primary key, + created_dt timestamp with time zone + ); + + INSERT INTO person + VALUES (1, '2021-01-01T00:00:00Z'); + """ + ) + if isinstance(conn, Engine): + with conn.connect() as con: + with con.begin(): + con.execute(create_table) + else: + with conn.begin(): + conn.execute(create_table) + + sql_query = ( + 'SELECT * FROM "person" AS p1 INNER JOIN "person" AS p2 ON p1.id = p2.id;' + ) + result = pd.read_sql(sql_query, conn) + expected = DataFrame( + [[1, Timestamp("2021", tz="UTC")] * 2], columns=["id", "created_dt"] * 2 + ) + tm.assert_frame_equal(result, expected) + + # Cleanup + with sql.SQLDatabase(conn, need_transaction=True) as pandasSQL: + pandasSQL.drop_table("person") + + def test_execute_typeerror(sqlite_iris_engine): with pytest.raises(TypeError, match="pandas.io.sql.execute requires a connection"): with tm.assert_produces_warning( From a6d09ddd83cd50ac2966805bd51103632e3325d7 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 19 May 2023 17:38:23 -0700 Subject: [PATCH 2/3] Use isetitem --- pandas/io/sql.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 37f1dd4269937..51cc3eacae284 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -137,7 +137,7 @@ def _parse_date_columns(data_frame, parse_dates): fmt = parse_dates[col_name] except TypeError: fmt = None - data_frame.iloc[:, i] = _handle_date_column(df_col, format=fmt) + data_frame.isetitem(i, _handle_date_column(df_col, format=fmt)) return data_frame From 776e3bafb75fa779eaf7c80617617ef2c6c19dfd Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 22 May 2023 11:47:41 -0700 Subject: [PATCH 3/3] Move to postgres class --- pandas/tests/io/test_sql.py | 80 ++++++++++++++++++------------------- 1 file changed, 38 insertions(+), 42 deletions(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 41ee9324a646a..7a3f7521d4a17 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -817,48 +817,6 @@ def psql_insert_copy(table, conn, keys, data_iter): tm.assert_frame_equal(result, expected) -@pytest.mark.db -@pytest.mark.parametrize("conn", postgresql_connectable) -def test_self_join_date_columns(conn, request): - # GH 44421 - conn = request.getfixturevalue(conn) - from sqlalchemy.engine import Engine - from sqlalchemy.sql import text - - create_table = text( - """ - CREATE TABLE person - ( - id serial constraint person_pkey primary key, - created_dt timestamp with time zone - ); - - INSERT INTO person - VALUES (1, '2021-01-01T00:00:00Z'); - """ - ) - if isinstance(conn, Engine): - with conn.connect() as con: - with con.begin(): - con.execute(create_table) - else: - with conn.begin(): - conn.execute(create_table) - - sql_query = ( - 'SELECT * FROM "person" AS p1 INNER JOIN "person" AS p2 ON p1.id = p2.id;' - ) - result = pd.read_sql(sql_query, conn) - expected = DataFrame( - [[1, Timestamp("2021", tz="UTC")] * 2], columns=["id", "created_dt"] * 2 - ) - tm.assert_frame_equal(result, expected) - - # Cleanup - with sql.SQLDatabase(conn, need_transaction=True) as pandasSQL: - pandasSQL.drop_table("person") - - def test_execute_typeerror(sqlite_iris_engine): with pytest.raises(TypeError, match="pandas.io.sql.execute requires a connection"): with tm.assert_produces_warning( @@ -2932,6 +2890,44 @@ def test_schema_support(self): res2 = pdsql.read_table("test_schema_other2") tm.assert_frame_equal(res1, res2) + def test_self_join_date_columns(self): + # GH 44421 + from sqlalchemy.engine import Engine + from sqlalchemy.sql import text + + create_table = text( + """ + CREATE TABLE person + ( + id serial constraint person_pkey primary key, + created_dt timestamp with time zone + ); + + INSERT INTO person + VALUES (1, '2021-01-01T00:00:00Z'); + """ + ) + if isinstance(self.conn, Engine): + with self.conn.connect() as con: + with con.begin(): + con.execute(create_table) + else: + with self.conn.begin(): + self.conn.execute(create_table) + + sql_query = ( + 'SELECT * FROM "person" AS p1 INNER JOIN "person" AS p2 ON p1.id = p2.id;' + ) + result = pd.read_sql(sql_query, self.conn) + expected = DataFrame( + [[1, Timestamp("2021", tz="UTC")] * 2], columns=["id", "created_dt"] * 2 + ) + tm.assert_frame_equal(result, expected) + + # Cleanup + with sql.SQLDatabase(self.conn, need_transaction=True) as pandasSQL: + pandasSQL.drop_table("person") + # ----------------------------------------------------------------------------- # -- Test Sqlite / MySQL fallback