From fbca843932858cc87a5f7f95e63ec372bd07f5a9 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Tue, 13 Dec 2022 12:59:22 +0000 Subject: [PATCH 1/4] Revert "Inconsistent date parsing of to_datetime (#42908)" This reverts commit 36e41654858259e3554255f8717e45cca7456a30. --- doc/source/user_guide/timeseries.rst | 2 +- doc/source/whatsnew/v1.4.0.rst | 2 +- pandas/_libs/tslibs/parsing.pyx | 24 ---------------------- pandas/tests/io/parser/test_parse_dates.py | 15 ++++++-------- 4 files changed, 8 insertions(+), 35 deletions(-) diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst index 7e1368061322b..7186ecaab76bf 100644 --- a/doc/source/user_guide/timeseries.rst +++ b/doc/source/user_guide/timeseries.rst @@ -214,7 +214,7 @@ you can pass the ``dayfirst`` flag: .. warning:: - You see in the above example that ``dayfirst`` isn't strict. If a date + You see in the above example that ``dayfirst`` isn't strict, so if a date can't be parsed with the day being first it will be parsed as if ``dayfirst`` were ``False`` and a warning will also be raised. diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 5895a06792ffb..bbebfec9206d4 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -249,7 +249,7 @@ Notable bug fixes These are bug fixes that might have notable behavior changes. -.. _whatsnew_140.notable_bug_fixes.inconsistent_date_string_parsing: +.. 
_whatsnew_140.notable_bug_fixes.notable_bug_fix1: Inconsistent date string parsing ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 83f03f94d2fad..614db69425f4c 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -85,12 +85,6 @@ class DateParseError(ValueError): _DEFAULT_DATETIME = datetime(1, 1, 1).replace(hour=0, minute=0, second=0, microsecond=0) -PARSING_WARNING_MSG = ( - "Parsing dates in {format} format when dayfirst={dayfirst} was specified. " - "This may lead to inconsistently parsed dates! Specify a format " - "to ensure consistent parsing." -) - cdef: set _not_datelike_strings = {"a", "A", "m", "M", "p", "P", "t", "T"} @@ -203,28 +197,10 @@ cdef object _parse_delimited_date(str date_string, bint dayfirst): # date_string can't be converted to date, above format return None, None - swapped_day_and_month = False if 1 <= month <= MAX_DAYS_IN_MONTH and 1 <= day <= MAX_DAYS_IN_MONTH \ and (month <= MAX_MONTH or day <= MAX_MONTH): if (month > MAX_MONTH or (day <= MAX_MONTH and dayfirst)) and can_swap: day, month = month, day - swapped_day_and_month = True - if dayfirst and not swapped_day_and_month: - warnings.warn( - PARSING_WARNING_MSG.format( - format="MM/DD/YYYY", - dayfirst="True", - ), - stacklevel=find_stack_level(), - ) - elif not dayfirst and swapped_day_and_month: - warnings.warn( - PARSING_WARNING_MSG.format( - format="DD/MM/YYYY", - dayfirst="False (the default)", - ), - stacklevel=find_stack_level(), - ) # In Python <= 3.6.0 there is no range checking for invalid dates # in C api, thus we call faster C version for 3.6.1 or newer return datetime_new(year, month, day, 0, 0, 0, 0, None), reso diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index c366613c2815f..1983b89986635 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -10,7 +10,6 @@ 
timezone, ) from io import StringIO -import warnings from dateutil.parser import parse as du_parse from hypothesis import given @@ -1665,16 +1664,16 @@ def test_invalid_parse_delimited_date(all_parsers, date_string): "date_string,dayfirst,expected", [ # %d/%m/%Y; month > 12 thus replacement + ("13/02/2019", False, datetime(2019, 2, 13)), ("13/02/2019", True, datetime(2019, 2, 13)), # %m/%d/%Y; day > 12 thus there will be no replacement ("02/13/2019", False, datetime(2019, 2, 13)), + ("02/13/2019", True, datetime(2019, 2, 13)), # %d/%m/%Y; dayfirst==True thus replacement ("04/02/2019", True, datetime(2019, 2, 4)), ], ) -def test_parse_delimited_date_swap_no_warning( - all_parsers, date_string, dayfirst, expected -): +def test_parse_delimited_date_swap(all_parsers, date_string, dayfirst, expected): parser = all_parsers expected = DataFrame({0: [expected]}, dtype="datetime64[ns]") result = parser.read_csv( @@ -1751,11 +1750,9 @@ def test_hypothesis_delimited_date( ) date_string = test_datetime.strftime(date_format.replace(" ", delimiter)) - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=UserWarning) - except_out_dateutil, result = _helper_hypothesis_delimited_date( - parse_datetime_string, date_string, dayfirst=dayfirst - ) + except_out_dateutil, result = _helper_hypothesis_delimited_date( + parse_datetime_string, date_string, dayfirst=dayfirst + ) except_in_dateutil, expected = _helper_hypothesis_delimited_date( du_parse, date_string, From d50802c4cc839620ea9da46a86c5db652b726d38 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Tue, 13 Dec 2022 13:09:48 +0000 Subject: [PATCH 2/4] post-merge fixup --- doc/source/user_guide/timeseries.rst | 2 +- doc/source/whatsnew/v1.4.0.rst | 2 +- pandas/tests/io/parser/test_parse_dates.py | 15 +++++++++------ 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst index 7186ecaab76bf..7e1368061322b 100644 --- 
a/doc/source/user_guide/timeseries.rst +++ b/doc/source/user_guide/timeseries.rst @@ -214,7 +214,7 @@ you can pass the ``dayfirst`` flag: .. warning:: - You see in the above example that ``dayfirst`` isn't strict, so if a date + You see in the above example that ``dayfirst`` isn't strict. If a date can't be parsed with the day being first it will be parsed as if ``dayfirst`` were ``False`` and a warning will also be raised. diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index bbebfec9206d4..5895a06792ffb 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -249,7 +249,7 @@ Notable bug fixes These are bug fixes that might have notable behavior changes. -.. _whatsnew_140.notable_bug_fixes.notable_bug_fix1: +.. _whatsnew_140.notable_bug_fixes.inconsistent_date_string_parsing: Inconsistent date string parsing ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 1983b89986635..c366613c2815f 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -10,6 +10,7 @@ timezone, ) from io import StringIO +import warnings from dateutil.parser import parse as du_parse from hypothesis import given @@ -1664,16 +1665,16 @@ def test_invalid_parse_delimited_date(all_parsers, date_string): "date_string,dayfirst,expected", [ # %d/%m/%Y; month > 12 thus replacement - ("13/02/2019", False, datetime(2019, 2, 13)), ("13/02/2019", True, datetime(2019, 2, 13)), # %m/%d/%Y; day > 12 thus there will be no replacement ("02/13/2019", False, datetime(2019, 2, 13)), - ("02/13/2019", True, datetime(2019, 2, 13)), # %d/%m/%Y; dayfirst==True thus replacement ("04/02/2019", True, datetime(2019, 2, 4)), ], ) -def test_parse_delimited_date_swap(all_parsers, date_string, dayfirst, expected): +def test_parse_delimited_date_swap_no_warning( + all_parsers, date_string, dayfirst, expected +): parser = all_parsers 
expected = DataFrame({0: [expected]}, dtype="datetime64[ns]") result = parser.read_csv( @@ -1750,9 +1751,11 @@ def test_hypothesis_delimited_date( ) date_string = test_datetime.strftime(date_format.replace(" ", delimiter)) - except_out_dateutil, result = _helper_hypothesis_delimited_date( - parse_datetime_string, date_string, dayfirst=dayfirst - ) + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=UserWarning) + except_out_dateutil, result = _helper_hypothesis_delimited_date( + parse_datetime_string, date_string, dayfirst=dayfirst + ) except_in_dateutil, expected = _helper_hypothesis_delimited_date( du_parse, date_string, From ab7a95ef65ccd4b0ba045b1503caa416bc7d17ff Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Tue, 13 Dec 2022 13:08:28 +0000 Subject: [PATCH 3/4] add test --- pandas/tests/scalar/timestamp/test_timestamp.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index 0384417771056..5446e16c189b0 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -1082,3 +1082,11 @@ def test_as_unit_non_nano(self): == res.nanosecond == 0 ) + + +def test_delimited_date(): + # https://github.com/pandas-dev/pandas/issues/50231 + with tm.assert_produces_warning(None): + result = Timestamp("13-01-2000") + expected = Timestamp(2000, 1, 13) + assert result == expected From 2c0f706f402801ee2877bbc8fd58e69bf437c9fc Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Tue, 13 Dec 2022 13:12:51 +0000 Subject: [PATCH 4/4] whatsnew --- doc/source/whatsnew/v2.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 4d3b2548f5fc5..caff9d7f5cb1a 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -775,6 +775,7 @@ Datetimelike - Bug in ``pandas.tseries.holiday.Holiday`` where a 
half-open date interval causes inconsistent return types from :meth:`USFederalHolidayCalendar.holidays` (:issue:`49075`) - Bug in rendering :class:`DatetimeIndex` and :class:`Series` and :class:`DataFrame` with timezone-aware dtypes with ``dateutil`` or ``zoneinfo`` timezones near daylight-savings transitions (:issue:`49684`) - Bug in :func:`to_datetime` was raising ``ValueError`` when parsing :class:`Timestamp`, ``datetime.datetime``, ``datetime.date``, or ``np.datetime64`` objects when non-ISO8601 ``format`` was passed (:issue:`49298`, :issue:`50036`) +- Bug in :class:`Timestamp` was showing ``UserWarning``, which was not actionable by users, when parsing non-ISO8601 delimited date strings (:issue:`50232`) - Timedelta