From 97037bd56053db69e1cf35845e3d8d04d53fe353 Mon Sep 17 00:00:00 2001 From: Alexey Izbyshev Date: Tue, 21 Aug 2018 23:43:28 +0300 Subject: [PATCH 1/6] bpo-34482: Add tests for proper handling of non-UTF-8-encodable strings in datetime classes A follow-up of bpo-34454. --- Lib/test/datetimetester.py | 47 ++++++++++++++++--- .../2018-08-23-20-42-14.bpo-34482.BzQYUs.rst | 2 + 2 files changed, 42 insertions(+), 7 deletions(-) create mode 100644 Misc/NEWS.d/next/Tests/2018-08-23-20-42-14.bpo-34482.BzQYUs.rst diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py index 9c6e71c54d79d3..30ef34d2af9e6b 100644 --- a/Lib/test/datetimetester.py +++ b/Lib/test/datetimetester.py @@ -302,6 +302,8 @@ def test_tzname(self): self.assertEqual('UTC+09:30', timezone(9.5 * HOUR).tzname(None)) self.assertEqual('UTC-00:01', timezone(timedelta(minutes=-1)).tzname(None)) self.assertEqual('XYZ', timezone(-5 * HOUR, 'XYZ').tzname(None)) + # bpo-34482: Check that surrogates are handled properly. + self.assertEqual('\ud800', timezone(ZERO, '\ud800').tzname(None)) # Sub-minute offsets: self.assertEqual('UTC+01:06:40', timezone(timedelta(0, 4000)).tzname(None)) @@ -1307,6 +1309,14 @@ def test_strftime(self): except ValueError: pass + # bpo-34482: Check that surrogates don't cause a crash. + # FIXME: The C datetime implementation raises an exception + # while the pure-Python one succeeds. + try: + t.strftime('\ud800') + except UnicodeEncodeError: + pass + #check that this standard extension works t.strftime("%f") @@ -1746,6 +1756,9 @@ def test_isoformat(self): self.assertEqual(t.isoformat('T'), "0001-02-03T04:05:01.000123") self.assertEqual(t.isoformat(' '), "0001-02-03 04:05:01.000123") self.assertEqual(t.isoformat('\x00'), "0001-02-03\x0004:05:01.000123") + # bpo-34482: Check that surrogates are handled properly. + self.assertEqual(t.isoformat('\ud800'), + "0001-02-03\ud80004:05:01.000123") self.assertEqual(t.isoformat(timespec='hours'), "0001-02-03T04") self.assertEqual(t.isoformat(timespec='minutes'), "0001-02-03T04:05") self.assertEqual(t.isoformat(timespec='seconds'), "0001-02-03T04:05:01") @@ -1754,6 +1767,8 @@ def test_isoformat(self): self.assertEqual(t.isoformat(timespec='auto'), "0001-02-03T04:05:01.000123") self.assertEqual(t.isoformat(sep=' ', timespec='minutes'), "0001-02-03 04:05") self.assertRaises(ValueError, t.isoformat, timespec='foo') + # bpo-34482: Check that surrogates are handled properly. + self.assertRaises(ValueError, t.isoformat, timespec='\ud800') # str is ISO format with the separator forced to a blank. self.assertEqual(str(t), "0001-02-03 04:05:01.000123") @@ -2277,13 +2292,21 @@ def test_utcnow(self): self.assertLessEqual(abs(from_timestamp - from_now), tolerance) def test_strptime(self): - string = '2004-12-01 13:02:47.197' - format = '%Y-%m-%d %H:%M:%S.%f' - expected = _strptime._strptime_datetime(self.theclass, string, format) - got = self.theclass.strptime(string, format) - self.assertEqual(expected, got) - self.assertIs(type(expected), self.theclass) - self.assertIs(type(got), self.theclass) + inputs = [ + ('2004-12-01 13:02:47.197', '%Y-%m-%d %H:%M:%S.%f'), + # bpo-34482: Check that surrogates are handled properly. + ('2004-12-01\ud80013:02:47.197', '%Y-%m-%d\ud800%H:%M:%S.%f'), + ('2004\ud80012-01 13:02:47.197', '%Y\ud800%m-%d %H:%M:%S.%f'), + ('2004-12-01 13:02\ud80047.197', '%Y-%m-%d %H:%M\ud800%S.%f'), + ] + for string, format in inputs: + with self.subTest(string=string, format=format): + expected = _strptime._strptime_datetime(self.theclass, string, + format) + got = self.theclass.strptime(string, format) + self.assertEqual(expected, got) + self.assertIs(type(expected), self.theclass) + self.assertIs(type(got), self.theclass) strptime = self.theclass.strptime self.assertEqual(strptime("+0002", "%z").utcoffset(), 2 * MINUTE) @@ -2869,6 +2892,8 @@ def test_isoformat(self): self.assertEqual(t.isoformat(timespec='microseconds'), "12:34:56.123456") self.assertEqual(t.isoformat(timespec='auto'), "12:34:56.123456") self.assertRaises(ValueError, t.isoformat, timespec='monkey') + # bpo-34482: Check that surrogates are handled properly. + self.assertRaises(ValueError, t.isoformat, timespec='\ud800') t = self.theclass(hour=12, minute=34, second=56, microsecond=999500) self.assertEqual(t.isoformat(timespec='milliseconds'), "12:34:56.999") @@ -2919,6 +2944,14 @@ def test_strftime(self): # A naive object replaces %z and %Z with empty strings. self.assertEqual(t.strftime("'%z' '%Z'"), "'' ''") + # bpo-34482: Check that surrogates don't cause a crash. + # FIXME: The C datetime implementation raises an exception + # while the pure-Python one succeeds. + try: + t.strftime('\ud800') + except UnicodeEncodeError: + pass + def test_format(self): t = self.theclass(1, 2, 3, 4) self.assertEqual(t.__format__(''), str(t)) diff --git a/Misc/NEWS.d/next/Tests/2018-08-23-20-42-14.bpo-34482.BzQYUs.rst b/Misc/NEWS.d/next/Tests/2018-08-23-20-42-14.bpo-34482.BzQYUs.rst new file mode 100644 index 00000000000000..23da8366a4e354 --- /dev/null +++ b/Misc/NEWS.d/next/Tests/2018-08-23-20-42-14.bpo-34482.BzQYUs.rst @@ -0,0 +1,2 @@ +Add tests for proper handling of non-UTF-8-encodable strings in +:mod:`datetime` classes. Patch by Alexey Izbyshev. From a7e435d07bf0d7628258ca7fbacdfe673f8ba40f Mon Sep 17 00:00:00 2001 From: Alexey Izbyshev Date: Fri, 24 Aug 2018 00:26:56 +0300 Subject: [PATCH 2/6] Remove FIXMEs and use assertEqual in strftime tests --- Lib/test/datetimetester.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py index 30ef34d2af9e6b..a24afc274c9c78 100644 --- a/Lib/test/datetimetester.py +++ b/Lib/test/datetimetester.py @@ -1310,10 +1310,8 @@ def test_strftime(self): pass # bpo-34482: Check that surrogates don't cause a crash. - # FIXME: The C datetime implementation raises an exception - # while the pure-Python one succeeds. try: - t.strftime('\ud800') + self.assertEqual(t.strftime('%y\ud800%m'), '05\ud80003') except UnicodeEncodeError: pass @@ -2945,10 +2943,8 @@ def test_strftime(self): self.assertEqual(t.strftime("'%z' '%Z'"), "'' ''") # bpo-34482: Check that surrogates don't cause a crash. - # FIXME: The C datetime implementation raises an exception - # while the pure-Python one succeeds. try: - t.strftime('\ud800') + self.assertEqual(t.strftime('%H\ud800%M'), '01\ud80002') except UnicodeEncodeError: pass From 4603f7e28c43f1c5ea161c30531beb24413da4a5 Mon Sep 17 00:00:00 2001 From: Alexey Izbyshev Date: Fri, 24 Aug 2018 00:27:36 +0300 Subject: [PATCH 3/6] Remove the NEWS entry --- Misc/NEWS.d/next/Tests/2018-08-23-20-42-14.bpo-34482.BzQYUs.rst | 2 -- 1 file changed, 2 deletions(-) delete mode 100644 Misc/NEWS.d/next/Tests/2018-08-23-20-42-14.bpo-34482.BzQYUs.rst diff --git a/Misc/NEWS.d/next/Tests/2018-08-23-20-42-14.bpo-34482.BzQYUs.rst b/Misc/NEWS.d/next/Tests/2018-08-23-20-42-14.bpo-34482.BzQYUs.rst deleted file mode 100644 index 23da8366a4e354..00000000000000 --- a/Misc/NEWS.d/next/Tests/2018-08-23-20-42-14.bpo-34482.BzQYUs.rst +++ /dev/null @@ -1,2 +0,0 @@ -Add tests for proper handling of non-UTF-8-encodable strings in -:mod:`datetime` classes. Patch by Alexey Izbyshev. From beca2bdfb19c05bcfa083b346001ffc173328f19 Mon Sep 17 00:00:00 2001 From: Alexey Izbyshev Date: Fri, 24 Aug 2018 00:31:26 +0300 Subject: [PATCH 4/6] Add a separate test for datetime.strftime() --- Lib/test/datetimetester.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py index a24afc274c9c78..c66278c3f6687b 100644 --- a/Lib/test/datetimetester.py +++ b/Lib/test/datetimetester.py @@ -2373,6 +2373,13 @@ def test_more_strftime(self): t = t.replace(tzinfo=tz) self.assertEqual(t.strftime("%z"), "-0200" + z) + # bpo-34482: Check that surrogates don't cause a crash. + try: + self.assertEqual(t.strftime('%y\ud800%m %H\ud800%M'), + '04\ud80012 06\ud80022') + except UnicodeEncodeError: + pass + def test_extract(self): dt = self.theclass(2002, 3, 4, 18, 45, 3, 1234) self.assertEqual(dt.date(), date(2002, 3, 4)) From 51bd826c51cff3f451386c341d0e6b55c0af1f06 Mon Sep 17 00:00:00 2001 From: Alexey Izbyshev Date: Fri, 24 Aug 2018 23:07:36 +0300 Subject: [PATCH 5/6] Remove check for strftime() return value Its behavior across platforms is inconsistent and hard to test. --- Lib/test/datetimetester.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py index c66278c3f6687b..8a1b37bf3e548d 100644 --- a/Lib/test/datetimetester.py +++ b/Lib/test/datetimetester.py @@ -1311,7 +1311,7 @@ def test_strftime(self): # bpo-34482: Check that surrogates don't cause a crash. try: - self.assertEqual(t.strftime('%y\ud800%m'), '05\ud80003') + t.strftime('%y\ud800%m') except UnicodeEncodeError: pass @@ -2375,8 +2375,7 @@ def test_more_strftime(self): # bpo-34482: Check that surrogates don't cause a crash. try: - self.assertEqual(t.strftime('%y\ud800%m %H\ud800%M'), - '04\ud80012 06\ud80022') + t.strftime('%y\ud800%m %H\ud800%M') except UnicodeEncodeError: pass @@ -2951,7 +2950,7 @@ def test_strftime(self): # bpo-34482: Check that surrogates don't cause a crash. try: - self.assertEqual(t.strftime('%H\ud800%M'), '01\ud80002') + t.strftime('%H\ud800%M') except UnicodeEncodeError: pass From f24c13cd5f035ea1b9c9993d3fadc2efd47593da Mon Sep 17 00:00:00 2001 From: Alexey Izbyshev Date: Fri, 31 Aug 2018 00:42:41 +0300 Subject: [PATCH 6/6] Don't mix regular and surrogate-related tests in test_strptime() --- Lib/test/datetimetester.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py index 8a1b37bf3e548d..ef24e295eeb153 100644 --- a/Lib/test/datetimetester.py +++ b/Lib/test/datetimetester.py @@ -2290,9 +2290,16 @@ def test_utcnow(self): self.assertLessEqual(abs(from_timestamp - from_now), tolerance) def test_strptime(self): + string = '2004-12-01 13:02:47.197' + format = '%Y-%m-%d %H:%M:%S.%f' + expected = _strptime._strptime_datetime(self.theclass, string, format) + got = self.theclass.strptime(string, format) + self.assertEqual(expected, got) + self.assertIs(type(expected), self.theclass) + self.assertIs(type(got), self.theclass) + + # bpo-34482: Check that surrogates are handled properly. inputs = [ - ('2004-12-01 13:02:47.197', '%Y-%m-%d %H:%M:%S.%f'), - # bpo-34482: Check that surrogates are handled properly. ('2004-12-01\ud80013:02:47.197', '%Y-%m-%d\ud800%H:%M:%S.%f'), ('2004\ud80012-01 13:02:47.197', '%Y\ud800%m-%d %H:%M:%S.%f'), ('2004-12-01 13:02\ud80047.197', '%Y-%m-%d %H:%M\ud800%S.%f'), @@ -2303,8 +2310,6 @@ def test_strptime(self): format) got = self.theclass.strptime(string, format) self.assertEqual(expected, got) - self.assertIs(type(expected), self.theclass) - self.assertIs(type(got), self.theclass) strptime = self.theclass.strptime self.assertEqual(strptime("+0002", "%z").utcoffset(), 2 * MINUTE)