From f28f9c7a2d2a357d512d99e5fef06d97b2feb93b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 29 Jul 2024 15:57:23 +0200 Subject: [PATCH 01/10] protect against `ValueError` raised by `os.stat` --- Lib/filecmp.py | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/Lib/filecmp.py b/Lib/filecmp.py index 020ea694ca63e9..93de1f0e0d0581 100644 --- a/Lib/filecmp.py +++ b/Lib/filecmp.py @@ -138,10 +138,20 @@ def __init__(self, a, b, ignore=None, hide=None, *, shallow=True): # Initialize self.shallow = shallow def phase0(self): # Compare everything except common subdirectories - self.left_list = _filter(os.listdir(self.left), - self.hide+self.ignore) - self.right_list = _filter(os.listdir(self.right), - self.hide+self.ignore) + try: + left_full_list = os.listdir(self.left) + except (OSError, ValueError): + self.left_list = [] + else: + self.left_list = _filter(left_full_list, self.hide + self.ignore) + + try: + right_full_list = os.listdir(self.right) + except (OSError, ValueError): + self.right_list = [] + else: + self.right_list = _filter(right_full_list, self.hide + self.ignore) + self.left_list.sort() self.right_list.sort() @@ -164,12 +174,12 @@ def phase2(self): # Distinguish files, directories, funnies ok = True try: a_stat = os.stat(a_path) - except OSError: + except (OSError, ValueError): # print('Can\'t stat', a_path, ':', why.args[1]) ok = False try: b_stat = os.stat(b_path) - except OSError: + except (OSError, ValueError): # print('Can\'t stat', b_path, ':', why.args[1]) ok = False @@ -285,12 +295,12 @@ def cmpfiles(a, b, common, shallow=True): # Return: # 0 for equal # 1 for different -# 2 for funny cases (can't stat, etc.) +# 2 for funny cases (can't stat, NUL bytes, etc.) # def _cmp(a, b, sh, abs=abs, cmp=cmp): try: return not abs(cmp(a, b, sh)) - except OSError: + except (OSError, ValueError): return 2 From b63b315f9937c595ba7d3294b1ff06d3f08b3b00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 29 Jul 2024 16:36:40 +0200 Subject: [PATCH 02/10] add tests --- Lib/test/test_filecmp.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Lib/test/test_filecmp.py b/Lib/test/test_filecmp.py index 1fb47163719ede..c93eb7ab65d747 100644 --- a/Lib/test/test_filecmp.py +++ b/Lib/test/test_filecmp.py @@ -156,6 +156,17 @@ def test_cmpfiles(self): (['file'], ['file2'], []), "Comparing mismatched directories fails") + def test_compfiles_invalid_names(self): + for file, desc in [ + ('\x00', 'NUL bytes filename'), + (__file__ + '\x00', 'filename with embedded NUL bytes'), + ("\uD834\uDD1E.py", 'surrogate codes (MUSICAL SYMBOL G CLEF)'), + ('a' * 1_000_000, 'very long filename'), + ]: + for other_dir in [self.dir, self.dir_same, self.dir_diff]: + with self.subTest(desc, other_dir=other_dir): + res = filecmp.cmpfiles(self.dir, other_dir, [file]) + self.assertTupleEqual(res, ([], [], [file])) def _assert_lists(self, actual, expected): """Assert that two lists are equal, up to ordering.""" From 76496522aec67e86a0803863b20e68f541221fb7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 29 Jul 2024 16:48:22 +0200 Subject: [PATCH 03/10] blurb --- .../Library/2024-07-29-16-47-08.gh-issue-122400.fM0YSv.rst | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2024-07-29-16-47-08.gh-issue-122400.fM0YSv.rst diff --git a/Misc/NEWS.d/next/Library/2024-07-29-16-47-08.gh-issue-122400.fM0YSv.rst b/Misc/NEWS.d/next/Library/2024-07-29-16-47-08.gh-issue-122400.fM0YSv.rst new file mode 100644 index 00000000000000..95230247febf5c --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-07-29-16-47-08.gh-issue-122400.fM0YSv.rst @@ -0,0 +1,3 @@ +Handle :exc:`ValueError`\s raised by OS-related functions +in :class:`filecmp.dircmp` and :func:`filecmp.cmpfiles`. +Patch by Bénédikt Tran. From e70ea7050ac7f3423c3463c5a1a1369cac6200cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 30 Jul 2024 09:40:34 +0200 Subject: [PATCH 04/10] revert protection in `os.listdir` --- Lib/filecmp.py | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/Lib/filecmp.py b/Lib/filecmp.py index 93de1f0e0d0581..d4c9e93ead7094 100644 --- a/Lib/filecmp.py +++ b/Lib/filecmp.py @@ -138,20 +138,12 @@ def __init__(self, a, b, ignore=None, hide=None, *, shallow=True): # Initialize self.shallow = shallow def phase0(self): # Compare everything except common subdirectories - try: - left_full_list = os.listdir(self.left) - except (OSError, ValueError): - self.left_list = [] - else: - self.left_list = _filter(left_full_list, self.hide + self.ignore) - - try: - right_full_list = os.listdir(self.right) - except (OSError, ValueError): - self.right_list = [] - else: - self.right_list = _filter(right_full_list, self.hide + self.ignore) - + # Do not protect os.listdir() against OSError or ValueError. + # See https://github.com/python/cpython/issues/122400. + self.left_list = _filter(os.listdir(self.left), + self.hide + self.ignore) + self.right_list = _filter(os.listdir(self.right), + self.hide+self.ignore) self.left_list.sort() self.right_list.sort() @@ -175,6 +167,8 @@ def phase2(self): # Distinguish files, directories, funnies try: a_stat = os.stat(a_path) except (OSError, ValueError): + # See https://github.com/python/cpython/issues/122400 + # for the rationale for protecting against ValueError. # print('Can\'t stat', a_path, ':', why.args[1]) ok = False try: From 13bc799757281b7cf9554e5b0f228c28c941072b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 30 Jul 2024 09:40:41 +0200 Subject: [PATCH 05/10] improve test coverage --- Lib/test/test_filecmp.py | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_filecmp.py b/Lib/test/test_filecmp.py index c93eb7ab65d747..e88f1389de182a 100644 --- a/Lib/test/test_filecmp.py +++ b/Lib/test/test_filecmp.py @@ -156,7 +156,8 @@ def test_cmpfiles(self): (['file'], ['file2'], []), "Comparing mismatched directories fails") - def test_compfiles_invalid_names(self): + def test_cmpfiles_invalid_names(self): + # See https://github.com/python/cpython/issues/122400. for file, desc in [ ('\x00', 'NUL bytes filename'), (__file__ + '\x00', 'filename with embedded NUL bytes'), @@ -164,10 +165,27 @@ def test_compfiles_invalid_names(self): ('a' * 1_000_000, 'very long filename'), ]: for other_dir in [self.dir, self.dir_same, self.dir_diff]: - with self.subTest(desc, other_dir=other_dir): + with self.subTest(f'cmpfiles: {desc}', other_dir=other_dir): res = filecmp.cmpfiles(self.dir, other_dir, [file]) self.assertTupleEqual(res, ([], [], [file])) + def test_dircmp_invalid_names(self): + for bad_dir, desc in [ + ('\x00', 'NUL bytes dirname'), + (f'Top{os.sep}Mid\x00', 'dirname with embedded NUL bytes'), + ("\uD834\uDD1E", 'surrogate codes (MUSICAL SYMBOL G CLEF)'), + ('a' * 1_000_000, 'very long dirname'), + ]: + d = filecmp.dircmp(self.dir, bad_dir) + for target in [ + # attributes where os.listdir() raises OSError or ValueError + 'left_list', 'right_list', + 'left_only', 'right_only', 'common', + ]: + with self.subTest(f'dircmp: {desc}', target=target): + with self.assertRaises((OSError, ValueError)): + getattr(d, target) + def _assert_lists(self, actual, expected): """Assert that two lists are equal, up to ordering.""" self.assertEqual(sorted(actual), sorted(expected)) From 779a3b983996a31d6cf395fe8019d6a514189d28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 30 Jul 2024 09:40:45 +0200 Subject: [PATCH 06/10] update NEWS --- .../Library/2024-07-29-16-47-08.gh-issue-122400.fM0YSv.rst | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Misc/NEWS.d/next/Library/2024-07-29-16-47-08.gh-issue-122400.fM0YSv.rst b/Misc/NEWS.d/next/Library/2024-07-29-16-47-08.gh-issue-122400.fM0YSv.rst index 95230247febf5c..768292e5352258 100644 --- a/Misc/NEWS.d/next/Library/2024-07-29-16-47-08.gh-issue-122400.fM0YSv.rst +++ b/Misc/NEWS.d/next/Library/2024-07-29-16-47-08.gh-issue-122400.fM0YSv.rst @@ -1,3 +1,2 @@ -Handle :exc:`ValueError`\s raised by OS-related functions -in :class:`filecmp.dircmp` and :func:`filecmp.cmpfiles`. -Patch by Bénédikt Tran. +Handle :exc:`ValueError`\s raised by :func:`os.stat` in +:attr:`filecmp.dircmp.`in :func:`filecmp.cmpfiles`. Patch by Bénédikt Tran. From a0407e1131ceffe4b0c1dc1f14c30fb9798af585 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 30 Jul 2024 09:41:13 +0200 Subject: [PATCH 07/10] revert ws --- Lib/filecmp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/filecmp.py b/Lib/filecmp.py index d4c9e93ead7094..cbd1daa01a9986 100644 --- a/Lib/filecmp.py +++ b/Lib/filecmp.py @@ -141,7 +141,7 @@ def phase0(self): # Compare everything except common subdirectories # Do not protect os.listdir() against OSError or ValueError. # See https://github.com/python/cpython/issues/122400. self.left_list = _filter(os.listdir(self.left), - self.hide + self.ignore) + self.hide+self.ignore) self.right_list = _filter(os.listdir(self.right), self.hide+self.ignore) self.left_list.sort() From af1ecc380fcb503bc29a4b0b3f38d051e366dfe0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 30 Jul 2024 09:42:24 +0200 Subject: [PATCH 08/10] update NEWS --- .../Library/2024-07-29-16-47-08.gh-issue-122400.fM0YSv.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2024-07-29-16-47-08.gh-issue-122400.fM0YSv.rst b/Misc/NEWS.d/next/Library/2024-07-29-16-47-08.gh-issue-122400.fM0YSv.rst index 768292e5352258..8c47e94f78d9f0 100644 --- a/Misc/NEWS.d/next/Library/2024-07-29-16-47-08.gh-issue-122400.fM0YSv.rst +++ b/Misc/NEWS.d/next/Library/2024-07-29-16-47-08.gh-issue-122400.fM0YSv.rst @@ -1,2 +1,3 @@ Handle :exc:`ValueError`\s raised by :func:`os.stat` in -:attr:`filecmp.dircmp.`in :func:`filecmp.cmpfiles`. Patch by Bénédikt Tran. +:class:`filecmp.dircmp` and :func:`filecmp.cmpfiles`. +Patch by Bénédikt Tran. From 6b01a41cb0776ae2f20b80c18859fba3bd94ca06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 30 Jul 2024 10:23:16 +0200 Subject: [PATCH 09/10] add symmetric case --- Lib/test/test_filecmp.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_filecmp.py b/Lib/test/test_filecmp.py index e88f1389de182a..2c83667b22feb4 100644 --- a/Lib/test/test_filecmp.py +++ b/Lib/test/test_filecmp.py @@ -176,15 +176,19 @@ def test_dircmp_invalid_names(self): ("\uD834\uDD1E", 'surrogate codes (MUSICAL SYMBOL G CLEF)'), ('a' * 1_000_000, 'very long dirname'), ]: - d = filecmp.dircmp(self.dir, bad_dir) + d1 = filecmp.dircmp(self.dir, bad_dir) + d2 = filecmp.dircmp(bad_dir, self.dir) for target in [ # attributes where os.listdir() raises OSError or ValueError 'left_list', 'right_list', 'left_only', 'right_only', 'common', ]: - with self.subTest(f'dircmp: {desc}', target=target): + with self.subTest(f'dircmp(ok, bad): {desc}', target=target): with self.assertRaises((OSError, ValueError)): - getattr(d, target) + getattr(d1, target) + with self.subTest(f'dircmp(bad, ok): {desc}', target=target): + with self.assertRaises((OSError, ValueError)): + getattr(d2, target) def _assert_lists(self, actual, expected): """Assert that two lists are equal, up to ordering.""" From a41eb2e5514bf4f9d387b44c2de381ca22a2bab1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 30 Jul 2024 10:25:40 +0200 Subject: [PATCH 10/10] remove un-necessary comments --- Lib/filecmp.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/Lib/filecmp.py b/Lib/filecmp.py index cbd1daa01a9986..c5b8d854d77de3 100644 --- a/Lib/filecmp.py +++ b/Lib/filecmp.py @@ -138,8 +138,6 @@ def __init__(self, a, b, ignore=None, hide=None, *, shallow=True): # Initialize self.shallow = shallow def phase0(self): # Compare everything except common subdirectories - # Do not protect os.listdir() against OSError or ValueError. - # See https://github.com/python/cpython/issues/122400. self.left_list = _filter(os.listdir(self.left), self.hide+self.ignore) self.right_list = _filter(os.listdir(self.right),