From 5a6e0ea88611fbd3b2ae85d9fc6422cd8492ce7d Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Thu, 11 Jul 2024 13:33:15 -0700 Subject: [PATCH 1/4] GH-120754: Make PY_READ_MAX fit in byteobject size Currently if code tries to do a os.read larger than the max bytes object length, the size to read gets capped to `_PY_READ_MAX`, then the code tries to allocate a PyBytes which fails with an OverflowError as the size is larger than the max py bytes object. Since os.read is capping the max size anyways, cap it to a size which is always allocatable as a PyBytes. This changes behavior from bpo-21932 and enables the large file os.read test on 32 bit platforms, as it should cap the read to a platform acceptable size. --- Include/internal/pycore_fileutils.h | 9 +++++++-- Lib/test/test_os.py | 7 +++---- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/Include/internal/pycore_fileutils.h b/Include/internal/pycore_fileutils.h index 13f86b01bbfe8f..74685a3e945459 100644 --- a/Include/internal/pycore_fileutils.h +++ b/Include/internal/pycore_fileutils.h @@ -70,8 +70,13 @@ extern PyObject* _Py_device_encoding(int); # define _PY_WRITE_MAX INT_MAX #else /* write() should truncate the input to PY_SSIZE_T_MAX bytes, - but it's safer to do it ourself to have a portable behaviour */ -# define _PY_READ_MAX PY_SSIZE_T_MAX + but it's safer to do it ourself to have a portable behaviour + + read() fills a PyBytes object, which has a capped size defined in + bytesobject.c. Prefer reading less data (meets the API spec) to causing + an overflow error. + */ +# define _PY_READ_MAX PY_SSIZE_T_MAX - 4096 # define _PY_WRITE_MAX PY_SSIZE_T_MAX #endif diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py index f93937fb587386..6c0bb6a2cfc60c 100644 --- a/Lib/test/test_os.py +++ b/Lib/test/test_os.py @@ -234,10 +234,6 @@ def test_read(self): self.assertEqual(s, b"spam") @support.cpython_only - # Skip the test on 32-bit platforms: the number of bytes must fit in a - # Py_ssize_t type - @unittest.skipUnless(INT_MAX < PY_SSIZE_T_MAX, - "needs INT_MAX < PY_SSIZE_T_MAX") @support.bigmemtest(size=INT_MAX + 10, memuse=1, dry_run=False) def test_large_read(self, size): self.addCleanup(os_helper.unlink, os_helper.TESTFN) @@ -245,6 +241,9 @@ def test_large_read(self, size): # Issue #21932: Make sure that os.read() does not raise an # OverflowError for size larger than INT_MAX + # + # For 32 bit systems, it is expected the read doesn't read all the bytes + # but rather caps to a number it can reasonably read. with open(os_helper.TESTFN, "rb") as fp: data = os.read(fp.fileno(), size) From c60ae273f130f74ee02f01e309f26901b1d2d046 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Thu, 11 Jul 2024 13:58:13 -0700 Subject: [PATCH 2/4] Add news blurb --- .../2024-07-11-13-57-50.gh-issue-120754.C1HedA.rst | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-07-11-13-57-50.gh-issue-120754.C1HedA.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-07-11-13-57-50.gh-issue-120754.C1HedA.rst b/Misc/NEWS.d/next/Core and Builtins/2024-07-11-13-57-50.gh-issue-120754.C1HedA.rst new file mode 100644 index 00000000000000..fbd109020fdaa0 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-07-11-13-57-50.gh-issue-120754.C1HedA.rst @@ -0,0 +1,4 @@ +Cap read size to smaller than the max BytesObject size. read() in POSIX +returns at most the number of requseted bytes, this updates python ``os.read`` +to do similarly, and rather than throw an OverflowError in this case, return +a smaller than requseted byte object. From da8b55ca03a7119d4c75eef2c0456affc92d7071 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Tue, 16 Jul 2024 22:39:38 -0700 Subject: [PATCH 3/4] tweak news --- .../2024-07-11-13-57-50.gh-issue-120754.C1HedA.rst | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-07-11-13-57-50.gh-issue-120754.C1HedA.rst b/Misc/NEWS.d/next/Core and Builtins/2024-07-11-13-57-50.gh-issue-120754.C1HedA.rst index fbd109020fdaa0..00fd3d0bd9bb72 100644 --- a/Misc/NEWS.d/next/Core and Builtins/2024-07-11-13-57-50.gh-issue-120754.C1HedA.rst +++ b/Misc/NEWS.d/next/Core and Builtins/2024-07-11-13-57-50.gh-issue-120754.C1HedA.rst @@ -1,4 +1,3 @@ -Cap read size to smaller than the max BytesObject size. read() in POSIX -returns at most the number of requseted bytes, this updates python ``os.read`` -to do similarly, and rather than throw an OverflowError in this case, return -a smaller than requseted byte object. +``os.read()`` now caps read size to a size which is smaller than the max +BytesObject size. Previously, passing a size larger than max bytes object size +would result in an OverflowError, now the call is made. From 1f4d053f335c46f1b346f757e578839041ae50e8 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Tue, 16 Jul 2024 22:42:57 -0700 Subject: [PATCH 4/4] tweak news more --- .../2024-07-11-13-57-50.gh-issue-120754.C1HedA.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-07-11-13-57-50.gh-issue-120754.C1HedA.rst b/Misc/NEWS.d/next/Core and Builtins/2024-07-11-13-57-50.gh-issue-120754.C1HedA.rst index 00fd3d0bd9bb72..47b50360225075 100644 --- a/Misc/NEWS.d/next/Core and Builtins/2024-07-11-13-57-50.gh-issue-120754.C1HedA.rst +++ b/Misc/NEWS.d/next/Core and Builtins/2024-07-11-13-57-50.gh-issue-120754.C1HedA.rst @@ -1,3 +1,3 @@ -``os.read()`` now caps read size to a size which is smaller than the max -BytesObject size. Previously, passing a size larger than max bytes object size -would result in an OverflowError, now the call is made. +``os.read()`` caps read size smaller than the max bytes object size to avoid +getting an OverflowError that 'byte string is too large'. This makes it so the +read call is attempted (although still may fail for other reasons).