From 1d85458c80b237f35d12a3ac7719389ab7da4eb5 Mon Sep 17 00:00:00 2001 From: rmorotti Date: Wed, 5 Feb 2025 14:25:56 +0000 Subject: [PATCH 01/17] pythongh-117151: IO performance improvement, increase io.DEFAULT_BUFFER_SIZE to 128k, adjust open() to use max(st_blksize, io.DEFAULT_BUFFER_SIZE) --- Lib/_pyio.py | 15 ++++++++------- Lib/test/test_file.py | 12 ++++++++++++ ...2024-04-30-14-03-09.gh-issue-117151.yt2H8c.rst | 5 +++++ Modules/_io/_iomodule.c | 11 +++++++---- Modules/_io/_iomodule.h | 2 +- Modules/_io/clinic/_iomodule.c.h | 9 +++++---- 6 files changed, 38 insertions(+), 16 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-04-30-14-03-09.gh-issue-117151.yt2H8c.rst diff --git a/Lib/_pyio.py b/Lib/_pyio.py index b3a8f37d68acdb..5402653bb1b362 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -23,8 +23,8 @@ valid_seek_flags.add(os.SEEK_HOLE) valid_seek_flags.add(os.SEEK_DATA) -# open() uses st_blksize whenever we can -DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes +# open() uses max(st_blksize, io.DEFAULT_BUFFER_SIZE) when st_blksize is available +DEFAULT_BUFFER_SIZE = 128 * 1024 # bytes # NOTE: Base classes defined here are registered with the "official" ABCs # defined in io.py. We don't use real inheritance though, because we don't want @@ -123,10 +123,11 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None, the size of a fixed-size chunk buffer. When no buffering argument is given, the default buffering policy works as follows: - * Binary files are buffered in fixed-size chunks; the size of the buffer - is chosen using a heuristic trying to determine the underlying device's - "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`. - On many systems, the buffer will typically be 4096 or 8192 bytes long. + * Binary files are buffered in fixed-size chunks; the size of the buffer + is set to `max(io.DEFAULT_BUFFER_SIZE, st_blksize)` using a heuristic + trying to determine the underlying device's "block size" when available + and falling back on `io.DEFAULT_BUFFER_SIZE`. + On most systems, the buffer will typically be 131072 bytes long. * "Interactive" text files (files for which isatty() returns True) use line buffering. Other text files use the policy described above @@ -242,7 +243,7 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None, buffering = -1 line_buffering = True if buffering < 0: - buffering = raw._blksize + buffering = max(raw._blksize, DEFAULT_BUFFER_SIZE) if buffering < 0: raise ValueError("invalid buffering size") if buffering == 0: diff --git a/Lib/test/test_file.py b/Lib/test/test_file.py index 1206032a93566e..70c5256b9a562e 100644 --- a/Lib/test/test_file.py +++ b/Lib/test/test_file.py @@ -216,6 +216,18 @@ def testSetBufferSize(self): with self.assertWarnsRegex(RuntimeWarning, 'line buffering'): self._checkBufferSize(1) + def testDefaultBufferSize(self): + f = self.open(TESTFN, 'wb') + blksize = f.raw._blksize + f.write(bytes([0] * 5_000_000)) + f.close() + + f = self.open(TESTFN, 'rb') + data = f.read1() + expected_size = max(blksize, io.DEFAULT_BUFFER_SIZE) + self.assertEqual(len(data), expected_size) + f.close() + def testTruncateOnWindows(self): # SF bug # "file.truncate fault on windows" diff --git a/Misc/NEWS.d/next/Library/2024-04-30-14-03-09.gh-issue-117151.yt2H8c.rst b/Misc/NEWS.d/next/Library/2024-04-30-14-03-09.gh-issue-117151.yt2H8c.rst new file mode 100644 index 00000000000000..7140c5762e2279 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-04-30-14-03-09.gh-issue-117151.yt2H8c.rst @@ -0,0 +1,5 @@ +Increase ``io.DEFAULT_BUFFER_SIZE`` from 8k to 128k and adjust :func:`open` on +platforms where ``fstat`` provides a ``st_blksize`` field (such as Linux) to use +``max(io.DEFAULT_BUFFER_SIZE, device block size)`` rather than always using the +device block size. This should improve I/O performance. +Patch by Romain Morotti. diff --git a/Modules/_io/_iomodule.c b/Modules/_io/_iomodule.c index 6622f2cabb908b..b5a16eb1abe8a9 100644 --- a/Modules/_io/_iomodule.c +++ b/Modules/_io/_iomodule.c @@ -132,9 +132,10 @@ the size of a fixed-size chunk buffer. When no buffering argument is given, the default buffering policy works as follows: * Binary files are buffered in fixed-size chunks; the size of the buffer - is chosen using a heuristic trying to determine the underlying device's - "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`. - On many systems, the buffer will typically be 4096 or 8192 bytes long. + is set to `max(io.DEFAULT_BUFFER_SIZE, st_blksize)` using a heuristic + trying to determine the underlying device's "block size" when available + and falling back on `io.DEFAULT_BUFFER_SIZE`. + On most systems, the buffer will typically be 131072 bytes long. * "Interactive" text files (files for which isatty() returns True) use line buffering. Other text files use the policy described above @@ -200,7 +201,7 @@ static PyObject * _io_open_impl(PyObject *module, PyObject *file, const char *mode, int buffering, const char *encoding, const char *errors, const char *newline, int closefd, PyObject *opener) -/*[clinic end generated code: output=aefafc4ce2b46dc0 input=cd034e7cdfbf4e78]*/ +/*[clinic end generated code: output=aefafc4ce2b46dc0 input=bac1cd70f431fe9a]*/ { size_t i; @@ -368,6 +369,8 @@ _io_open_impl(PyObject *module, PyObject *file, const char *mode, if (blksize_obj == NULL) goto error; buffering = PyLong_AsLong(blksize_obj); + if (buffering < DEFAULT_BUFFER_SIZE) + buffering = DEFAULT_BUFFER_SIZE; Py_DECREF(blksize_obj); if (buffering == -1 && PyErr_Occurred()) goto error; diff --git a/Modules/_io/_iomodule.h b/Modules/_io/_iomodule.h index afd638a120ba08..18cf20edf26f7d 100644 --- a/Modules/_io/_iomodule.h +++ b/Modules/_io/_iomodule.h @@ -78,7 +78,7 @@ extern Py_ssize_t _PyIO_find_line_ending( */ extern int _PyIO_trap_eintr(void); -#define DEFAULT_BUFFER_SIZE (8 * 1024) /* bytes */ +#define DEFAULT_BUFFER_SIZE (128 * 1024) /* bytes */ /* * Offset type for positioning. diff --git a/Modules/_io/clinic/_iomodule.c.h b/Modules/_io/clinic/_iomodule.c.h index 82932a23331ab6..1fe0b1de36342c 100644 --- a/Modules/_io/clinic/_iomodule.c.h +++ b/Modules/_io/clinic/_iomodule.c.h @@ -64,9 +64,10 @@ PyDoc_STRVAR(_io_open__doc__, "given, the default buffering policy works as follows:\n" "\n" "* Binary files are buffered in fixed-size chunks; the size of the buffer\n" -" is chosen using a heuristic trying to determine the underlying device\'s\n" -" \"block size\" and falling back on `io.DEFAULT_BUFFER_SIZE`.\n" -" On many systems, the buffer will typically be 4096 or 8192 bytes long.\n" +" is set to `max(io.DEFAULT_BUFFER_SIZE, st_blksize)` using a heuristic\n" +" trying to determine the underlying device\'s \"block size\" when available\n" +" and falling back on `io.DEFAULT_BUFFER_SIZE`.\n" +" On most systems, the buffer will typically be 131072 bytes long.\n" "\n" "* \"Interactive\" text files (files for which isatty() returns True)\n" " use line buffering. Other text files use the policy described above\n" @@ -406,4 +407,4 @@ _io_open_code(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObjec exit: return return_value; } -/*[clinic end generated code: output=ec1df2ff5265ab16 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=ec27299cc4de03e3 input=a9049054013a1b77]*/ From da988ba8ec062ba78b12167ef4365b507b580170 Mon Sep 17 00:00:00 2001 From: rmorotti Date: Wed, 5 Feb 2025 17:39:40 +0000 Subject: [PATCH 02/17] construct the test data more efficiently, no intermediate list --- Lib/test/test_file.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_file.py b/Lib/test/test_file.py index 70c5256b9a562e..82108ce2d63c65 100644 --- a/Lib/test/test_file.py +++ b/Lib/test/test_file.py @@ -219,7 +219,7 @@ def testSetBufferSize(self): def testDefaultBufferSize(self): f = self.open(TESTFN, 'wb') blksize = f.raw._blksize - f.write(bytes([0] * 5_000_000)) + f.write(b"\0" * 5_000_000) f.close() f = self.open(TESTFN, 'rb') From eaa0df69b0f47774476593f9c7ab12c67ac8fa26 Mon Sep 17 00:00:00 2001 From: rmorotti Date: Wed, 5 Feb 2025 18:05:23 +0000 Subject: [PATCH 03/17] adjust news and comments --- Lib/_pyio.py | 4 +--- .../2024-04-30-14-03-09.gh-issue-117151.yt2H8c.rst | 6 +++--- Modules/_io/_iomodule.c | 8 +++----- Modules/_io/clinic/_iomodule.c.h | 8 +++----- 4 files changed, 10 insertions(+), 16 deletions(-) diff --git a/Lib/_pyio.py b/Lib/_pyio.py index 5402653bb1b362..ac96e5a936ab57 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -124,9 +124,7 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None, given, the default buffering policy works as follows: * Binary files are buffered in fixed-size chunks; the size of the buffer - is set to `max(io.DEFAULT_BUFFER_SIZE, st_blksize)` using a heuristic - trying to determine the underlying device's "block size" when available - and falling back on `io.DEFAULT_BUFFER_SIZE`. + is the maximum of the DEFAULT_BUFFER_SIZE and the device block size. On most systems, the buffer will typically be 131072 bytes long. * "Interactive" text files (files for which isatty() returns True) diff --git a/Misc/NEWS.d/next/Library/2024-04-30-14-03-09.gh-issue-117151.yt2H8c.rst b/Misc/NEWS.d/next/Library/2024-04-30-14-03-09.gh-issue-117151.yt2H8c.rst index 7140c5762e2279..89a1424573280a 100644 --- a/Misc/NEWS.d/next/Library/2024-04-30-14-03-09.gh-issue-117151.yt2H8c.rst +++ b/Misc/NEWS.d/next/Library/2024-04-30-14-03-09.gh-issue-117151.yt2H8c.rst @@ -1,5 +1,5 @@ Increase ``io.DEFAULT_BUFFER_SIZE`` from 8k to 128k and adjust :func:`open` on -platforms where ``fstat`` provides a ``st_blksize`` field (such as Linux) to use -``max(io.DEFAULT_BUFFER_SIZE, device block size)`` rather than always using the -device block size. This should improve I/O performance. +platforms where :meth:`os.fstat` provides a ``st_blksize`` field (such as Linux) +to use ``max(io.DEFAULT_BUFFER_SIZE, device block size)`` rather than always +using the device block size. This should improve I/O performance. Patch by Romain Morotti. diff --git a/Modules/_io/_iomodule.c b/Modules/_io/_iomodule.c index b5a16eb1abe8a9..b7dbd358d270a8 100644 --- a/Modules/_io/_iomodule.c +++ b/Modules/_io/_iomodule.c @@ -132,10 +132,8 @@ the size of a fixed-size chunk buffer. When no buffering argument is given, the default buffering policy works as follows: * Binary files are buffered in fixed-size chunks; the size of the buffer - is set to `max(io.DEFAULT_BUFFER_SIZE, st_blksize)` using a heuristic - trying to determine the underlying device's "block size" when available - and falling back on `io.DEFAULT_BUFFER_SIZE`. - On most systems, the buffer will typically be 131072 bytes long. + is the maximum of the DEFAULT_BUFFER_SIZE and the device block size. + On most systems, the buffer will typically be 131072 bytes long. * "Interactive" text files (files for which isatty() returns True) use line buffering. Other text files use the policy described above @@ -201,7 +199,7 @@ static PyObject * _io_open_impl(PyObject *module, PyObject *file, const char *mode, int buffering, const char *encoding, const char *errors, const char *newline, int closefd, PyObject *opener) -/*[clinic end generated code: output=aefafc4ce2b46dc0 input=bac1cd70f431fe9a]*/ +/*[clinic end generated code: output=aefafc4ce2b46dc0 input=105f6f1cb63368c4]*/ { size_t i; diff --git a/Modules/_io/clinic/_iomodule.c.h b/Modules/_io/clinic/_iomodule.c.h index 1fe0b1de36342c..0e822f7981405b 100644 --- a/Modules/_io/clinic/_iomodule.c.h +++ b/Modules/_io/clinic/_iomodule.c.h @@ -64,10 +64,8 @@ PyDoc_STRVAR(_io_open__doc__, "given, the default buffering policy works as follows:\n" "\n" "* Binary files are buffered in fixed-size chunks; the size of the buffer\n" -" is set to `max(io.DEFAULT_BUFFER_SIZE, st_blksize)` using a heuristic\n" -" trying to determine the underlying device\'s \"block size\" when available\n" -" and falling back on `io.DEFAULT_BUFFER_SIZE`.\n" -" On most systems, the buffer will typically be 131072 bytes long.\n" +" is the maximum of the DEFAULT_BUFFER_SIZE and the device block size.\n" +" On most systems, the buffer will typically be 131072 bytes long.\n" "\n" "* \"Interactive\" text files (files for which isatty() returns True)\n" " use line buffering. Other text files use the policy described above\n" @@ -407,4 +405,4 @@ _io_open_code(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObjec exit: return return_value; } -/*[clinic end generated code: output=ec27299cc4de03e3 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=06c3900ae6680e57 input=a9049054013a1b77]*/ From 1af8e18a15092f4351d899678b597b0388a83180 Mon Sep 17 00:00:00 2001 From: rmorotti Date: Wed, 5 Feb 2025 18:07:17 +0000 Subject: [PATCH 04/17] curly braces --- Modules/_io/_iomodule.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Modules/_io/_iomodule.c b/Modules/_io/_iomodule.c index b7dbd358d270a8..7f8281f7b9a809 100644 --- a/Modules/_io/_iomodule.c +++ b/Modules/_io/_iomodule.c @@ -368,7 +368,9 @@ _io_open_impl(PyObject *module, PyObject *file, const char *mode, goto error; buffering = PyLong_AsLong(blksize_obj); if (buffering < DEFAULT_BUFFER_SIZE) + { buffering = DEFAULT_BUFFER_SIZE; + } Py_DECREF(blksize_obj); if (buffering == -1 && PyErr_Occurred()) goto error; From 18f472c5002563473292370b2142f9dfd62634d7 Mon Sep 17 00:00:00 2001 From: rmorotti Date: Wed, 5 Feb 2025 18:19:43 +0000 Subject: [PATCH 05/17] cap the block size to 8 MB. --- Lib/_pyio.py | 2 +- Modules/_io/_iomodule.c | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/Lib/_pyio.py b/Lib/_pyio.py index ac96e5a936ab57..6fdef12b2dfe1a 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -241,7 +241,7 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None, buffering = -1 line_buffering = True if buffering < 0: - buffering = max(raw._blksize, DEFAULT_BUFFER_SIZE) + buffering = max(min(raw._blksize, 8192 * 1024), DEFAULT_BUFFER_SIZE) if buffering < 0: raise ValueError("invalid buffering size") if buffering == 0: diff --git a/Modules/_io/_iomodule.c b/Modules/_io/_iomodule.c index 7f8281f7b9a809..f3417edb3e1c54 100644 --- a/Modules/_io/_iomodule.c +++ b/Modules/_io/_iomodule.c @@ -367,6 +367,10 @@ _io_open_impl(PyObject *module, PyObject *file, const char *mode, if (blksize_obj == NULL) goto error; buffering = PyLong_AsLong(blksize_obj); + if (buffering > 8192 * 1024) + { + buffering = 8192 * 1024; + } if (buffering < DEFAULT_BUFFER_SIZE) { buffering = DEFAULT_BUFFER_SIZE; From d7e0cf7280cb044f13cd68ef4c4b4be2e4be88a0 Mon Sep 17 00:00:00 2001 From: rmorotti Date: Wed, 5 Feb 2025 18:22:24 +0000 Subject: [PATCH 06/17] use with to open file --- Lib/test/test_file.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/Lib/test/test_file.py b/Lib/test/test_file.py index 82108ce2d63c65..e09c37728a4f76 100644 --- a/Lib/test/test_file.py +++ b/Lib/test/test_file.py @@ -217,16 +217,14 @@ def testSetBufferSize(self): self._checkBufferSize(1) def testDefaultBufferSize(self): - f = self.open(TESTFN, 'wb') - blksize = f.raw._blksize - f.write(b"\0" * 5_000_000) - f.close() - - f = self.open(TESTFN, 'rb') - data = f.read1() - expected_size = max(blksize, io.DEFAULT_BUFFER_SIZE) - self.assertEqual(len(data), expected_size) - f.close() + with self.open(TESTFN, 'wb') as f: + blksize = f.raw._blksize + f.write(b"\0" * 5_000_000) + + with self.open(TESTFN, 'rb') as f: + data = f.read1() + expected_size = max(blksize, io.DEFAULT_BUFFER_SIZE) + self.assertEqual(len(data), expected_size) def testTruncateOnWindows(self): # SF bug From 07be8afd18a03c8bac1181895c8914ae2b01fec8 Mon Sep 17 00:00:00 2001 From: rmorotti Date: Wed, 5 Feb 2025 18:23:53 +0000 Subject: [PATCH 07/17] cap the block size to 8 MB. --- Lib/test/test_file.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_file.py b/Lib/test/test_file.py index e09c37728a4f76..029c903e01afb9 100644 --- a/Lib/test/test_file.py +++ b/Lib/test/test_file.py @@ -223,7 +223,7 @@ def testDefaultBufferSize(self): with self.open(TESTFN, 'rb') as f: data = f.read1() - expected_size = max(blksize, io.DEFAULT_BUFFER_SIZE) + expected_size = max(min(blksize, 8192 * 1024), io.DEFAULT_BUFFER_SIZE) self.assertEqual(len(data), expected_size) def testTruncateOnWindows(self): From 8546600f034fcb0c63ee54f34651f86321da1358 Mon Sep 17 00:00:00 2001 From: rmorotti Date: Thu, 6 Feb 2025 10:43:55 +0000 Subject: [PATCH 08/17] add constant MAXIMUM_BUFFER_SIZE --- Lib/_pyio.py | 8 +++++--- Modules/_io/_iomodule.c | 24 +++++++++++------------- Modules/_io/_iomodule.h | 1 + Modules/_io/clinic/_iomodule.c.h | 7 ++++--- 4 files changed, 21 insertions(+), 19 deletions(-) diff --git a/Lib/_pyio.py b/Lib/_pyio.py index 6fdef12b2dfe1a..c48a7504d9f758 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -23,8 +23,10 @@ valid_seek_flags.add(os.SEEK_HOLE) valid_seek_flags.add(os.SEEK_DATA) -# open() uses max(st_blksize, io.DEFAULT_BUFFER_SIZE) when st_blksize is available +# open() uses max(min(blocksize, MAXIMUM_BUFFER_SIZE), DEFAULT_BUFFER_SIZE) +# when the device block size is available. DEFAULT_BUFFER_SIZE = 128 * 1024 # bytes +MAXIMUM_BUFFER_SIZE = 8192 * 1024 # bytes # NOTE: Base classes defined here are registered with the "official" ABCs # defined in io.py. We don't use real inheritance though, because we don't want @@ -125,7 +127,7 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None, * Binary files are buffered in fixed-size chunks; the size of the buffer is the maximum of the DEFAULT_BUFFER_SIZE and the device block size. - On most systems, the buffer will typically be 131072 bytes long. + On most systems, the buffer will typically be 128 kilobytes long. * "Interactive" text files (files for which isatty() returns True) use line buffering. Other text files use the policy described above @@ -241,7 +243,7 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None, buffering = -1 line_buffering = True if buffering < 0: - buffering = max(min(raw._blksize, 8192 * 1024), DEFAULT_BUFFER_SIZE) + buffering = max(min(raw._blksize, MAXIMUM_BUFFER_SIZE), DEFAULT_BUFFER_SIZE) if buffering < 0: raise ValueError("invalid buffering size") if buffering == 0: diff --git a/Modules/_io/_iomodule.c b/Modules/_io/_iomodule.c index f3417edb3e1c54..f4c757579775f8 100644 --- a/Modules/_io/_iomodule.c +++ b/Modules/_io/_iomodule.c @@ -60,8 +60,12 @@ PyDoc_STRVAR(module_doc, "DEFAULT_BUFFER_SIZE\n" "\n" " An int containing the default buffer size used by the module's buffered\n" -" I/O classes. open() uses the file's blksize (as obtained by os.stat) if\n" -" possible.\n" +" I/O classes.\n" +"\n" +"MAXIMUM_BUFFER_SIZE\n" +"\n" +" An int containing the maximum buffer size used by the module's buffered\n" +" I/O classes.\n" ); @@ -132,8 +136,9 @@ the size of a fixed-size chunk buffer. When no buffering argument is given, the default buffering policy works as follows: * Binary files are buffered in fixed-size chunks; the size of the buffer - is the maximum of the DEFAULT_BUFFER_SIZE and the device block size. - On most systems, the buffer will typically be 131072 bytes long. + is max(min(blocksize, MAXIMUM_BUFFER_SIZE), DEFAULT_BUFFER_SIZE) + when the device block size is available. + On most systems, the buffer will typically be 128 kilobytes long. * "Interactive" text files (files for which isatty() returns True) use line buffering. Other text files use the policy described above @@ -199,7 +204,7 @@ static PyObject * _io_open_impl(PyObject *module, PyObject *file, const char *mode, int buffering, const char *encoding, const char *errors, const char *newline, int closefd, PyObject *opener) -/*[clinic end generated code: output=aefafc4ce2b46dc0 input=105f6f1cb63368c4]*/ +/*[clinic end generated code: output=aefafc4ce2b46dc0 input=e1e2d41c6e922cbe]*/ { size_t i; @@ -367,14 +372,7 @@ _io_open_impl(PyObject *module, PyObject *file, const char *mode, if (blksize_obj == NULL) goto error; buffering = PyLong_AsLong(blksize_obj); - if (buffering > 8192 * 1024) - { - buffering = 8192 * 1024; - } - if (buffering < DEFAULT_BUFFER_SIZE) - { - buffering = DEFAULT_BUFFER_SIZE; - } + buffering = Py_MAX(Py_MIN(buffering, MAXIMUM_BUFFER_SIZE), DEFAULT_BUFFER_SIZE); Py_DECREF(blksize_obj); if (buffering == -1 && PyErr_Occurred()) goto error; diff --git a/Modules/_io/_iomodule.h b/Modules/_io/_iomodule.h index 18cf20edf26f7d..dadf0d2cb165c8 100644 --- a/Modules/_io/_iomodule.h +++ b/Modules/_io/_iomodule.h @@ -79,6 +79,7 @@ extern Py_ssize_t _PyIO_find_line_ending( extern int _PyIO_trap_eintr(void); #define DEFAULT_BUFFER_SIZE (128 * 1024) /* bytes */ +#define MAXIMUM_BUFFER_SIZE (8192 * 1024) /* bytes */ /* * Offset type for positioning. diff --git a/Modules/_io/clinic/_iomodule.c.h b/Modules/_io/clinic/_iomodule.c.h index 0e822f7981405b..798db3a15aad1c 100644 --- a/Modules/_io/clinic/_iomodule.c.h +++ b/Modules/_io/clinic/_iomodule.c.h @@ -64,8 +64,9 @@ PyDoc_STRVAR(_io_open__doc__, "given, the default buffering policy works as follows:\n" "\n" "* Binary files are buffered in fixed-size chunks; the size of the buffer\n" -" is the maximum of the DEFAULT_BUFFER_SIZE and the device block size.\n" -" On most systems, the buffer will typically be 131072 bytes long.\n" +" is max(min(blocksize, MAXIMUM_BUFFER_SIZE), DEFAULT_BUFFER_SIZE)\n" +" when the device block size is available.\n" +" On most systems, the buffer will typically be 128 kilobytes long.\n" "\n" "* \"Interactive\" text files (files for which isatty() returns True)\n" " use line buffering. Other text files use the policy described above\n" @@ -405,4 +406,4 @@ _io_open_code(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObjec exit: return return_value; } -/*[clinic end generated code: output=06c3900ae6680e57 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=0c6aa26a7f35f9bd input=a9049054013a1b77]*/ From a1df21dcf5dee636bc7c52d5ba2813af5fd8841d Mon Sep 17 00:00:00 2001 From: rmorotti Date: Thu, 6 Feb 2025 10:49:32 +0000 Subject: [PATCH 09/17] update docstring --- Doc/library/functions.rst | 8 ++++---- Lib/_pyio.py | 3 ++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst index a7549b9bce76e2..352488acf0511b 100644 --- a/Doc/library/functions.rst +++ b/Doc/library/functions.rst @@ -1405,10 +1405,10 @@ are always available. They are listed here in alphabetical order. :func:`io.TextIOWrapper.reconfigure`. When no *buffering* argument is given, the default buffering policy works as follows: - * Binary files are buffered in fixed-size chunks; the size of the buffer is - chosen using a heuristic trying to determine the underlying device's "block - size" and falling back on :const:`io.DEFAULT_BUFFER_SIZE`. On many systems, - the buffer will typically be 4096 or 8192 bytes long. + * Binary files are buffered in fixed-size chunks; the size of the buffer + is max(min(blocksize, MAXIMUM_BUFFER_SIZE), DEFAULT_BUFFER_SIZE) + when the device block size is available. + On most systems, the buffer will typically be 128 kilobytes long. * "Interactive" text files (files for which :meth:`~io.IOBase.isatty` returns ``True``) use line buffering. Other text files use the policy diff --git a/Lib/_pyio.py b/Lib/_pyio.py index c48a7504d9f758..dfab460fdc2a7f 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -126,7 +126,8 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None, given, the default buffering policy works as follows: * Binary files are buffered in fixed-size chunks; the size of the buffer - is the maximum of the DEFAULT_BUFFER_SIZE and the device block size. + is max(min(blocksize, MAXIMUM_BUFFER_SIZE), DEFAULT_BUFFER_SIZE) + when the device block size is available. On most systems, the buffer will typically be 128 kilobytes long. * "Interactive" text files (files for which isatty() returns True) From 4e0e1e1807a42d0c99be1f4e028041143f077783 Mon Sep 17 00:00:00 2001 From: rmorotti Date: Thu, 6 Feb 2025 13:17:57 +0000 Subject: [PATCH 10/17] expose MAXIMUM_BUFFER_SIZE --- Lib/io.py | 14 ++++++++++---- Modules/_io/_iomodule.c | 4 ++++ 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/Lib/io.py b/Lib/io.py index f0e2fa15d5abcf..32b96edf45d741 100644 --- a/Lib/io.py +++ b/Lib/io.py @@ -29,8 +29,12 @@ DEFAULT_BUFFER_SIZE An int containing the default buffer size used by the module's buffered - I/O classes. open() uses the file's blksize (as obtained by os.stat) if - possible. + I/O classes. + +MAXIMUM_BUFFER_SIZE + + An int containing the maximum buffer size used by the module's buffered + I/O classes. """ # New I/O library conforming to PEP 3116. @@ -46,13 +50,15 @@ "BufferedReader", "BufferedWriter", "BufferedRWPair", "BufferedRandom", "TextIOBase", "TextIOWrapper", "UnsupportedOperation", "SEEK_SET", "SEEK_CUR", "SEEK_END", - "DEFAULT_BUFFER_SIZE", "text_encoding", "IncrementalNewlineDecoder"] + "DEFAULT_BUFFER_SIZE", "MAXIMUM_BUFFER_SIZE", "text_encoding", + "IncrementalNewlineDecoder"] import _io import abc -from _io import (DEFAULT_BUFFER_SIZE, BlockingIOError, UnsupportedOperation, +from _io import (DEFAULT_BUFFER_SIZE, MAXIMUM_BUFFER_SIZE, + BlockingIOError, UnsupportedOperation, open, open_code, FileIO, BytesIO, StringIO, BufferedReader, BufferedWriter, BufferedRWPair, BufferedRandom, IncrementalNewlineDecoder, text_encoding, TextIOWrapper) diff --git a/Modules/_io/_iomodule.c b/Modules/_io/_iomodule.c index f4c757579775f8..2a784dac343208 100644 --- a/Modules/_io/_iomodule.c +++ b/Modules/_io/_iomodule.c @@ -660,6 +660,10 @@ iomodule_exec(PyObject *m) if (PyModule_AddIntMacro(m, DEFAULT_BUFFER_SIZE) < 0) return -1; + /* MAXIMUM_BUFFER_SIZE */ + if (PyModule_AddIntMacro(m, MAXIMUM_BUFFER_SIZE) < 0) + return -1; + /* UnsupportedOperation inherits from ValueError and OSError */ state->unsupported_operation = PyObject_CallFunction( (PyObject *)&PyType_Type, "s(OO){}", From 0ab45926245546948d88769c6ffce99225d1b37e Mon Sep 17 00:00:00 2001 From: rmorotti Date: Fri, 7 Feb 2025 09:26:16 +0000 Subject: [PATCH 11/17] Revert "expose MAXIMUM_BUFFER_SIZE" This reverts commit 4e0e1e1807a42d0c99be1f4e028041143f077783. --- Lib/io.py | 14 ++++---------- Modules/_io/_iomodule.c | 4 ---- 2 files changed, 4 insertions(+), 14 deletions(-) diff --git a/Lib/io.py b/Lib/io.py index 32b96edf45d741..f0e2fa15d5abcf 100644 --- a/Lib/io.py +++ b/Lib/io.py @@ -29,12 +29,8 @@ DEFAULT_BUFFER_SIZE An int containing the default buffer size used by the module's buffered - I/O classes. - -MAXIMUM_BUFFER_SIZE - - An int containing the maximum buffer size used by the module's buffered - I/O classes. + I/O classes. open() uses the file's blksize (as obtained by os.stat) if + possible. """ # New I/O library conforming to PEP 3116. @@ -50,15 +46,13 @@ "BufferedReader", "BufferedWriter", "BufferedRWPair", "BufferedRandom", "TextIOBase", "TextIOWrapper", "UnsupportedOperation", "SEEK_SET", "SEEK_CUR", "SEEK_END", - "DEFAULT_BUFFER_SIZE", "MAXIMUM_BUFFER_SIZE", "text_encoding", - "IncrementalNewlineDecoder"] + "DEFAULT_BUFFER_SIZE", "text_encoding", "IncrementalNewlineDecoder"] import _io import abc -from _io import (DEFAULT_BUFFER_SIZE, MAXIMUM_BUFFER_SIZE, - BlockingIOError, UnsupportedOperation, +from _io import (DEFAULT_BUFFER_SIZE, BlockingIOError, UnsupportedOperation, open, open_code, FileIO, BytesIO, StringIO, BufferedReader, BufferedWriter, BufferedRWPair, BufferedRandom, IncrementalNewlineDecoder, text_encoding, TextIOWrapper) diff --git a/Modules/_io/_iomodule.c b/Modules/_io/_iomodule.c index 2a784dac343208..f4c757579775f8 100644 --- a/Modules/_io/_iomodule.c +++ b/Modules/_io/_iomodule.c @@ -660,10 +660,6 @@ iomodule_exec(PyObject *m) if (PyModule_AddIntMacro(m, DEFAULT_BUFFER_SIZE) < 0) return -1; - /* MAXIMUM_BUFFER_SIZE */ - if (PyModule_AddIntMacro(m, MAXIMUM_BUFFER_SIZE) < 0) - return -1; - /* UnsupportedOperation inherits from ValueError and OSError */ state->unsupported_operation = PyObject_CallFunction( (PyObject *)&PyType_Type, "s(OO){}", From 209706e0443780172db56a0110690043f0a0da09 Mon Sep 17 00:00:00 2001 From: rmorotti Date: Fri, 7 Feb 2025 09:29:50 +0000 Subject: [PATCH 12/17] rename to _MAXIMUM_BUFFER_SIZE --- Doc/library/functions.rst | 2 +- Lib/_pyio.py | 8 ++++---- Modules/_io/_iomodule.c | 8 ++++---- Modules/_io/_iomodule.h | 2 +- Modules/_io/clinic/_iomodule.c.h | 4 ++-- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst index 352488acf0511b..235d6db289308a 100644 --- a/Doc/library/functions.rst +++ b/Doc/library/functions.rst @@ -1406,7 +1406,7 @@ are always available. They are listed here in alphabetical order. given, the default buffering policy works as follows: * Binary files are buffered in fixed-size chunks; the size of the buffer - is max(min(blocksize, MAXIMUM_BUFFER_SIZE), DEFAULT_BUFFER_SIZE) + is max(min(blocksize, _MAXIMUM_BUFFER_SIZE), DEFAULT_BUFFER_SIZE) when the device block size is available. On most systems, the buffer will typically be 128 kilobytes long. diff --git a/Lib/_pyio.py b/Lib/_pyio.py index dfab460fdc2a7f..b2a029e4a460e8 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -23,10 +23,10 @@ valid_seek_flags.add(os.SEEK_HOLE) valid_seek_flags.add(os.SEEK_DATA) -# open() uses max(min(blocksize, MAXIMUM_BUFFER_SIZE), DEFAULT_BUFFER_SIZE) +# open() uses max(min(blocksize, _MAXIMUM_BUFFER_SIZE), DEFAULT_BUFFER_SIZE) # when the device block size is available. DEFAULT_BUFFER_SIZE = 128 * 1024 # bytes -MAXIMUM_BUFFER_SIZE = 8192 * 1024 # bytes +_MAXIMUM_BUFFER_SIZE = 8192 * 1024 # bytes # NOTE: Base classes defined here are registered with the "official" ABCs # defined in io.py. We don't use real inheritance though, because we don't want @@ -126,7 +126,7 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None, given, the default buffering policy works as follows: * Binary files are buffered in fixed-size chunks; the size of the buffer - is max(min(blocksize, MAXIMUM_BUFFER_SIZE), DEFAULT_BUFFER_SIZE) + is max(min(blocksize, _MAXIMUM_BUFFER_SIZE), DEFAULT_BUFFER_SIZE) when the device block size is available. On most systems, the buffer will typically be 128 kilobytes long. @@ -244,7 +244,7 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None, buffering = -1 line_buffering = True if buffering < 0: - buffering = max(min(raw._blksize, MAXIMUM_BUFFER_SIZE), DEFAULT_BUFFER_SIZE) + buffering = max(min(raw._blksize, _MAXIMUM_BUFFER_SIZE), DEFAULT_BUFFER_SIZE) if buffering < 0: raise ValueError("invalid buffering size") if buffering == 0: diff --git a/Modules/_io/_iomodule.c b/Modules/_io/_iomodule.c index f4c757579775f8..eb93f7481d6eb2 100644 --- a/Modules/_io/_iomodule.c +++ b/Modules/_io/_iomodule.c @@ -62,7 +62,7 @@ PyDoc_STRVAR(module_doc, " An int containing the default buffer size used by the module's buffered\n" " I/O classes.\n" "\n" -"MAXIMUM_BUFFER_SIZE\n" +"_MAXIMUM_BUFFER_SIZE\n" "\n" " An int containing the maximum buffer size used by the module's buffered\n" " I/O classes.\n" @@ -136,7 +136,7 @@ the size of a fixed-size chunk buffer. When no buffering argument is given, the default buffering policy works as follows: * Binary files are buffered in fixed-size chunks; the size of the buffer - is max(min(blocksize, MAXIMUM_BUFFER_SIZE), DEFAULT_BUFFER_SIZE) + is max(min(blocksize, _MAXIMUM_BUFFER_SIZE), DEFAULT_BUFFER_SIZE) when the device block size is available. On most systems, the buffer will typically be 128 kilobytes long. @@ -204,7 +204,7 @@ static PyObject * _io_open_impl(PyObject *module, PyObject *file, const char *mode, int buffering, const char *encoding, const char *errors, const char *newline, int closefd, PyObject *opener) -/*[clinic end generated code: output=aefafc4ce2b46dc0 input=e1e2d41c6e922cbe]*/ +/*[clinic end generated code: output=aefafc4ce2b46dc0 input=a35153cf2829c537]*/ { size_t i; @@ -372,7 +372,7 @@ _io_open_impl(PyObject *module, PyObject *file, const char *mode, if (blksize_obj == NULL) goto error; buffering = PyLong_AsLong(blksize_obj); - buffering = Py_MAX(Py_MIN(buffering, MAXIMUM_BUFFER_SIZE), DEFAULT_BUFFER_SIZE); + buffering = Py_MAX(Py_MIN(buffering, _MAXIMUM_BUFFER_SIZE), DEFAULT_BUFFER_SIZE); Py_DECREF(blksize_obj); if (buffering == -1 && PyErr_Occurred()) goto error; diff --git a/Modules/_io/_iomodule.h b/Modules/_io/_iomodule.h index dadf0d2cb165c8..291f20739445de 100644 --- a/Modules/_io/_iomodule.h +++ b/Modules/_io/_iomodule.h @@ -79,7 +79,7 @@ extern Py_ssize_t _PyIO_find_line_ending( extern int _PyIO_trap_eintr(void); #define DEFAULT_BUFFER_SIZE (128 * 1024) /* bytes */ -#define MAXIMUM_BUFFER_SIZE (8192 * 1024) /* bytes */ +#define _MAXIMUM_BUFFER_SIZE (8192 * 1024) /* bytes */ /* * Offset type for positioning. diff --git a/Modules/_io/clinic/_iomodule.c.h b/Modules/_io/clinic/_iomodule.c.h index 798db3a15aad1c..3139cf5a10ddf1 100644 --- a/Modules/_io/clinic/_iomodule.c.h +++ b/Modules/_io/clinic/_iomodule.c.h @@ -64,7 +64,7 @@ PyDoc_STRVAR(_io_open__doc__, "given, the default buffering policy works as follows:\n" "\n" "* Binary files are buffered in fixed-size chunks; the size of the buffer\n" -" is max(min(blocksize, MAXIMUM_BUFFER_SIZE), DEFAULT_BUFFER_SIZE)\n" +" is max(min(blocksize, _MAXIMUM_BUFFER_SIZE), DEFAULT_BUFFER_SIZE)\n" " when the device block size is available.\n" " On most systems, the buffer will typically be 128 kilobytes long.\n" "\n" @@ -406,4 +406,4 @@ _io_open_code(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObjec exit: return return_value; } -/*[clinic end generated code: output=0c6aa26a7f35f9bd input=a9049054013a1b77]*/ +/*[clinic end generated code: output=48666bc88d5c8b44 input=a9049054013a1b77]*/ From 7ce54c4c7627e579c2e1c52b9918eeff3375e6c0 Mon Sep 17 00:00:00 2001 From: rmorotti Date: Mon, 10 Feb 2025 10:39:47 +0000 Subject: [PATCH 13/17] remove constant _MAXIMUM_BUFFER_SIZE --- Doc/library/functions.rst | 2 +- Lib/_pyio.py | 7 +++---- Modules/_io/_iomodule.c | 11 +++-------- Modules/_io/_iomodule.h | 1 - Modules/_io/clinic/_iomodule.c.h | 4 ++-- 5 files changed, 9 insertions(+), 16 deletions(-) diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst index 235d6db289308a..55bd856d424bc3 100644 --- a/Doc/library/functions.rst +++ b/Doc/library/functions.rst @@ -1406,7 +1406,7 @@ are always available. They are listed here in alphabetical order. given, the default buffering policy works as follows: * Binary files are buffered in fixed-size chunks; the size of the buffer - is max(min(blocksize, _MAXIMUM_BUFFER_SIZE), DEFAULT_BUFFER_SIZE) + is max(min(blocksize, 8 MiB), DEFAULT_BUFFER_SIZE) when the device block size is available. On most systems, the buffer will typically be 128 kilobytes long. diff --git a/Lib/_pyio.py b/Lib/_pyio.py index b2a029e4a460e8..9903c217c0fcf8 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -23,10 +23,9 @@ valid_seek_flags.add(os.SEEK_HOLE) valid_seek_flags.add(os.SEEK_DATA) -# open() uses max(min(blocksize, _MAXIMUM_BUFFER_SIZE), DEFAULT_BUFFER_SIZE) +# open() uses max(min(blocksize, 8 MiB), DEFAULT_BUFFER_SIZE) # when the device block size is available. DEFAULT_BUFFER_SIZE = 128 * 1024 # bytes -_MAXIMUM_BUFFER_SIZE = 8192 * 1024 # bytes # NOTE: Base classes defined here are registered with the "official" ABCs # defined in io.py. We don't use real inheritance though, because we don't want @@ -126,7 +125,7 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None, given, the default buffering policy works as follows: * Binary files are buffered in fixed-size chunks; the size of the buffer - is max(min(blocksize, _MAXIMUM_BUFFER_SIZE), DEFAULT_BUFFER_SIZE) + is max(min(blocksize, 8 MiB), DEFAULT_BUFFER_SIZE) when the device block size is available. On most systems, the buffer will typically be 128 kilobytes long. @@ -244,7 +243,7 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None, buffering = -1 line_buffering = True if buffering < 0: - buffering = max(min(raw._blksize, _MAXIMUM_BUFFER_SIZE), DEFAULT_BUFFER_SIZE) + buffering = max(min(raw._blksize, 8192 * 1024), DEFAULT_BUFFER_SIZE) if buffering < 0: raise ValueError("invalid buffering size") if buffering == 0: diff --git a/Modules/_io/_iomodule.c b/Modules/_io/_iomodule.c index eb93f7481d6eb2..cd8a995ff90843 100644 --- a/Modules/_io/_iomodule.c +++ b/Modules/_io/_iomodule.c @@ -60,11 +60,6 @@ PyDoc_STRVAR(module_doc, "DEFAULT_BUFFER_SIZE\n" "\n" " An int containing the default buffer size used by the module's buffered\n" -" I/O classes.\n" -"\n" -"_MAXIMUM_BUFFER_SIZE\n" -"\n" -" An int containing the maximum buffer size used by the module's buffered\n" " I/O classes.\n" ); @@ -136,7 +131,7 @@ the size of a fixed-size chunk buffer. When no buffering argument is given, the default buffering policy works as follows: * Binary files are buffered in fixed-size chunks; the size of the buffer - is max(min(blocksize, _MAXIMUM_BUFFER_SIZE), DEFAULT_BUFFER_SIZE) + is max(min(blocksize, 8 MiB), DEFAULT_BUFFER_SIZE) when the device block size is available. On most systems, the buffer will typically be 128 kilobytes long. @@ -204,7 +199,7 @@ static PyObject * _io_open_impl(PyObject *module, PyObject *file, const char *mode, int buffering, const char *encoding, const char *errors, const char *newline, int closefd, PyObject *opener) -/*[clinic end generated code: output=aefafc4ce2b46dc0 input=a35153cf2829c537]*/ +/*[clinic end generated code: output=aefafc4ce2b46dc0 input=28027fdaabb8d744]*/ { size_t i; @@ -372,7 +367,7 @@ _io_open_impl(PyObject *module, PyObject *file, const char *mode, if (blksize_obj == NULL) goto error; buffering = PyLong_AsLong(blksize_obj); - buffering = Py_MAX(Py_MIN(buffering, _MAXIMUM_BUFFER_SIZE), DEFAULT_BUFFER_SIZE); + buffering = Py_MAX(Py_MIN(buffering, 8192 * 1024), DEFAULT_BUFFER_SIZE); Py_DECREF(blksize_obj); if (buffering == -1 && PyErr_Occurred()) goto error; diff --git a/Modules/_io/_iomodule.h b/Modules/_io/_iomodule.h index 291f20739445de..18cf20edf26f7d 100644 --- a/Modules/_io/_iomodule.h +++ b/Modules/_io/_iomodule.h @@ -79,7 +79,6 @@ extern Py_ssize_t _PyIO_find_line_ending( extern int _PyIO_trap_eintr(void); #define DEFAULT_BUFFER_SIZE (128 * 1024) /* bytes */ -#define _MAXIMUM_BUFFER_SIZE (8192 * 1024) /* bytes */ /* * Offset type for positioning. diff --git a/Modules/_io/clinic/_iomodule.c.h b/Modules/_io/clinic/_iomodule.c.h index 3139cf5a10ddf1..9a41b364284459 100644 --- a/Modules/_io/clinic/_iomodule.c.h +++ b/Modules/_io/clinic/_iomodule.c.h @@ -64,7 +64,7 @@ PyDoc_STRVAR(_io_open__doc__, "given, the default buffering policy works as follows:\n" "\n" "* Binary files are buffered in fixed-size chunks; the size of the buffer\n" -" is max(min(blocksize, _MAXIMUM_BUFFER_SIZE), DEFAULT_BUFFER_SIZE)\n" +" is max(min(blocksize, 8 MiB), DEFAULT_BUFFER_SIZE)\n" " when the device block size is available.\n" " On most systems, the buffer will typically be 128 kilobytes long.\n" "\n" @@ -406,4 +406,4 @@ _io_open_code(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObjec exit: return return_value; } -/*[clinic end generated code: output=48666bc88d5c8b44 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=2eaf6e914503bcfd input=a9049054013a1b77]*/ From 4e9b68d5ce0656d6d6fc8c8881883c19d2859329 Mon Sep 17 00:00:00 2001 From: rmorotti Date: Mon, 10 Feb 2025 10:40:33 +0000 Subject: [PATCH 14/17] update news --- .../Library/2024-04-30-14-03-09.gh-issue-117151.yt2H8c.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Misc/NEWS.d/next/Library/2024-04-30-14-03-09.gh-issue-117151.yt2H8c.rst b/Misc/NEWS.d/next/Library/2024-04-30-14-03-09.gh-issue-117151.yt2H8c.rst index 89a1424573280a..aad7e1193e8548 100644 --- a/Misc/NEWS.d/next/Library/2024-04-30-14-03-09.gh-issue-117151.yt2H8c.rst +++ b/Misc/NEWS.d/next/Library/2024-04-30-14-03-09.gh-issue-117151.yt2H8c.rst @@ -1,5 +1,5 @@ Increase ``io.DEFAULT_BUFFER_SIZE`` from 8k to 128k and adjust :func:`open` on platforms where :meth:`os.fstat` provides a ``st_blksize`` field (such as Linux) -to use ``max(io.DEFAULT_BUFFER_SIZE, device block size)`` rather than always -using the device block size. This should improve I/O performance. +to use ``max(min(io.DEFAULT_BUFFER_SIZE, 8 MiB), device block size)`` rather +than always using the device block size. This should improve I/O performance. Patch by Romain Morotti. From f7ecbefff6e667171047da6f87f5fb17beffeac0 Mon Sep 17 00:00:00 2001 From: rmorotti Date: Tue, 18 Feb 2025 18:01:22 +0000 Subject: [PATCH 15/17] move to after error --- Modules/_io/_iomodule.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/_io/_iomodule.c b/Modules/_io/_iomodule.c index cd8a995ff90843..015e9e36cada43 100644 --- a/Modules/_io/_iomodule.c +++ b/Modules/_io/_iomodule.c @@ -367,10 +367,10 @@ _io_open_impl(PyObject *module, PyObject *file, const char *mode, if (blksize_obj == NULL) goto error; buffering = PyLong_AsLong(blksize_obj); - buffering = Py_MAX(Py_MIN(buffering, 8192 * 1024), DEFAULT_BUFFER_SIZE); Py_DECREF(blksize_obj); if (buffering == -1 && PyErr_Occurred()) goto error; + buffering = Py_MAX(Py_MIN(buffering, 8192 * 1024), DEFAULT_BUFFER_SIZE); } if (buffering < 0) { PyErr_SetString(PyExc_ValueError, From 54bc5ee957b27fd1176054414a0505c4a86cfdcf Mon Sep 17 00:00:00 2001 From: rmorotti Date: Tue, 18 Feb 2025 18:03:11 +0000 Subject: [PATCH 16/17] update news message to match code --- .../next/Library/2024-04-30-14-03-09.gh-issue-117151.yt2H8c.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2024-04-30-14-03-09.gh-issue-117151.yt2H8c.rst b/Misc/NEWS.d/next/Library/2024-04-30-14-03-09.gh-issue-117151.yt2H8c.rst index aad7e1193e8548..6b13debcdccb48 100644 --- a/Misc/NEWS.d/next/Library/2024-04-30-14-03-09.gh-issue-117151.yt2H8c.rst +++ b/Misc/NEWS.d/next/Library/2024-04-30-14-03-09.gh-issue-117151.yt2H8c.rst @@ -1,5 +1,5 @@ Increase ``io.DEFAULT_BUFFER_SIZE`` from 8k to 128k and adjust :func:`open` on platforms where :meth:`os.fstat` provides a ``st_blksize`` field (such as Linux) -to use ``max(min(io.DEFAULT_BUFFER_SIZE, 8 MiB), device block size)`` rather +to use ``max(min(blocksize, 8 MiB), io.DEFAULT_BUFFER_SIZE)`` rather than always using the device block size. This should improve I/O performance. Patch by Romain Morotti. From f9258be5a25cbd596937ac9b999c2a357f17e209 Mon Sep 17 00:00:00 2001 From: rmorotti Date: Fri, 7 Mar 2025 15:40:55 +0000 Subject: [PATCH 17/17] format code block with double backticks --- Doc/library/functions.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst index 55bd856d424bc3..7e367a0f2b6b25 100644 --- a/Doc/library/functions.rst +++ b/Doc/library/functions.rst @@ -1406,7 +1406,7 @@ are always available. They are listed here in alphabetical order. given, the default buffering policy works as follows: * Binary files are buffered in fixed-size chunks; the size of the buffer - is max(min(blocksize, 8 MiB), DEFAULT_BUFFER_SIZE) + is ``max(min(blocksize, 8 MiB), DEFAULT_BUFFER_SIZE)`` when the device block size is available. On most systems, the buffer will typically be 128 kilobytes long.