diff --git a/Include/internal/pycore_fileutils.h b/Include/internal/pycore_fileutils.h index 13f86b01bbfe8f..21cb959a9c2e8c 100644 --- a/Include/internal/pycore_fileutils.h +++ b/Include/internal/pycore_fileutils.h @@ -329,6 +329,11 @@ extern int _Py_GetTicksPerSecond(long *ticks_per_second); // Export for '_testcapi' shared extension PyAPI_FUNC(int) _Py_IsValidFD(int fd); +#ifdef MS_WINDOWS +size_t _Py_LimitConsoleWriteSize(const void *buf, size_t requested_size, + size_t cap_size); +#endif + #ifdef __cplusplus } #endif diff --git a/Misc/NEWS.d/next/Windows/2024-07-17-15-07-24.gh-issue-121940.M70eyc.rst b/Misc/NEWS.d/next/Windows/2024-07-17-15-07-24.gh-issue-121940.M70eyc.rst new file mode 100644 index 00000000000000..e33d6c33903b51 --- /dev/null +++ b/Misc/NEWS.d/next/Windows/2024-07-17-15-07-24.gh-issue-121940.M70eyc.rst @@ -0,0 +1,3 @@ +An :func:`os.write` of the default buffer size on Windows no longer calls ``isatty`` or splits the write. +Writing to the Windows console is still split to maintain responsiveness to +interrupts, but at a much larger size. diff --git a/Modules/_io/winconsoleio.c b/Modules/_io/winconsoleio.c index ec5c298066a587..41be27a0e3dfca 100644 --- a/Modules/_io/winconsoleio.c +++ b/Modules/_io/winconsoleio.c @@ -48,6 +48,10 @@ of less than one character */ #define SMALLBUF 4 +/* Limit write size to consoles so that interrupts feel + responsive. 
*/ +#define WRITE_LIMIT_CONSOLE (1024 * 1024) + char _get_console_type(HANDLE handle) { DWORD mode, peek_count; @@ -134,24 +138,6 @@ char _PyIO_get_console_type(PyObject *path_or_fd) { return m; } -static DWORD -_find_last_utf8_boundary(const char *buf, DWORD len) -{ - /* This function never returns 0, returns the original len instead */ - DWORD count = 1; - if (len == 0 || (buf[len - 1] & 0x80) == 0) { - return len; - } - for (;; count++) { - if (count > 3 || count >= len) { - return len; - } - if ((buf[len - count] & 0xc0) != 0x80) { - return len - count; - } - } -} - /*[clinic input] module _io class _io._WindowsConsoleIO "winconsoleio *" "clinic_state()->PyWindowsConsoleIO_Type" @@ -1016,25 +1002,23 @@ _io__WindowsConsoleIO_write_impl(winconsoleio *self, PyTypeObject *cls, if (!b->len) { return PyLong_FromLong(0); } - if (b->len > BUFMAX) - len = BUFMAX; + /* Ensure len fits in a DWORD. This cap is larger than the write + limit because it doesn't respect utf-8 characters boundaries. + Rely on _Py_LimitConsoleWriteSize to do a character split. */ + if (b->len > WRITE_LIMIT_CONSOLE * 2) + len = WRITE_LIMIT_CONSOLE * 2; else len = (DWORD)b->len; + + /* Limit console write size to keep interactivity. + + This is a soft cap / wlen may be higher, but that is + okay because it isn't a hard OS limit in Windows 8+. */ + len = (DWORD)_Py_LimitConsoleWriteSize(b->buf, len, WRITE_LIMIT_CONSOLE); + Py_BEGIN_ALLOW_THREADS wlen = MultiByteToWideChar(CP_UTF8, 0, b->buf, len, NULL, 0); - - /* issue11395 there is an unspecified upper bound on how many bytes - can be written at once. We cap at 32k - the caller will have to - handle partial writes. - Since we don't know how many input bytes are being ignored, we - have to reduce and recalculate. */ - while (wlen > 32766 / sizeof(wchar_t)) { - len /= 2; - /* Fix for github issues gh-110913 and gh-82052. 
*/ - len = _find_last_utf8_boundary(b->buf, len); - wlen = MultiByteToWideChar(CP_UTF8, 0, b->buf, len, NULL, 0); - } Py_END_ALLOW_THREADS if (!wlen) diff --git a/Python/fileutils.c b/Python/fileutils.c index c9ae1b3f54e167..efc2e08cce46d5 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -52,6 +52,17 @@ int _Py_open_cloexec_works = -1; // The value must be the same in unicodeobject.c. #define MAX_UNICODE 0x10ffff +/* Limit write size on terminals in Windows to keep the interpreter + feeling responsive. + + This is higher than WRITE_LIMIT_CONSOLE because `.write()` + is targeted at non-console I/O (but may happen to touch a tty). Use + WinConsoleIO for best console interactivity. + + This should ideally be bigger than DEFAULT_BUFFER_SIZE so common + case write to file on disk is quick. */ +#define WRITE_LIMIT_INTERACTIVE (5 * 1024 * 1024) + // mbstowcs() and mbrtowc() errors static const size_t DECODE_ERROR = ((size_t)-1); static const size_t INCOMPLETE_CHARACTER = (size_t)-2; @@ -1923,20 +1934,18 @@ _Py_write_impl(int fd, const void *buf, size_t count, int gil_held) _Py_BEGIN_SUPPRESS_IPH #ifdef MS_WINDOWS - if (count > 32767) { - /* Issue #11395: the Windows console returns an error (12: not - enough space error) on writing into stdout if stdout mode is - binary and the length is greater than 66,000 bytes (or less, - depending on heap usage). */ + /* isatty is guarded because don't want it in common case of + writing DEFAULT_BUFFER_SIZE to regular files (gh-121940). 
*/ + if (count > WRITE_LIMIT_INTERACTIVE) { if (gil_held) { Py_BEGIN_ALLOW_THREADS if (isatty(fd)) { - count = 32767; + count = _Py_LimitConsoleWriteSize(buf, count, WRITE_LIMIT_INTERACTIVE); } Py_END_ALLOW_THREADS } else { if (isatty(fd)) { - count = 32767; + count = _Py_LimitConsoleWriteSize(buf, count, WRITE_LIMIT_INTERACTIVE); } } } @@ -3101,3 +3110,52 @@ _Py_IsValidFD(int fd) return (fstat(fd, &st) == 0); #endif } + +#ifdef MS_WINDOWS +static size_t +_find_last_utf8_boundary(const char *buf, size_t len) +{ + /* Returns len if it is 0 or the last byte is ASCII; otherwise scans back over at most the last 3 bytes for a non-continuation byte and returns its offset, so the trailing multi-byte sequence is dropped. Never turns a non-zero len into 0: returns the original len instead. */ + DWORD count = 1; + if (len == 0 || (buf[len - 1] & 0x80) == 0) { + return len; + } + for (;; count++) { + if (count > 3 || count >= len) { + return len; + } + if ((buf[len - count] & 0xc0) != 0x80) { + return len - count; + } + } +} + +/* Put a soft limit on the number of bytes to be written: returns requested_size if it is <= cap_size, otherwise cap_size trimmed back by at most 3 bytes so that buf (assumed to be UTF-8) is not cut inside a multi-byte sequence. + + In older versions of Windows a hard limit was necessary because + there was a hard limit to the number of bytes (bpo-11395), but that + is not the case in Windows 8+. + + For Windows 8+ the console host synchronizes I/O operations which + means a Ctrl-C doesn't generate an interrupt until after the write + is completed. That means large writes which take multiple seconds + will reduce responsiveness to interrupts. + + This is a "soft cap" (cap_size counts utf-8 input bytes, not the exact number of utf-16 units written, but close + enough) to maintain responsiveness of consoles on + Windows (gh-121940). */ +size_t _Py_LimitConsoleWriteSize(const void *buf, size_t requested_size, + size_t cap_size) { + if (requested_size <= cap_size) { + return requested_size; + } + + /* Fix for github issues gh-110913 and gh-82052. + + Splitting utf-8 can't be done at arbitrary byte boundaries + because that results in broken utf-8 byte sequences being + presented to the user. */ + return _find_last_utf8_boundary(buf, cap_size); +} + +#endif