From 166b6fe69a0f94a2495b8410c45005f53165fafe Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 5 Oct 2024 16:42:49 +0300 Subject: [PATCH 1/2] gh-124531: Fix strftime() with embedded null characters * time.strftime() (raised ValueError) * the strftime() method and formatting of the datetime classes datetime, date and time (truncated at the null character) --- Lib/test/datetimetester.py | 20 +++++ Lib/test/test_time.py | 12 ++- ...-10-05-16-42-33.gh-issue-124531.1ZDOwp.rst | 4 + Modules/_datetimemodule.c | 22 ++---- Modules/timemodule.c | 79 +++++++++++-------- 5 files changed, 85 insertions(+), 52 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-05-16-42-33.gh-issue-124531.1ZDOwp.rst diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py index c81408b344968d..47a2df1290b6ef 100644 --- a/Lib/test/datetimetester.py +++ b/Lib/test/datetimetester.py @@ -2955,6 +2955,16 @@ def test_more_strftime(self): except UnicodeEncodeError: pass + def test_strftime_embedded_nul(self): + # gh-124531: The null character should not terminate the format string. + t = self.theclass(2004, 12, 31, 6, 22, 33, 47) + self.assertEqual(t.strftime('\0'), '\0') + self.assertEqual(t.strftime('\0'*1000), '\0'*1000) + s1 = t.strftime('%c') + s2 = t.strftime('%x') + self.assertEqual(t.strftime('\0%c\0%x'), f'\0{s1}\0{s2}') + self.assertEqual(t.strftime('\0%c\0%x\0'), f'\0{s1}\0{s2}\0') + def test_extract(self): dt = self.theclass(2002, 3, 4, 18, 45, 3, 1234) self.assertEqual(dt.date(), date(2002, 3, 4)) @@ -3736,6 +3746,16 @@ def test_strftime(self): # gh-85432: The parameter was named "fmt" in the pure-Python impl. t.strftime(format="%f") + def test_strftime_embedded_nul(self): + # gh-124531: The null character should not terminate the format string. + t = self.theclass(1, 2, 3, 4) + self.assertEqual(t.strftime('\0'), '\0') + self.assertEqual(t.strftime('\0'*1000), '\0'*1000) + s1 = t.strftime('%Z') + s2 = t.strftime('%X') + self.assertEqual(t.strftime('\0%Z\0%X'), f'\0{s1}\0{s2}') + self.assertEqual(t.strftime('\0%Z\0%X\0'), f'\0{s1}\0{s2}\0') + def test_format(self): t = self.theclass(1, 2, 3, 4) self.assertEqual(t.__format__(''), str(t)) diff --git a/Lib/test/test_time.py b/Lib/test/test_time.py index 530c317a852e77..caaa20443989e9 100644 --- a/Lib/test/test_time.py +++ b/Lib/test/test_time.py @@ -182,8 +182,16 @@ def test_strftime(self): self.fail('conversion specifier: %r failed.' % format) self.assertRaises(TypeError, time.strftime, b'%S', tt) - # embedded null character - self.assertRaises(ValueError, time.strftime, '%S\0', tt) + + def test_strftime_embedded_nul(self): + # gh-124531: The null character should not terminate the format string. + tt = time.gmtime(self.t) + self.assertEqual(time.strftime('\0', tt), '\0') + self.assertEqual(time.strftime('\0'*1000, tt), '\0'*1000) + s1 = time.strftime('%c', tt) + s2 = time.strftime('%x', tt) + self.assertEqual(time.strftime('\0%c\0%x', tt), f'\0{s1}\0{s2}') + self.assertEqual(time.strftime('\0%c\0%x\0', tt), f'\0{s1}\0{s2}\0') def _bounds_checking(self, func): # Make sure that strftime() checks the bounds of the various parts diff --git a/Misc/NEWS.d/next/Library/2024-10-05-16-42-33.gh-issue-124531.1ZDOwp.rst b/Misc/NEWS.d/next/Library/2024-10-05-16-42-33.gh-issue-124531.1ZDOwp.rst new file mode 100644 index 00000000000000..eb7d2fc7674b33 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-05-16-42-33.gh-issue-124531.1ZDOwp.rst @@ -0,0 +1,4 @@ +Fix :func:`time.strftime`, the :meth:`~datetime.datetime.strftime` method of +the :mod:`datetime` classes :class:`~datetime.datetime`, +:class:`~datetime.date` and :class:`~datetime.time` and formatting of these +classes with format strings containing embedded null characters. diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c index 90527d2a3e0350..0138a90a9f981a 100644 --- a/Modules/_datetimemodule.c +++ b/Modules/_datetimemodule.c @@ -1837,6 +1837,7 @@ wrap_strftime(PyObject *object, PyObject *format, PyObject *timetuple, PyObject *freplacement = NULL; /* py string, replacement for %f */ const char *pin; /* pointer to next char in input format */ + const char *pend; /* pointer past the end of input format */ Py_ssize_t flen; /* length of input format */ char ch; /* next char in input format */ @@ -1886,22 +1887,15 @@ wrap_strftime(PyObject *object, PyObject *format, PyObject *timetuple, pnew = PyBytes_AsString(newfmt); usednew = 0; - while ((ch = *pin++) != '\0') { - if (ch != '%') { - ptoappend = pin - 1; - ntoappend = 1; - } - else if ((ch = *pin++) == '\0') { - /* Null byte follows %, copy only '%'. - * - * Back the pin up one char so that we catch the null check - * the next time through the loop.*/ - pin--; + pend = pin + flen; + while (pin != pend) { + ch = *pin++; + if (ch != '%' || pin == pend) { ptoappend = pin - 1; ntoappend = 1; } /* A % has been seen and ch is the character after it. */ - else if (ch == 'z') { + else if ((ch = *pin++) == 'z') { /* %z -> +HHMM */ if (zreplacement == NULL) { zreplacement = make_somezreplacement(object, "", tzinfoarg); @@ -2035,12 +2029,10 @@ wrap_strftime(PyObject *object, PyObject *format, PyObject *timetuple, assert(usednew <= totalnew); } /* end while() */ - if (_PyBytes_Resize(&newfmt, usednew) < 0) - goto Done; { PyObject *format; - format = PyUnicode_FromString(PyBytes_AS_STRING(newfmt)); + format = PyUnicode_FromStringAndSize(PyBytes_AS_STRING(newfmt), usednew); if (format != NULL) { result = PyObject_CallFunctionObjArgs(strftime, format, timetuple, NULL); diff --git a/Modules/timemodule.c b/Modules/timemodule.c index 9720c201a184a8..a44cb5cafaf328 100644 --- a/Modules/timemodule.c +++ b/Modules/timemodule.c @@ -787,16 +787,14 @@ time_strftime(PyObject *module, PyObject *args) PyObject *format; #endif PyObject *format_arg; + Py_ssize_t fmtsize; size_t fmtlen, buflen; time_char *outbuf = NULL; - size_t i; + size_t outsize, outpos; PyObject *ret = NULL; memset((void *) &buf, '\0', sizeof(buf)); - /* Will always expect a unicode string to be passed as format. - Given that there's no str type anymore in py3k this seems safe. - */ if (!PyArg_ParseTuple(args, "U|O:strftime", &format_arg, &tup)) return NULL; @@ -835,7 +833,7 @@ time_strftime(PyObject *module, PyObject *args) buf.tm_isdst = 1; #ifdef HAVE_WCSFTIME - format = PyUnicode_AsWideCharString(format_arg, NULL); + format = PyUnicode_AsWideCharString(format_arg, &fmtsize); if (format == NULL) return NULL; fmt = format; @@ -845,19 +843,20 @@ time_strftime(PyObject *module, PyObject *args) if (format == NULL) return NULL; fmt = PyBytes_AS_STRING(format); + fmtsize = PyBytes_GET_SIZE(format); #endif #if defined(MS_WINDOWS) && !defined(HAVE_WCSFTIME) /* check that the format string contains only valid directives */ - for (outbuf = strchr(fmt, '%'); - outbuf != NULL; - outbuf = strchr(outbuf+2, '%')) + for (const time_char *f = memchr(fmt, '%', fmtsize); + f != NULL; + f = memchr(f + 2, '%', fmtsize - (f + 2 - fmt))) { - if (outbuf[1] == '#') - ++outbuf; /* not documented by python, */ - if (outbuf[1] == '\0') + if (f[1] == '#') + ++f; /* not documented by python, */ + if (f + 1 >= fmt + fmtsize) break; - if ((outbuf[1] == 'y') && buf.tm_year < 0) { + if ((f[1] == 'y') && buf.tm_year < 0) { PyErr_SetString(PyExc_ValueError, "format %y requires year >= 1900 on Windows"); Py_DECREF(format); @@ -865,15 +864,15 @@ time_strftime(PyObject *module, PyObject *args) } } #elif (defined(_AIX) || (defined(__sun) && defined(__SVR4))) && defined(HAVE_WCSFTIME) - for (outbuf = wcschr(fmt, '%'); - outbuf != NULL; - outbuf = wcschr(outbuf+2, '%')) + for (const time_char *f = wmemchr(fmt, '%', fmtsize); + f != NULL; + f = wmemchr(f + 2, '%', fmtsize - (f + 2 - fmt))) { - if (outbuf[1] == L'\0') + if (f + 1 >= fmt + fmtsize) break; /* Issue #19634: On AIX, wcsftime("y", (1899, 1, 1, 0, 0, 0, 0, 0, 0)) returns "0/" instead of "99" */ - if (outbuf[1] == L'y' && buf.tm_year < 0) { + if (f[1] == L'y' && buf.tm_year < 0) { PyErr_SetString(PyExc_ValueError, "format %y requires year >= 1900 on AIX"); PyMem_Free(format); @@ -882,13 +881,14 @@ time_strftime(PyObject *module, PyObject *args) } #endif - fmtlen = time_strlen(fmt); - /* I hate these functions that presume you know how big the output * will be ahead of time... */ - for (i = 1024; ; i += i) { - outbuf = (time_char *)PyMem_Malloc(i*sizeof(time_char)); + outsize = fmtsize + 128; + outpos = 0; + fmtlen = time_strlen(fmt); + while (1) { + outbuf = (time_char *)PyMem_Realloc(outbuf, outsize*sizeof(time_char)); if (outbuf == NULL) { PyErr_NoMemory(); break; @@ -897,32 +897,41 @@ time_strftime(PyObject *module, PyObject *args) errno = 0; #endif _Py_BEGIN_SUPPRESS_IPH - buflen = format_time(outbuf, i, fmt, &buf); + buflen = format_time(outbuf + outpos, outsize - outpos, fmt, &buf); _Py_END_SUPPRESS_IPH #if defined _MSC_VER && _MSC_VER >= 1400 && defined(__STDC_SECURE_LIB__) /* VisualStudio .NET 2005 does this properly */ if (buflen == 0 && errno == EINVAL) { PyErr_SetString(PyExc_ValueError, "Invalid format string"); - PyMem_Free(outbuf); break; } #endif - if (buflen > 0 || i >= 256 * fmtlen) { - /* If the buffer is 256 times as long as the format, - it's probably not failing for lack of room! - More likely, the format yields an empty result, - e.g. an empty format, or %Z when the timezone - is unknown. */ + if (buflen == 0 && outsize - outpos < 256 * fmtlen) { + outsize += outsize; + continue; + } + /* If the buffer is 256 times as long as the format, + it's probably not failing for lack of room! + More likely, the format yields an empty result, + e.g. an empty format, or %Z when the timezone + is unknown. */ + outpos += buflen + 1; + if (fmtlen < (size_t)fmtsize) { + /* It was not terminating NUL, but an embedded NUL. + Skip the NUL and continue. */ + fmt += fmtlen + 1; + fmtsize -= fmtlen + 1; + fmtlen = time_strlen(fmt); + continue; + } #ifdef HAVE_WCSFTIME - ret = PyUnicode_FromWideChar(outbuf, buflen); + ret = PyUnicode_FromWideChar(outbuf, outpos - 1); #else - ret = PyUnicode_DecodeLocaleAndSize(outbuf, buflen, "surrogateescape"); + ret = PyUnicode_DecodeLocaleAndSize(outbuf, outpos - 1, "surrogateescape"); #endif - PyMem_Free(outbuf); - break; - } - PyMem_Free(outbuf); + break; } + PyMem_Free(outbuf); #ifdef HAVE_WCSFTIME PyMem_Free(format); #else From 3871471cb98e28eefc7467ecac9bde6e01076486 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 5 Oct 2024 19:54:24 +0300 Subject: [PATCH 2/2] Do not call strftime() for empty format string. --- Modules/timemodule.c | 54 ++++++++++++++++++++++++++++---------------- 1 file changed, 34 insertions(+), 20 deletions(-) diff --git a/Modules/timemodule.c b/Modules/timemodule.c index a44cb5cafaf328..e6b57f075288e2 100644 --- a/Modules/timemodule.c +++ b/Modules/timemodule.c @@ -893,41 +893,55 @@ time_strftime(PyObject *module, PyObject *args) PyErr_NoMemory(); break; } + if (fmtlen == 0) { + /* Empty format string or leading or trailing NUL, + or consequent NULs. + strftime() on macOS does not work well with empty format string. + */ + if (outpos == outsize) { + outsize += outsize; + continue; + } + outbuf[outpos] = 0; + } + else { #if defined _MSC_VER && _MSC_VER >= 1400 && defined(__STDC_SECURE_LIB__) - errno = 0; + errno = 0; #endif - _Py_BEGIN_SUPPRESS_IPH - buflen = format_time(outbuf + outpos, outsize - outpos, fmt, &buf); - _Py_END_SUPPRESS_IPH + _Py_BEGIN_SUPPRESS_IPH + buflen = format_time(outbuf + outpos, outsize - outpos, fmt, &buf); + _Py_END_SUPPRESS_IPH #if defined _MSC_VER && _MSC_VER >= 1400 && defined(__STDC_SECURE_LIB__) - /* VisualStudio .NET 2005 does this properly */ - if (buflen == 0 && errno == EINVAL) { - PyErr_SetString(PyExc_ValueError, "Invalid format string"); - break; - } + /* VisualStudio .NET 2005 does this properly */ + if (buflen == 0 && errno == EINVAL) { + PyErr_SetString(PyExc_ValueError, "Invalid format string"); + break; + } #endif - if (buflen == 0 && outsize - outpos < 256 * fmtlen) { - outsize += outsize; - continue; + if (buflen == 0 && outsize - outpos < 256 * fmtlen) { + outsize += outsize; + continue; + } + /* If the buffer is 256 times as long as the format, + it's probably not failing for lack of room! + More likely, the format yields an empty result, + e.g. an empty format, or %Z when the timezone + is unknown. */ + outpos += buflen; } - /* If the buffer is 256 times as long as the format, - it's probably not failing for lack of room! - More likely, the format yields an empty result, - e.g. an empty format, or %Z when the timezone - is unknown. */ - outpos += buflen + 1; if (fmtlen < (size_t)fmtsize) { /* It was not terminating NUL, but an embedded NUL. Skip the NUL and continue. */ + outpos++; fmt += fmtlen + 1; fmtsize -= fmtlen + 1; fmtlen = time_strlen(fmt); continue; } #ifdef HAVE_WCSFTIME - ret = PyUnicode_FromWideChar(outbuf, outpos - 1); + ret = PyUnicode_FromWideChar(outbuf, outpos); #else - ret = PyUnicode_DecodeLocaleAndSize(outbuf, outpos - 1, "surrogateescape"); + ret = PyUnicode_DecodeLocaleAndSize(outbuf, outpos, "surrogateescape"); #endif break; }