From 166b6fe69a0f94a2495b8410c45005f53165fafe Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Sat, 5 Oct 2024 16:42:49 +0300
Subject: [PATCH 1/2] gh-124531: Fix strftime() with embedded null characters

* time.strftime() (raised ValueError)
* the strftime() method and formatting of the datetime classes
  datetime, date and time (truncated at the null character)
---
 Lib/test/datetimetester.py                    | 20 +++++
 Lib/test/test_time.py                         | 12 ++-
 ...-10-05-16-42-33.gh-issue-124531.1ZDOwp.rst |  4 +
 Modules/_datetimemodule.c                     | 22 ++----
 Modules/timemodule.c                          | 79 +++++++++++--------
 5 files changed, 85 insertions(+), 52 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Library/2024-10-05-16-42-33.gh-issue-124531.1ZDOwp.rst

diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py
index c81408b344968d..47a2df1290b6ef 100644
--- a/Lib/test/datetimetester.py
+++ b/Lib/test/datetimetester.py
@@ -2955,6 +2955,16 @@ def test_more_strftime(self):
         except UnicodeEncodeError:
             pass
 
+    def test_strftime_embedded_nul(self):
+        # gh-124531: The null character should not terminate the format string.
+        t = self.theclass(2004, 12, 31, 6, 22, 33, 47)
+        self.assertEqual(t.strftime('\0'), '\0')
+        self.assertEqual(t.strftime('\0'*1000), '\0'*1000)
+        s1 = t.strftime('%c')
+        s2 = t.strftime('%x')
+        self.assertEqual(t.strftime('\0%c\0%x'), f'\0{s1}\0{s2}')
+        self.assertEqual(t.strftime('\0%c\0%x\0'), f'\0{s1}\0{s2}\0')
+
     def test_extract(self):
         dt = self.theclass(2002, 3, 4, 18, 45, 3, 1234)
         self.assertEqual(dt.date(), date(2002, 3, 4))
@@ -3736,6 +3746,16 @@ def test_strftime(self):
         # gh-85432: The parameter was named "fmt" in the pure-Python impl.
         t.strftime(format="%f")
 
+    def test_strftime_embedded_nul(self):
+        # gh-124531: The null character should not terminate the format string.
+        t = self.theclass(1, 2, 3, 4)
+        self.assertEqual(t.strftime('\0'), '\0')
+        self.assertEqual(t.strftime('\0'*1000), '\0'*1000)
+        s1 = t.strftime('%Z')
+        s2 = t.strftime('%X')
+        self.assertEqual(t.strftime('\0%Z\0%X'), f'\0{s1}\0{s2}')
+        self.assertEqual(t.strftime('\0%Z\0%X\0'), f'\0{s1}\0{s2}\0')
+
     def test_format(self):
         t = self.theclass(1, 2, 3, 4)
         self.assertEqual(t.__format__(''), str(t))
diff --git a/Lib/test/test_time.py b/Lib/test/test_time.py
index 530c317a852e77..caaa20443989e9 100644
--- a/Lib/test/test_time.py
+++ b/Lib/test/test_time.py
@@ -182,8 +182,16 @@ def test_strftime(self):
                 self.fail('conversion specifier: %r failed.' % format)
 
         self.assertRaises(TypeError, time.strftime, b'%S', tt)
-        # embedded null character
-        self.assertRaises(ValueError, time.strftime, '%S\0', tt)
+
+    def test_strftime_embedded_nul(self):
+        # gh-124531: The null character should not terminate the format string.
+        tt = time.gmtime(self.t)
+        self.assertEqual(time.strftime('\0', tt), '\0')
+        self.assertEqual(time.strftime('\0'*1000, tt), '\0'*1000)
+        s1 = time.strftime('%c', tt)
+        s2 = time.strftime('%x', tt)
+        self.assertEqual(time.strftime('\0%c\0%x', tt), f'\0{s1}\0{s2}')
+        self.assertEqual(time.strftime('\0%c\0%x\0', tt), f'\0{s1}\0{s2}\0')
 
     def _bounds_checking(self, func):
         # Make sure that strftime() checks the bounds of the various parts
diff --git a/Misc/NEWS.d/next/Library/2024-10-05-16-42-33.gh-issue-124531.1ZDOwp.rst b/Misc/NEWS.d/next/Library/2024-10-05-16-42-33.gh-issue-124531.1ZDOwp.rst
new file mode 100644
index 00000000000000..eb7d2fc7674b33
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-10-05-16-42-33.gh-issue-124531.1ZDOwp.rst
@@ -0,0 +1,4 @@
+Fix :func:`time.strftime`, the :meth:`~datetime.datetime.strftime` method of
+the :mod:`datetime` classes :class:`~datetime.datetime`,
+:class:`~datetime.date` and :class:`~datetime.time` and formatting of these
+classes with format strings containing embedded null characters.
diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c
index 90527d2a3e0350..0138a90a9f981a 100644
--- a/Modules/_datetimemodule.c
+++ b/Modules/_datetimemodule.c
@@ -1837,6 +1837,7 @@ wrap_strftime(PyObject *object, PyObject *format, PyObject *timetuple,
     PyObject *freplacement = NULL;      /* py string, replacement for %f */
 
     const char *pin;            /* pointer to next char in input format */
+    const char *pend;           /* pointer past the end of input format */
     Py_ssize_t flen;            /* length of input format */
     char ch;                    /* next char in input format */
 
@@ -1886,22 +1887,15 @@ wrap_strftime(PyObject *object, PyObject *format, PyObject *timetuple,
     pnew = PyBytes_AsString(newfmt);
     usednew = 0;
 
-    while ((ch = *pin++) != '\0') {
-        if (ch != '%') {
-            ptoappend = pin - 1;
-            ntoappend = 1;
-        }
-        else if ((ch = *pin++) == '\0') {
-        /* Null byte follows %, copy only '%'.
-         *
-         * Back the pin up one char so that we catch the null check
-         * the next time through the loop.*/
-            pin--;
+    pend = pin + flen;
+    while (pin != pend) {
+        ch = *pin++;
+        if (ch != '%' || pin == pend) {
             ptoappend = pin - 1;
             ntoappend = 1;
         }
         /* A % has been seen and ch is the character after it. */
-        else if (ch == 'z') {
+        else if ((ch = *pin++) == 'z') {
             /* %z -> +HHMM */
             if (zreplacement == NULL) {
                 zreplacement = make_somezreplacement(object, "", tzinfoarg);
@@ -2035,12 +2029,10 @@ wrap_strftime(PyObject *object, PyObject *format, PyObject *timetuple,
         assert(usednew <= totalnew);
     }  /* end while() */
 
-    if (_PyBytes_Resize(&newfmt, usednew) < 0)
-        goto Done;
     {
         PyObject *format;
 
-        format = PyUnicode_FromString(PyBytes_AS_STRING(newfmt));
+        format = PyUnicode_FromStringAndSize(PyBytes_AS_STRING(newfmt), usednew);
         if (format != NULL) {
             result = PyObject_CallFunctionObjArgs(strftime,
                                                    format, timetuple, NULL);
diff --git a/Modules/timemodule.c b/Modules/timemodule.c
index 9720c201a184a8..a44cb5cafaf328 100644
--- a/Modules/timemodule.c
+++ b/Modules/timemodule.c
@@ -787,16 +787,14 @@ time_strftime(PyObject *module, PyObject *args)
     PyObject *format;
 #endif
     PyObject *format_arg;
+    Py_ssize_t fmtsize;
     size_t fmtlen, buflen;
     time_char *outbuf = NULL;
-    size_t i;
+    size_t outsize, outpos;
     PyObject *ret = NULL;
 
     memset((void *) &buf, '\0', sizeof(buf));
 
-    /* Will always expect a unicode string to be passed as format.
-       Given that there's no str type anymore in py3k this seems safe.
-    */
     if (!PyArg_ParseTuple(args, "U|O:strftime", &format_arg, &tup))
         return NULL;
 
@@ -835,7 +833,7 @@ time_strftime(PyObject *module, PyObject *args)
         buf.tm_isdst = 1;
 
 #ifdef HAVE_WCSFTIME
-    format = PyUnicode_AsWideCharString(format_arg, NULL);
+    format = PyUnicode_AsWideCharString(format_arg, &fmtsize);
     if (format == NULL)
         return NULL;
     fmt = format;
@@ -845,19 +843,20 @@ time_strftime(PyObject *module, PyObject *args)
     if (format == NULL)
         return NULL;
     fmt = PyBytes_AS_STRING(format);
+    fmtsize = PyBytes_GET_SIZE(format);
 #endif
 
 #if defined(MS_WINDOWS) && !defined(HAVE_WCSFTIME)
     /* check that the format string contains only valid directives */
-    for (outbuf = strchr(fmt, '%');
-        outbuf != NULL;
-        outbuf = strchr(outbuf+2, '%'))
+    for (const time_char *f = memchr(fmt, '%', fmtsize);
+        f != NULL;
+        f = memchr(f + 2, '%', fmtsize - (f + 2 - fmt)))
     {
-        if (outbuf[1] == '#')
-            ++outbuf; /* not documented by python, */
-        if (outbuf[1] == '\0')
+        if (f[1] == '#')
+            ++f; /* not documented by python, */
+        if (f + 1 >= fmt + fmtsize)
             break;
-        if ((outbuf[1] == 'y') && buf.tm_year < 0) {
+        if ((f[1] == 'y') && buf.tm_year < 0) {
             PyErr_SetString(PyExc_ValueError,
                         "format %y requires year >= 1900 on Windows");
             Py_DECREF(format);
@@ -865,15 +864,15 @@ time_strftime(PyObject *module, PyObject *args)
         }
     }
 #elif (defined(_AIX) || (defined(__sun) && defined(__SVR4))) && defined(HAVE_WCSFTIME)
-    for (outbuf = wcschr(fmt, '%');
-        outbuf != NULL;
-        outbuf = wcschr(outbuf+2, '%'))
+    for (const time_char *f = wmemchr(fmt, '%', fmtsize);
+        f != NULL;
+        f = wmemchr(f + 2, '%', fmtsize - (f + 2 - fmt)))
     {
-        if (outbuf[1] == L'\0')
+        if (f + 1 >= fmt + fmtsize)
             break;
         /* Issue #19634: On AIX, wcsftime("y", (1899, 1, 1, 0, 0, 0, 0, 0, 0))
            returns "0/" instead of "99" */
-        if (outbuf[1] == L'y' && buf.tm_year < 0) {
+        if (f[1] == L'y' && buf.tm_year < 0) {
             PyErr_SetString(PyExc_ValueError,
                             "format %y requires year >= 1900 on AIX");
             PyMem_Free(format);
@@ -882,13 +881,14 @@ time_strftime(PyObject *module, PyObject *args)
     }
 #endif
 
-    fmtlen = time_strlen(fmt);
-
     /* I hate these functions that presume you know how big the output
      * will be ahead of time...
      */
-    for (i = 1024; ; i += i) {
-        outbuf = (time_char *)PyMem_Malloc(i*sizeof(time_char));
+    outsize = fmtsize + 128;
+    outpos = 0;
+    fmtlen = time_strlen(fmt);
+    while (1) {
+        outbuf = (time_char *)PyMem_Realloc(outbuf, outsize*sizeof(time_char));
         if (outbuf == NULL) {
             PyErr_NoMemory();
             break;
@@ -897,32 +897,41 @@ time_strftime(PyObject *module, PyObject *args)
         errno = 0;
 #endif
         _Py_BEGIN_SUPPRESS_IPH
-        buflen = format_time(outbuf, i, fmt, &buf);
+        buflen = format_time(outbuf + outpos, outsize - outpos, fmt, &buf);
         _Py_END_SUPPRESS_IPH
 #if defined _MSC_VER && _MSC_VER >= 1400 && defined(__STDC_SECURE_LIB__)
         /* VisualStudio .NET 2005 does this properly */
         if (buflen == 0 && errno == EINVAL) {
             PyErr_SetString(PyExc_ValueError, "Invalid format string");
-            PyMem_Free(outbuf);
             break;
         }
 #endif
-        if (buflen > 0 || i >= 256 * fmtlen) {
-            /* If the buffer is 256 times as long as the format,
-               it's probably not failing for lack of room!
-               More likely, the format yields an empty result,
-               e.g. an empty format, or %Z when the timezone
-               is unknown. */
+        if (buflen == 0 && outsize - outpos < 256 * fmtlen) {
+            outsize += outsize;
+            continue;
+        }
+        /* If the buffer is 256 times as long as the format,
+           it's probably not failing for lack of room!
+           More likely, the format yields an empty result,
+           e.g. an empty format, or %Z when the timezone
+           is unknown. */
+        outpos += buflen + 1;
+        if (fmtlen < (size_t)fmtsize) {
+            /* It was not terminating NUL, but an embedded NUL.
+               Skip the NUL and continue. */
+            fmt += fmtlen + 1;
+            fmtsize -= fmtlen + 1;
+            fmtlen = time_strlen(fmt);
+            continue;
+        }
 #ifdef HAVE_WCSFTIME
-            ret = PyUnicode_FromWideChar(outbuf, buflen);
+        ret = PyUnicode_FromWideChar(outbuf, outpos - 1);
 #else
-            ret = PyUnicode_DecodeLocaleAndSize(outbuf, buflen, "surrogateescape");
+        ret = PyUnicode_DecodeLocaleAndSize(outbuf, outpos - 1, "surrogateescape");
 #endif
-            PyMem_Free(outbuf);
-            break;
-        }
-        PyMem_Free(outbuf);
+        break;
     }
+    PyMem_Free(outbuf);
 #ifdef HAVE_WCSFTIME
     PyMem_Free(format);
 #else

From 3871471cb98e28eefc7467ecac9bde6e01076486 Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka <storchaka@gmail.com>
Date: Sat, 5 Oct 2024 19:54:24 +0300
Subject: [PATCH 2/2] Do not call strftime() for empty format string.

---
 Modules/timemodule.c | 54 ++++++++++++++++++++++++++++----------------
 1 file changed, 34 insertions(+), 20 deletions(-)

diff --git a/Modules/timemodule.c b/Modules/timemodule.c
index a44cb5cafaf328..e6b57f075288e2 100644
--- a/Modules/timemodule.c
+++ b/Modules/timemodule.c
@@ -893,41 +893,55 @@ time_strftime(PyObject *module, PyObject *args)
             PyErr_NoMemory();
             break;
         }
+        if (fmtlen == 0) {
+            /* Empty format string or leading or trailing NUL,
+               or consequent NULs.
+               strftime() on macOS does not work well with empty format string.
+             */
+            if (outpos == outsize) {
+                outsize += outsize;
+                continue;
+            }
+            outbuf[outpos] = 0;
+        }
+        else {
 #if defined _MSC_VER && _MSC_VER >= 1400 && defined(__STDC_SECURE_LIB__)
-        errno = 0;
+            errno = 0;
 #endif
-        _Py_BEGIN_SUPPRESS_IPH
-        buflen = format_time(outbuf + outpos, outsize - outpos, fmt, &buf);
-        _Py_END_SUPPRESS_IPH
+            _Py_BEGIN_SUPPRESS_IPH
+            buflen = format_time(outbuf + outpos, outsize - outpos, fmt, &buf);
+            _Py_END_SUPPRESS_IPH
 #if defined _MSC_VER && _MSC_VER >= 1400 && defined(__STDC_SECURE_LIB__)
-        /* VisualStudio .NET 2005 does this properly */
-        if (buflen == 0 && errno == EINVAL) {
-            PyErr_SetString(PyExc_ValueError, "Invalid format string");
-            break;
-        }
+            /* VisualStudio .NET 2005 does this properly */
+            if (buflen == 0 && errno == EINVAL) {
+                PyErr_SetString(PyExc_ValueError, "Invalid format string");
+                break;
+            }
 #endif
-        if (buflen == 0 && outsize - outpos < 256 * fmtlen) {
-            outsize += outsize;
-            continue;
+            if (buflen == 0 && outsize - outpos < 256 * fmtlen) {
+                outsize += outsize;
+                continue;
+            }
+            /* If the buffer is 256 times as long as the format,
+               it's probably not failing for lack of room!
+               More likely, the format yields an empty result,
+               e.g. an empty format, or %Z when the timezone
+               is unknown. */
+            outpos += buflen;
         }
-        /* If the buffer is 256 times as long as the format,
-           it's probably not failing for lack of room!
-           More likely, the format yields an empty result,
-           e.g. an empty format, or %Z when the timezone
-           is unknown. */
-        outpos += buflen + 1;
         if (fmtlen < (size_t)fmtsize) {
             /* It was not terminating NUL, but an embedded NUL.
                Skip the NUL and continue. */
+            outpos++;
             fmt += fmtlen + 1;
             fmtsize -= fmtlen + 1;
             fmtlen = time_strlen(fmt);
             continue;
         }
 #ifdef HAVE_WCSFTIME
-        ret = PyUnicode_FromWideChar(outbuf, outpos - 1);
+        ret = PyUnicode_FromWideChar(outbuf, outpos);
 #else
-        ret = PyUnicode_DecodeLocaleAndSize(outbuf, outpos - 1, "surrogateescape");
+        ret = PyUnicode_DecodeLocaleAndSize(outbuf, outpos, "surrogateescape");
 #endif
         break;
     }