From af1977756c4193e62b6f97d03c83d78958a3aa92 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Mon, 23 Aug 2021 18:34:17 -0700 Subject: [PATCH 1/7] Serialize frozenset elements deterministically --- Python/marshal.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/Python/marshal.c b/Python/marshal.c index 1260704c74c0be..b1e9a2f03d9fbc 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -503,9 +503,42 @@ w_complex_object(PyObject *v, char flag, WFILE *p) W_TYPE(TYPE_SET, p); n = PySet_GET_SIZE(v); W_SIZE(n, p); + // bpo-37596: We need to write the elements in a deterministic order! + // Since we know that they all need marshal support anyways, this can + // be conveniently accomplished by using their marshal serializations + // as sort keys: + PyObject *pairs = PyList_New(0); + if (pairs == NULL) { + p->error = WFERR_NOMEMORY; + return; + } + PyObject *pair = NULL; while (_PySet_NextEntry(v, &pos, &value, &hash)) { + PyObject *dump = PyMarshal_WriteObjectToString(value, p->version); + if (dump == NULL) { + p->error = WFERR_UNMARSHALLABLE; + goto anyset_done; + } + pair = PyTuple_Pack(2, dump, value); + Py_DECREF(dump); + if (pair == NULL || PyList_Append(pairs, pair)) { + p->error = WFERR_NOMEMORY; + goto anyset_done; + } + Py_CLEAR(pair); + } + if (PyList_Sort(pairs)) { + p->error = WFERR_UNMARSHALLABLE; + goto anyset_done; + } + for (Py_ssize_t i = 0; i < n; i++) { + PyObject *pair = PyList_GET_ITEM(pairs, i); + PyObject *value = PyTuple_GET_ITEM(pair, 1); w_object(value, p); } + anyset_done: + Py_XDECREF(pairs); + Py_XDECREF(pair); } else if (PyCode_Check(v)) { PyCodeObject *co = (PyCodeObject *)v; From ae386193e1be41856daebaf0a78f1b1cb16387d7 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Mon, 23 Aug 2021 21:26:29 -0700 Subject: [PATCH 2/7] Add regression test --- Lib/test/test_marshal.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/Lib/test/test_marshal.py b/Lib/test/test_marshal.py index d20b9d2c1ff391..f19155ce8a8fcc 100644 --- a/Lib/test/test_marshal.py +++ b/Lib/test/test_marshal.py @@ -344,6 +344,20 @@ def test_eof(self): for i in range(len(data)): self.assertRaises(EOFError, marshal.loads, data[0: i]) + def test_deterministic_sets(self): + # bpo-37596: sets and frozensets must always marshal deterministically! + # Test this by seeding string hashes: + elements = "(float('nan'), b'a', b'b', b'c', 'x', 'y', 'z')" + for kind in ("set", "frozenset"): + script = f"import marshal; print(marshal.dumps({kind}({elements})))" + with self.subTest(kind): + # {nan, b"b", "x", b'a', b'c', 'y', 'z'} + _, a, _ = assert_python_ok("-c", script, PYTHONHASHSEED="0") + # {nan, 'x', b'a', 'z', b'b', 'y', b'c'} + _, b, _ = assert_python_ok("-c", script, PYTHONHASHSEED="1") + self.assertEqual(a, b) + + LARGE_SIZE = 2**31 pointer_size = 8 if sys.maxsize > 0xFFFFFFFF else 4 From 4be57ef1d530fec57d097ffcc4d65d062b68e59c Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Mon, 23 Aug 2021 21:40:12 -0700 Subject: [PATCH 3/7] blurb add --- .../next/Library/2021-08-23-21-39-59.bpo-37596.ojRcwB.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2021-08-23-21-39-59.bpo-37596.ojRcwB.rst diff --git a/Misc/NEWS.d/next/Library/2021-08-23-21-39-59.bpo-37596.ojRcwB.rst b/Misc/NEWS.d/next/Library/2021-08-23-21-39-59.bpo-37596.ojRcwB.rst new file mode 100644 index 00000000000000..092732abb76b37 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2021-08-23-21-39-59.bpo-37596.ojRcwB.rst @@ -0,0 +1,2 @@ +Ensure that :class:`set` and :class:`frozenset` objects are always +:mod:`marshalled ` deterministically. From 1aff762a31e97f3a892108dd03bad9a25689c594 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Mon, 23 Aug 2021 21:49:26 -0700 Subject: [PATCH 4/7] Fix comment --- Lib/test/test_marshal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_marshal.py b/Lib/test/test_marshal.py index f19155ce8a8fcc..6d70f269d34c62 100644 --- a/Lib/test/test_marshal.py +++ b/Lib/test/test_marshal.py @@ -351,7 +351,7 @@ def test_deterministic_sets(self): for kind in ("set", "frozenset"): script = f"import marshal; print(marshal.dumps({kind}({elements})))" with self.subTest(kind): - # {nan, b"b", "x", b'a', b'c', 'y', 'z'} + # {nan, b'b', 'x', b'a', b'c', 'y', 'z'} _, a, _ = assert_python_ok("-c", script, PYTHONHASHSEED="0") # {nan, 'x', b'a', 'z', b'b', 'y', b'c'} _, b, _ = assert_python_ok("-c", script, PYTHONHASHSEED="1") From b71204ed582d1df5142b125699f363c6fe889870 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Mon, 23 Aug 2021 21:58:26 -0700 Subject: [PATCH 5/7] Cleanup --- Python/marshal.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Python/marshal.c b/Python/marshal.c index b1e9a2f03d9fbc..7ab45b6e920429 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -532,9 +532,10 @@ w_complex_object(PyObject *v, char flag, WFILE *p) goto anyset_done; } for (Py_ssize_t i = 0; i < n; i++) { - PyObject *pair = PyList_GET_ITEM(pairs, i); - PyObject *value = PyTuple_GET_ITEM(pair, 1); + pair = PyList_GET_ITEM(pairs, i); + value = PyTuple_GET_ITEM(pair, 1); w_object(value, p); + pair = NULL; } anyset_done: Py_XDECREF(pairs); From 597be1cd19a28556e43cc33bb8f8a42009ee0865 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Mon, 23 Aug 2021 22:09:03 -0700 Subject: [PATCH 6/7] More cleanup --- Python/marshal.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/Python/marshal.c b/Python/marshal.c index 7ab45b6e920429..23cc9fa2910d75 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -512,34 +512,32 @@ w_complex_object(PyObject *v, char flag, WFILE *p) p->error = WFERR_NOMEMORY; return; } - PyObject *pair = NULL; while (_PySet_NextEntry(v, &pos, &value, &hash)) { PyObject *dump = PyMarshal_WriteObjectToString(value, p->version); if (dump == NULL) { p->error = WFERR_UNMARSHALLABLE; goto anyset_done; } - pair = PyTuple_Pack(2, dump, value); + PyObject *pair = PyTuple_Pack(2, dump, value); Py_DECREF(dump); - if (pair == NULL || PyList_Append(pairs, pair)) { + int error = pair == NULL || PyList_Append(pairs, pair); + Py_XDECREF(pair); + if (error) { p->error = WFERR_NOMEMORY; goto anyset_done; } - Py_CLEAR(pair); } if (PyList_Sort(pairs)) { p->error = WFERR_UNMARSHALLABLE; goto anyset_done; } for (Py_ssize_t i = 0; i < n; i++) { - pair = PyList_GET_ITEM(pairs, i); + PyObject *pair = PyList_GET_ITEM(pairs, i); value = PyTuple_GET_ITEM(pair, 1); w_object(value, p); - pair = NULL; } anyset_done: - Py_XDECREF(pairs); - Py_XDECREF(pair); + Py_DECREF(pairs); } else if (PyCode_Check(v)) { PyCodeObject *co = (PyCodeObject *)v; From 23723cbf5d03ae4fe73e3518f9336abfa307ae62 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Tue, 24 Aug 2021 11:12:27 -0700 Subject: [PATCH 7/7] Address feedback from PR review --- Lib/test/test_marshal.py | 33 ++++++++++++------- .../2021-08-23-21-39-59.bpo-37596.ojRcwB.rst | 2 +- Python/marshal.c | 16 ++++----- 3 files changed, 31 insertions(+), 20 deletions(-) diff --git a/Lib/test/test_marshal.py b/Lib/test/test_marshal.py index 6d70f269d34c62..bdfe79fbbecb35 100644 --- a/Lib/test/test_marshal.py +++ b/Lib/test/test_marshal.py @@ -345,18 +345,29 @@ def test_eof(self): self.assertRaises(EOFError, marshal.loads, data[0: i]) def test_deterministic_sets(self): - # bpo-37596: sets and frozensets must always marshal deterministically! - # Test this by seeding string hashes: - elements = "(float('nan'), b'a', b'b', b'c', 'x', 'y', 'z')" + # bpo-37596: To support reproducible builds, sets and frozensets need to + # have their elements serialized in a consistent order (even when they + # have been scrambled by hash randomization): for kind in ("set", "frozenset"): - script = f"import marshal; print(marshal.dumps({kind}({elements})))" - with self.subTest(kind): - # {nan, b'b', 'x', b'a', b'c', 'y', 'z'} - _, a, _ = assert_python_ok("-c", script, PYTHONHASHSEED="0") - # {nan, 'x', b'a', 'z', b'b', 'y', b'c'} - _, b, _ = assert_python_ok("-c", script, PYTHONHASHSEED="1") - self.assertEqual(a, b) - + for elements in ( + "float('nan'), b'a', b'b', b'c', 'x', 'y', 'z'", + # Also test for bad interactions with backreferencing: + "('string', 1), ('string', 2), ('string', 3)", + ): + s = f"{kind}([{elements}])" + with self.subTest(s): + # First, make sure that our test case still has different + # orders under hash seeds 0 and 1. If this check fails, we + # need to update this test with different elements: + args = ["-c", f"print({s})"] + _, repr_0, _ = assert_python_ok(*args, PYTHONHASHSEED="0") + _, repr_1, _ = assert_python_ok(*args, PYTHONHASHSEED="1") + self.assertNotEqual(repr_0, repr_1) + # Then, perform the actual test: + args = ["-c", f"import marshal; print(marshal.dumps({s}))"] + _, dump_0, _ = assert_python_ok(*args, PYTHONHASHSEED="0") + _, dump_1, _ = assert_python_ok(*args, PYTHONHASHSEED="1") + self.assertEqual(dump_0, dump_1) LARGE_SIZE = 2**31 pointer_size = 8 if sys.maxsize > 0xFFFFFFFF else 4 diff --git a/Misc/NEWS.d/next/Library/2021-08-23-21-39-59.bpo-37596.ojRcwB.rst b/Misc/NEWS.d/next/Library/2021-08-23-21-39-59.bpo-37596.ojRcwB.rst index 092732abb76b37..81fdfeb6294569 100644 --- a/Misc/NEWS.d/next/Library/2021-08-23-21-39-59.bpo-37596.ojRcwB.rst +++ b/Misc/NEWS.d/next/Library/2021-08-23-21-39-59.bpo-37596.ojRcwB.rst @@ -1,2 +1,2 @@ Ensure that :class:`set` and :class:`frozenset` objects are always -:mod:`marshalled ` deterministically. +:mod:`marshalled ` reproducibly. diff --git a/Python/marshal.c b/Python/marshal.c index 23cc9fa2910d75..b69c4d09641da8 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -503,10 +503,10 @@ w_complex_object(PyObject *v, char flag, WFILE *p) W_TYPE(TYPE_SET, p); n = PySet_GET_SIZE(v); W_SIZE(n, p); - // bpo-37596: We need to write the elements in a deterministic order! - // Since we know that they all need marshal support anyways, this can - // be conveniently accomplished by using their marshal serializations - // as sort keys: + // bpo-37596: To support reproducible builds, sets and frozensets need + // to have their elements serialized in a consistent order (even when + // they have been scrambled by hash randomization). To ensure this, we + // use an order equivalent to sorted(v, key=marshal.dumps): PyObject *pairs = PyList_New(0); if (pairs == NULL) { p->error = WFERR_NOMEMORY; @@ -520,15 +520,15 @@ w_complex_object(PyObject *v, char flag, WFILE *p) } PyObject *pair = PyTuple_Pack(2, dump, value); Py_DECREF(dump); - int error = pair == NULL || PyList_Append(pairs, pair); - Py_XDECREF(pair); - if (error) { + if (pair == NULL || PyList_Append(pairs, pair)) { p->error = WFERR_NOMEMORY; + Py_XDECREF(pair); goto anyset_done; } + Py_DECREF(pair); } if (PyList_Sort(pairs)) { - p->error = WFERR_UNMARSHALLABLE; + p->error = WFERR_NOMEMORY; goto anyset_done; } for (Py_ssize_t i = 0; i < n; i++) {