From 72ab0f5747bce1376c701956ec20d22306bef4a7 Mon Sep 17 00:00:00 2001 From: sweeneyde Date: Mon, 20 Jun 2022 22:42:05 -0400 Subject: [PATCH 1/7] Harden overflow checks before _PyBytes_Resize in compile.c --- Python/compile.c | 45 +++++++++++++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 16 deletions(-) diff --git a/Python/compile.c b/Python/compile.c index 87d9037ea2891b..d62cd2f6e62213 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -7500,16 +7500,34 @@ assemble_emit_exception_table_item(struct assembler *a, int value, int msb) write_except_byte(a, (value&0x3f) | msb); } +/* Make room for at least logical_length+to_add bytes in the bytes object. + Use exponential growth for O(1) amortized runtime. */ +static int +bytes_make_room(PyObject **bytes, Py_ssize_t logical_length, + Py_ssize_t to_add) +{ + Py_ssize_t b_len = PyBytes_GET_SIZE(*bytes); + if (logical_length >= b_len - to_add) { + if (b_len > PY_SSIZE_T_MAX / 2) { + PyErr_NoMemory(); + return 0; + } + if (_PyBytes_Resize(bytes, b_len * 2) < 0) { + return 0; + } + } + return 1; +} + /* See Objects/exception_handling_notes.txt for details of layout */ #define MAX_SIZE_OF_ENTRY 20 static int assemble_emit_exception_table_entry(struct assembler *a, int start, int end, basicblock *handler) { - Py_ssize_t len = PyBytes_GET_SIZE(a->a_except_table); - if (a->a_except_table_off + MAX_SIZE_OF_ENTRY >= len) { - if (_PyBytes_Resize(&a->a_except_table, len * 2) < 0) - return 0; + if (!bytes_make_room(&a->a_except_table, a->a_except_table_off, + MAX_SIZE_OF_ENTRY)) { + return 0; } int size = end-start; assert(end > start); @@ -7650,12 +7668,9 @@ write_location_info_no_column(struct assembler* a, int length, int line_delta) static int write_location_info_entry(struct assembler* a, struct instr* i, int isize) { - Py_ssize_t len = PyBytes_GET_SIZE(a->a_linetable); - if (a->a_location_off + THEORETICAL_MAX_ENTRY_SIZE >= len) { - assert(len > THEORETICAL_MAX_ENTRY_SIZE); - if (_PyBytes_Resize(&a->a_linetable, len*2) < 0) { - return 0; - } + if (!bytes_make_room(&a->a_linetable, a->a_location_off, + THEORETICAL_MAX_ENTRY_SIZE)) { + return 0; } if (i->i_loc.lineno < 0) { write_location_info_none(a, isize); @@ -7712,13 +7727,11 @@ assemble_emit(struct assembler *a, struct instr *i) { Py_ssize_t len = PyBytes_GET_SIZE(a->a_bytecode); _Py_CODEUNIT *code; - int size = instr_size(i); - if (a->a_offset + size >= len / (int)sizeof(_Py_CODEUNIT)) { - if (len > PY_SSIZE_T_MAX / 2) - return 0; - if (_PyBytes_Resize(&a->a_bytecode, len * 2) < 0) - return 0; + assert(a->a_offset <= PY_SSIZE_T_MAX / 2); + if (!bytes_make_room(&a->a_bytecode, a->a_offset * (Py_ssize_t)2, + sizeof(_Py_CODEUNIT) * (Py_ssize_t)size)) { + return 0; } code = (_Py_CODEUNIT *)PyBytes_AS_STRING(a->a_bytecode) + a->a_offset; a->a_offset += size; From fa9e69dc83859c0353de8dd225bac57ea8460f28 Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Tue, 21 Jun 2022 02:52:41 +0000 Subject: [PATCH 2/7] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../2022-06-21-02-52-39.gh-issue-93964.Cg1LE7.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2022-06-21-02-52-39.gh-issue-93964.Cg1LE7.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-06-21-02-52-39.gh-issue-93964.Cg1LE7.rst b/Misc/NEWS.d/next/Core and Builtins/2022-06-21-02-52-39.gh-issue-93964.Cg1LE7.rst new file mode 100644 index 00000000000000..b184a7b2f7116c --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-06-21-02-52-39.gh-issue-93964.Cg1LE7.rst @@ -0,0 +1 @@ +Strengthened compiler overflow checks to prevent crashes when compiling very large source files. From e0054bfc2f2e604356a206e9fe8b34891233451d Mon Sep 17 00:00:00 2001 From: sweeneyde Date: Tue, 21 Jun 2022 00:44:50 -0400 Subject: [PATCH 3/7] Another test, more comments --- Python/compile.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/Python/compile.c b/Python/compile.c index d62cd2f6e62213..ddbbed8f7fdc78 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -7500,14 +7500,22 @@ assemble_emit_exception_table_item(struct assembler *a, int value, int msb) write_except_byte(a, (value&0x3f) | msb); } -/* Make room for at least logical_length+to_add bytes in the bytes object. - Use exponential growth for O(1) amortized runtime. */ +/* Make room for at least (logical_length+to_add)*unitsize in the + bytes object. Use exponential growth for O(1) amortized runtime. */ static int -bytes_make_room(PyObject **bytes, Py_ssize_t logical_length, - Py_ssize_t to_add) +bytes_make_room(PyObject **bytes, int logical_length, + int to_add, Py_ssize_t unitsize) { + // Make sure we can successfully do the addition. + if (logical_length > INT_MAX - to_add) { + PyErr_NoMemory(); + return 0; + } + // The existing logical buffer should always fit in a Py_ssize_t + assert(logical_length <= PY_SSIZE_T_MAX / unitsize); Py_ssize_t b_len = PyBytes_GET_SIZE(*bytes); - if (logical_length >= b_len - to_add) { + if (unitsize * logical_length >= b_len - to_add * unitsize) { + // There's not enough room. Double it. if (b_len > PY_SSIZE_T_MAX / 2) { PyErr_NoMemory(); return 0; @@ -7526,7 +7534,7 @@ static int assemble_emit_exception_table_entry(struct assembler *a, int start, int end, basicblock *handler) { if (!bytes_make_room(&a->a_except_table, a->a_except_table_off, - MAX_SIZE_OF_ENTRY)) { + MAX_SIZE_OF_ENTRY, 1)) { return 0; } int size = end-start; @@ -7669,7 +7677,7 @@ static int write_location_info_entry(struct assembler* a, struct instr* i, int isize) { if (!bytes_make_room(&a->a_linetable, a->a_location_off, - THEORETICAL_MAX_ENTRY_SIZE)) { + THEORETICAL_MAX_ENTRY_SIZE, 1)) { return 0; } if (i->i_loc.lineno < 0) { @@ -7728,9 +7736,8 @@ assemble_emit(struct assembler *a, struct instr *i) Py_ssize_t len = PyBytes_GET_SIZE(a->a_bytecode); _Py_CODEUNIT *code; int size = instr_size(i); - assert(a->a_offset <= PY_SSIZE_T_MAX / 2); - if (!bytes_make_room(&a->a_bytecode, a->a_offset * (Py_ssize_t)2, - sizeof(_Py_CODEUNIT) * (Py_ssize_t)size)) { + if (!bytes_make_room(&a->a_bytecode, a->a_offset, + size, sizeof(_Py_CODEUNIT))) { return 0; } code = (_Py_CODEUNIT *)PyBytes_AS_STRING(a->a_bytecode) + a->a_offset; From e05b51e21631f2dd8ee476f7b0d58dd6229fb0b7 Mon Sep 17 00:00:00 2001 From: sweeneyde Date: Tue, 21 Jun 2022 01:27:22 -0400 Subject: [PATCH 4/7] remove unused variable --- Python/compile.c | 1 - 1 file changed, 1 deletion(-) diff --git a/Python/compile.c b/Python/compile.c index ddbbed8f7fdc78..a30abf66041e01 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -7733,7 +7733,6 @@ assemble_emit_location(struct assembler* a, struct instr* i) static int assemble_emit(struct assembler *a, struct instr *i) { - Py_ssize_t len = PyBytes_GET_SIZE(a->a_bytecode); _Py_CODEUNIT *code; int size = instr_size(i); if (!bytes_make_room(&a->a_bytecode, a->a_offset, From 75a08cd7239dcd84de3aaac790001571b227454a Mon Sep 17 00:00:00 2001 From: sweeneyde Date: Tue, 26 Jul 2022 01:28:50 -0400 Subject: [PATCH 5/7] simplify checks by using b_len/unitsize. use INT_MAX.. --- Python/compile.c | 43 ++++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/Python/compile.c b/Python/compile.c index 22f9486dff16d7..2ae2281ebb3df3 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -7541,29 +7541,27 @@ assemble_emit_exception_table_item(struct assembler *a, int value, int msb) write_except_byte(a, (value&0x3f) | msb); } -/* Make room for at least (logical_length+to_add)*unitsize in the - bytes object. Use exponential growth for O(1) amortized runtime. */ +/* Make room for at least (logical_length+to_add)*unitsize bytes in the + bytes object. Use exponential growth for O(1) amortized runtime. + Keep len(bytes)/unitsize <= INT_MAX. */ static int -bytes_make_room(PyObject **bytes, int logical_length, - int to_add, Py_ssize_t unitsize) +bytes_make_room(PyObject **bytes, Py_ssize_t unitsize, + int logical_length, int to_add, const char *overflow_msg) { - // Make sure we can successfully do the addition. - if (logical_length > INT_MAX - to_add) { - PyErr_NoMemory(); - return 0; - } - // The existing logical buffer should always fit in a Py_ssize_t - assert(logical_length <= PY_SSIZE_T_MAX / unitsize); Py_ssize_t b_len = PyBytes_GET_SIZE(*bytes); - if (unitsize * logical_length >= b_len - to_add * unitsize) { + Py_ssize_t space = b_len / unitsize; + assert(space <= INT_MAX); + assert(logical_length <= space); + if (logical_length >= space - to_add) { // There's not enough room. Double it. - if (b_len > PY_SSIZE_T_MAX / 2) { - PyErr_NoMemory(); + if (space > INT_MAX / 2 || b_len > PY_SSIZE_T_MAX / 2) { + PyErr_SetString(PyExc_OverflowError, overflow_msg); return 0; } - if (_PyBytes_Resize(bytes, b_len * 2) < 0) { + if (_PyBytes_Resize(bytes, PyBytes_GET_SIZE(*bytes) * 2) < 0) { return 0; } + assert(PyBytes_GET_SIZE(*bytes) / unitsize <= INT_MAX); } return 1; } @@ -7574,8 +7572,9 @@ bytes_make_room(PyObject **bytes, int logical_length, static int assemble_emit_exception_table_entry(struct assembler *a, int start, int end, basicblock *handler) { - if (!bytes_make_room(&a->a_except_table, a->a_except_table_off, - MAX_SIZE_OF_ENTRY, 1)) { + if (!bytes_make_room(&a->a_except_table, sizeof(char), + a->a_except_table_off, MAX_SIZE_OF_ENTRY, + "exception table is too long")) { return 0; } int size = end-start; @@ -7717,8 +7716,9 @@ write_location_info_no_column(struct assembler* a, int length, int line_delta) static int write_location_info_entry(struct assembler* a, struct instr* i, int isize) { - if (!bytes_make_room(&a->a_linetable, a->a_location_off, - THEORETICAL_MAX_ENTRY_SIZE, 1)) { + if (!bytes_make_room(&a->a_linetable, sizeof(char), + a->a_location_off, THEORETICAL_MAX_ENTRY_SIZE, + "line number table is too long")) { return 0; } if (i->i_loc.lineno < 0) { @@ -7776,8 +7776,9 @@ assemble_emit(struct assembler *a, struct instr *i) { _Py_CODEUNIT *code; int size = instr_size(i); - if (!bytes_make_room(&a->a_bytecode, a->a_offset, - size, sizeof(_Py_CODEUNIT))) { + assert(a->a_offset <= INT_MAX / sizeof(_Py_CODEUNIT)); + if (!bytes_make_room(&a->a_bytecode, sizeof(_Py_CODEUNIT), + a->a_offset, size, "bytecode is too long")) { return 0; } code = (_Py_CODEUNIT *)PyBytes_AS_STRING(a->a_bytecode) + a->a_offset; From 7febdc85c35ac6c2dcffde386b21688fc8377feb Mon Sep 17 00:00:00 2001 From: sweeneyde Date: Tue, 26 Jul 2022 01:40:24 -0400 Subject: [PATCH 6/7] re-use b_len --- Python/compile.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/compile.c b/Python/compile.c index 2ae2281ebb3df3..3cf11b637508ab 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -7558,7 +7558,7 @@ bytes_make_room(PyObject **bytes, Py_ssize_t unitsize, PyErr_SetString(PyExc_OverflowError, overflow_msg); return 0; } - if (_PyBytes_Resize(bytes, PyBytes_GET_SIZE(*bytes) * 2) < 0) { + if (_PyBytes_Resize(bytes, b_len * 2) < 0) { return 0; } assert(PyBytes_GET_SIZE(*bytes) / unitsize <= INT_MAX); From 4a708ebde52a4e1e21ab785f36cfb1450ab5c453 Mon Sep 17 00:00:00 2001 From: sweeneyde Date: Wed, 27 Jul 2022 12:25:54 -0400 Subject: [PATCH 7/7] remove misleading assertion --- Python/compile.c | 1 - 1 file changed, 1 deletion(-) diff --git a/Python/compile.c b/Python/compile.c index 3cf11b637508ab..88eadd95c7ada4 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -7776,7 +7776,6 @@ assemble_emit(struct assembler *a, struct instr *i) { _Py_CODEUNIT *code; int size = instr_size(i); - assert(a->a_offset <= INT_MAX / sizeof(_Py_CODEUNIT)); if (!bytes_make_room(&a->a_bytecode, sizeof(_Py_CODEUNIT), a->a_offset, size, "bytecode is too long")) { return 0;