diff --git a/Lib/test/test_generated_cases.py b/Lib/test/test_generated_cases.py index 7f821810aea00c..ce0d9c791eeb4c 100644 --- a/Lib/test/test_generated_cases.py +++ b/Lib/test/test_generated_cases.py @@ -60,7 +60,7 @@ def test_effect_sizes(self): stack.pop(y) stack.pop(x) for out in outputs: - stack.push(Local.local(out)) + stack.push(Local.undefined(out)) self.assertEqual(stack.base_offset.to_c(), "-1 - oparg - oparg*2") self.assertEqual(stack.top_offset.to_c(), "1 - oparg - oparg*2 + oparg*4") @@ -281,6 +281,67 @@ def test_predictions(self): """ self.run_cases_test(input, output) + def test_sync_sp(self): + input = """ + inst(A, (arg -- res)) { + SYNC_SP(); + escaping_call(); + res = Py_None; + } + inst(B, (arg -- res)) { + res = Py_None; + SYNC_SP(); + escaping_call(); + } + """ + output = """ + TARGET(A) { + frame->instr_ptr = next_instr; + next_instr += 1; + INSTRUCTION_STATS(A); + _PyStackRef res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + escaping_call(); + res = Py_None; + stack_pointer[0] = res; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + DISPATCH(); + } + + TARGET(B) { + frame->instr_ptr = next_instr; + next_instr += 1; + INSTRUCTION_STATS(B); + _PyStackRef res; + res = Py_None; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + escaping_call(); + stack_pointer[0] = res; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + DISPATCH(); + } + """ + self.run_cases_test(input, output) + + + def test_pep7_condition(self): + input = """ + inst(OP, (arg1 -- out)) { + if (arg1) + out = 0; + else { + out = 1; + } + } + """ + output = "" + with self.assertRaises(SyntaxError): + self.run_cases_test(input, output) + def test_error_if_plain(self): input = """ inst(OP, (--)) { @@ -810,7 +871,7 @@ def test_deopt_and_exit(self): } """ output = "" - with self.assertRaises(Exception): + with self.assertRaises(SyntaxError): self.run_cases_test(input, output) def test_array_of_one(self): diff --git a/Python/bytecodes.c b/Python/bytecodes.c index ad30fb3db0360a..6ec07f42ff28bf 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -972,7 +972,9 @@ dummy_func( int err = _Py_call_instrumentation_arg( tstate, PY_MONITORING_EVENT_PY_RETURN, frame, this_instr, PyStackRef_AsPyObjectBorrow(val)); - if (err) ERROR_NO_POP(); + if (err) { + ERROR_NO_POP(); + } } macro(INSTRUMENTED_RETURN_VALUE) = @@ -1168,7 +1170,9 @@ dummy_func( tstate, PY_MONITORING_EVENT_PY_YIELD, frame, this_instr, PyStackRef_AsPyObjectBorrow(val)); LOAD_SP(); - if (err) ERROR_NO_POP(); + if (err) { + ERROR_NO_POP(); + } if (frame->instr_ptr != this_instr) { next_instr = frame->instr_ptr; DISPATCH(); @@ -1276,10 +1280,12 @@ dummy_func( DECREF_INPUTS(); ERROR_IF(true, error); } - if (PyDict_CheckExact(ns)) + if (PyDict_CheckExact(ns)) { err = PyDict_SetItem(ns, name, PyStackRef_AsPyObjectBorrow(v)); - else + } + else { err = PyObject_SetItem(ns, name, PyStackRef_AsPyObjectBorrow(v)); + } DECREF_INPUTS(); ERROR_IF(err, error); } @@ -1993,6 +1999,8 @@ dummy_func( attr_o = PyObject_GetAttr(PyStackRef_AsPyObjectBorrow(owner), name); DECREF_INPUTS(); ERROR_IF(attr_o == NULL, error); + /* We need to define self_or_null on all paths */ + self_or_null = PyStackRef_NULL; } attr = PyStackRef_FromPyObjectSteal(attr_o); } @@ -4297,7 +4305,9 @@ dummy_func( int err = _Py_call_instrumentation_2args( tstate, PY_MONITORING_EVENT_CALL, frame, this_instr, func, arg); - if (err) ERROR_NO_POP(); + if (err) { + ERROR_NO_POP(); + } result = PyStackRef_FromPyObjectSteal(PyObject_Call(func, callargs, kwargs)); if (!PyFunction_Check(func) && !PyMethod_Check(func)) { diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 3d0f3b36a40ed2..cb3ab9a85921ba 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -1432,10 +1432,12 @@ PyStackRef_CLOSE(v); if (true) JUMP_TO_ERROR(); } - if (PyDict_CheckExact(ns)) - err = PyDict_SetItem(ns, name, PyStackRef_AsPyObjectBorrow(v)); - else - err = PyObject_SetItem(ns, name, PyStackRef_AsPyObjectBorrow(v)); + if (PyDict_CheckExact(ns)) { + err = PyDict_SetItem(ns, name, PyStackRef_AsPyObjectBorrow(v)); + } + else { + err = PyObject_SetItem(ns, name, PyStackRef_AsPyObjectBorrow(v)); + } PyStackRef_CLOSE(v); if (err) JUMP_TO_ERROR(); stack_pointer += -1; @@ -2238,6 +2240,8 @@ attr_o = PyObject_GetAttr(PyStackRef_AsPyObjectBorrow(owner), name); PyStackRef_CLOSE(owner); if (attr_o == NULL) JUMP_TO_ERROR(); + /* We need to define self_or_null on all paths */ + self_or_null = PyStackRef_NULL; } attr = PyStackRef_FromPyObjectSteal(attr_o); stack_pointer[-1] = attr; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index bf75cb901bc46d..475134c4dc3aa2 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -1587,7 +1587,9 @@ int err = _Py_call_instrumentation_2args( tstate, PY_MONITORING_EVENT_CALL, frame, this_instr, func, arg); - if (err) goto error; + if (err) { + goto error; + } result = PyStackRef_FromPyObjectSteal(PyObject_Call(func, callargs, kwargs)); if (!PyFunction_Check(func) && !PyMethod_Check(func)) { if (PyStackRef_IsNull(result)) { @@ -4541,7 +4543,9 @@ int err = _Py_call_instrumentation_arg( tstate, PY_MONITORING_EVENT_PY_RETURN, frame, this_instr, PyStackRef_AsPyObjectBorrow(val)); - if (err) goto error; + if (err) { + goto error; + } } // _RETURN_VALUE retval = val; @@ -4581,7 +4585,9 @@ int err = _Py_call_instrumentation_arg( tstate, PY_MONITORING_EVENT_PY_RETURN, frame, this_instr, PyStackRef_AsPyObjectBorrow(val)); - if (err) goto error; + if (err) { + goto error; + } } // _RETURN_VALUE retval = val; @@ -4625,7 +4631,9 @@ tstate, PY_MONITORING_EVENT_PY_YIELD, frame, this_instr, PyStackRef_AsPyObjectBorrow(val)); LOAD_SP(); - if (err) goto error; + if (err) { + goto error; + } if (frame->instr_ptr != this_instr) { next_instr = frame->instr_ptr; DISPATCH(); @@ -4893,6 +4901,8 @@ attr_o = PyObject_GetAttr(PyStackRef_AsPyObjectBorrow(owner), name); PyStackRef_CLOSE(owner); if (attr_o == NULL) goto pop_1_error; + /* We need to define self_or_null on all paths */ + self_or_null = PyStackRef_NULL; } attr = PyStackRef_FromPyObjectSteal(attr_o); } @@ -7066,10 +7076,12 @@ PyStackRef_CLOSE(v); if (true) goto pop_1_error; } - if (PyDict_CheckExact(ns)) - err = PyDict_SetItem(ns, name, PyStackRef_AsPyObjectBorrow(v)); - else - err = PyObject_SetItem(ns, name, PyStackRef_AsPyObjectBorrow(v)); + if (PyDict_CheckExact(ns)) { + err = PyDict_SetItem(ns, name, PyStackRef_AsPyObjectBorrow(v)); + } + else { + err = PyObject_SetItem(ns, name, PyStackRef_AsPyObjectBorrow(v)); + } PyStackRef_CLOSE(v); if (err) goto pop_1_error; stack_pointer += -1; @@ -7692,7 +7704,9 @@ int err = _Py_call_instrumentation_2args( tstate, PY_MONITORING_EVENT_CALL, frame, this_instr, func, arg); - if (err) goto error; + if (err) { + goto error; + } result = PyStackRef_FromPyObjectSteal(PyObject_Call(func, callargs, kwargs)); if (!PyFunction_Check(func) && !PyMethod_Check(func)) { if (PyStackRef_IsNull(result)) { diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 9a1b9da52f4bb5..f347a6d81e91a4 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -182,7 +182,9 @@ dummy_func(void) { res = sym_new_type(ctx, &PyFloat_Type); } } - res = sym_new_unknown(ctx); + else { + res = sym_new_unknown(ctx); + } } op(_BINARY_OP_ADD_INT, (left, right -- res)) { @@ -337,41 +339,47 @@ dummy_func(void) { } op(_TO_BOOL, (value -- res)) { - if (!optimize_to_bool(this_instr, ctx, value, &res)) { + int opt = optimize_to_bool(this_instr, ctx, value, &res); + if (!opt) { res = sym_new_type(ctx, &PyBool_Type); } } op(_TO_BOOL_BOOL, (value -- res)) { - if (!optimize_to_bool(this_instr, ctx, value, &res)) { + int opt = optimize_to_bool(this_instr, ctx, value, &res); + if (!opt) { sym_set_type(value, &PyBool_Type); res = value; } } op(_TO_BOOL_INT, (value -- res)) { - if (!optimize_to_bool(this_instr, ctx, value, &res)) { + int opt = optimize_to_bool(this_instr, ctx, value, &res); + if (!opt) { sym_set_type(value, &PyLong_Type); res = sym_new_type(ctx, &PyBool_Type); } } op(_TO_BOOL_LIST, (value -- res)) { - if (!optimize_to_bool(this_instr, ctx, value, &res)) { + int opt = optimize_to_bool(this_instr, ctx, value, &res); + if (!opt) { sym_set_type(value, &PyList_Type); res = sym_new_type(ctx, &PyBool_Type); } } op(_TO_BOOL_NONE, (value -- res)) { - if (!optimize_to_bool(this_instr, ctx, value, &res)) { + int opt = optimize_to_bool(this_instr, ctx, value, &res); + if (!opt) { sym_set_const(value, Py_None); res = sym_new_const(ctx, Py_False); } } op(_TO_BOOL_STR, (value -- res)) { - if (!optimize_to_bool(this_instr, ctx, value, &res)) { + int opt = optimize_to_bool(this_instr, ctx, value, &res); + if (!opt) { res = sym_new_type(ctx, &PyBool_Type); sym_set_type(value, &PyUnicode_Type); } @@ -482,6 +490,9 @@ dummy_func(void) { if (oparg & 1) { self_or_null = sym_new_unknown(ctx); } + else { + self_or_null = NULL; + } } op(_LOAD_ATTR_MODULE, (index/1, owner -- attr, null if (oparg & 1))) { diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 672fec3946f2fb..3f12e22d033692 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -119,7 +119,8 @@ _Py_UopsSymbol *value; _Py_UopsSymbol *res; value = stack_pointer[-1]; - if (!optimize_to_bool(this_instr, ctx, value, &res)) { + int opt = optimize_to_bool(this_instr, ctx, value, &res); + if (!opt) { res = sym_new_type(ctx, &PyBool_Type); } stack_pointer[-1] = res; @@ -130,7 +131,8 @@ _Py_UopsSymbol *value; _Py_UopsSymbol *res; value = stack_pointer[-1]; - if (!optimize_to_bool(this_instr, ctx, value, &res)) { + int opt = optimize_to_bool(this_instr, ctx, value, &res); + if (!opt) { sym_set_type(value, &PyBool_Type); res = value; } @@ -142,7 +144,8 @@ _Py_UopsSymbol *value; _Py_UopsSymbol *res; value = stack_pointer[-1]; - if (!optimize_to_bool(this_instr, ctx, value, &res)) { + int opt = optimize_to_bool(this_instr, ctx, value, &res); + if (!opt) { sym_set_type(value, &PyLong_Type); res = sym_new_type(ctx, &PyBool_Type); } @@ -154,7 +157,8 @@ _Py_UopsSymbol *value; _Py_UopsSymbol *res; value = stack_pointer[-1]; - if (!optimize_to_bool(this_instr, ctx, value, &res)) { + int opt = optimize_to_bool(this_instr, ctx, value, &res); + if (!opt) { sym_set_type(value, &PyList_Type); res = sym_new_type(ctx, &PyBool_Type); } @@ -166,7 +170,8 @@ _Py_UopsSymbol *value; _Py_UopsSymbol *res; value = stack_pointer[-1]; - if (!optimize_to_bool(this_instr, ctx, value, &res)) { + int opt = optimize_to_bool(this_instr, ctx, value, &res); + if (!opt) { sym_set_const(value, Py_None); res = sym_new_const(ctx, Py_False); } @@ -178,7 +183,8 @@ _Py_UopsSymbol *value; _Py_UopsSymbol *res; value = stack_pointer[-1]; - if (!optimize_to_bool(this_instr, ctx, value, &res)) { + int opt = optimize_to_bool(this_instr, ctx, value, &res); + if (!opt) { res = sym_new_type(ctx, &PyBool_Type); sym_set_type(value, &PyUnicode_Type); } @@ -1022,6 +1028,9 @@ if (oparg & 1) { self_or_null = sym_new_unknown(ctx); } + else { + self_or_null = NULL; + } stack_pointer[-1] = attr; if (oparg & 1) stack_pointer[0] = self_or_null; stack_pointer += (oparg & 1); @@ -2178,7 +2187,9 @@ res = sym_new_type(ctx, &PyFloat_Type); } } - res = sym_new_unknown(ctx); + else { + res = sym_new_unknown(ctx); + } stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 3cc36b6b5841bd..07ee411e0457af 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -4,7 +4,6 @@ import re from typing import Optional - @dataclass class Properties: escapes: bool @@ -156,6 +155,7 @@ class Uop: stack: StackEffect caches: list[CacheEntry] deferred_refs: dict[lexer.Token, str | None] + output_stores: list[lexer.Token] body: list[lexer.Token] properties: Properties _size: int = -1 @@ -353,18 +353,40 @@ def analyze_caches(inputs: list[parser.InputEffect]) -> list[CacheEntry]: return [CacheEntry(i.name, int(i.size)) for i in caches] +def find_assignment_target(node: parser.InstDef, idx: int) -> list[lexer.Token]: + """Find the tokens that make up the left-hand side of an assignment""" + offset = 0 + for tkn in reversed(node.block.tokens[: idx]): + if tkn.kind == "SEMI" or tkn.kind == "LBRACE" or tkn.kind == "RBRACE": + return node.block.tokens[idx - offset : idx] + offset += 1 + return [] + + +def find_stores_outputs(node: parser.InstDef) -> list[lexer.Token]: + res: list[lexer.Token] = [] + outnames = [ out.name for out in node.outputs ] + for idx, tkn in enumerate(node.block.tokens): + if tkn.kind == "AND": + name = node.block.tokens[idx+1] + if name.text in outnames: + res.append(name) + if tkn.kind != "EQUALS": + continue + lhs = find_assignment_target(node, idx) + assert lhs + while lhs and lhs[0].kind == "COMMENT": + lhs = lhs[1:] + if len(lhs) != 1 or lhs[0].kind != "IDENTIFIER": + continue + name = lhs[0] + if name.text in outnames: + res.append(name) + return res + def analyze_deferred_refs(node: parser.InstDef) -> dict[lexer.Token, str | None]: """Look for PyStackRef_FromPyObjectNew() calls""" - def find_assignment_target(idx: int) -> list[lexer.Token]: - """Find the tokens that make up the left-hand side of an assignment""" - offset = 1 - for tkn in reversed(node.block.tokens[: idx - 1]): - if tkn.kind == "SEMI" or tkn.kind == "LBRACE" or tkn.kind == "RBRACE": - return node.block.tokens[idx - offset : idx - 1] - offset += 1 - return [] - refs: dict[lexer.Token, str | None] = {} for idx, tkn in enumerate(node.block.tokens): if tkn.kind != "IDENTIFIER" or tkn.text != "PyStackRef_FromPyObjectNew": @@ -373,7 +395,7 @@ def find_assignment_target(idx: int) -> list[lexer.Token]: if idx == 0 or node.block.tokens[idx - 1].kind != "EQUALS": raise analysis_error("Expected '=' before PyStackRef_FromPyObjectNew", tkn) - lhs = find_assignment_target(idx) + lhs = find_assignment_target(node, idx - 1) if len(lhs) == 0: raise analysis_error( "PyStackRef_FromPyObjectNew() must be assigned to an output", tkn @@ -692,6 +714,7 @@ def make_uop( stack=analyze_stack(op), caches=analyze_caches(inputs), deferred_refs=analyze_deferred_refs(op), + output_stores=find_stores_outputs(op), body=op.block.tokens, properties=compute_properties(op), ) @@ -712,6 +735,7 @@ def make_uop( stack=analyze_stack(op, bit), caches=analyze_caches(inputs), deferred_refs=analyze_deferred_refs(op), + output_stores=find_stores_outputs(op), body=op.block.tokens, properties=properties, ) @@ -735,6 +759,7 @@ def make_uop( stack=analyze_stack(op), caches=analyze_caches(inputs), deferred_refs=analyze_deferred_refs(op), + output_stores=find_stores_outputs(op), body=op.block.tokens, properties=properties, ) diff --git a/Tools/cases_generator/generators_common.py b/Tools/cases_generator/generators_common.py index dd4057c931ca19..a913f3af11b190 100644 --- a/Tools/cases_generator/generators_common.py +++ b/Tools/cases_generator/generators_common.py @@ -9,11 +9,39 @@ analysis_error, ) from cwriter import CWriter -from typing import Callable, Mapping, TextIO, Iterator +from typing import Callable, Mapping, TextIO, Iterator, Iterable from lexer import Token -from stack import Stack +from stack import Stack, Local, Storage +class TokenIterator: + + look_ahead: Token | None + iterator: Iterator[Token] + + def __init__(self, tkns: Iterable[Token]): + self.iterator = iter(tkns) + self.look_ahead = None + + def __iter__(self) -> "TokenIterator": + return self + + def __next__(self) -> Token: + if self.look_ahead is None: + return next(self.iterator) + else: + res = self.look_ahead + self.look_ahead = None + return res + + def peek(self) -> Token | None: + if self.look_ahead is None: + try: + self.look_ahead = next(self.iterator) + except StopIteration: + pass + return self.look_ahead + ROOT = Path(__file__).parent.parent.parent DEFAULT_INPUT = (ROOT / "Python/bytecodes.c").absolute().as_posix() @@ -47,22 +75,47 @@ def write_header( ) -def emit_to(out: CWriter, tkn_iter: Iterator[Token], end: str) -> None: +def emit_to(out: CWriter, tkn_iter: TokenIterator, end: str) -> Token: parens = 0 for tkn in tkn_iter: if tkn.kind == end and parens == 0: - return + return tkn if tkn.kind == "LPAREN": parens += 1 if tkn.kind == "RPAREN": parens -= 1 out.emit(tkn) + raise analysis_error(f"Expecting {end}. Reached end of file", tkn) ReplacementFunctionType = Callable[ - [Token, Iterator[Token], Uop, Stack, Instruction | None], None + [Token, TokenIterator, Uop, Storage, Instruction | None], bool ] +def always_true(tkn: Token | None) -> bool: + if tkn is None: + return False + return tkn.text == "true" or tkn.text == "1" + +def push_defined_locals(outputs: list[Local], stack:Stack, tkn: Token) -> None: + while outputs: + out = outputs[0] + if out.defined: + stack.push(out) + outputs.pop(0) + else: + break + undefined = "" + for out in outputs: + if out.defined: + raise analysis_error( + f"Locals not defined in stack order. " + f"Expected '{out.name}' is defined before '{undefined}'", + tkn + ) + else: + undefined = out.name + class Emitter: out: CWriter @@ -77,19 +130,34 @@ def __init__(self, out: CWriter): "DECREF_INPUTS": self.decref_inputs, "SYNC_SP": self.sync_sp, "PyStackRef_FromPyObjectNew": self.py_stack_ref_from_py_object_new, + "DISPATCH": self.dispatch } self.out = out + def dispatch( + self, + tkn: Token, + tkn_iter: TokenIterator, + uop: Uop, + storage: Storage, + inst: Instruction | None, + ) -> bool: + self.emit(tkn) + return False + def deopt_if( self, tkn: Token, - tkn_iter: Iterator[Token], + tkn_iter: TokenIterator, uop: Uop, - unused: Stack, + storage: Storage, inst: Instruction | None, - ) -> None: + ) -> bool: self.out.emit_at("DEOPT_IF", tkn) - self.out.emit(next(tkn_iter)) + lparen = next(tkn_iter) + self.emit(lparen) + assert(lparen.kind == "LPAREN") + first_tkn = tkn_iter.peek() emit_to(self.out, tkn_iter, "RPAREN") next(tkn_iter) # Semi colon self.out.emit(", ") @@ -97,25 +165,29 @@ def deopt_if( assert inst.family is not None self.out.emit(inst.family.name) self.out.emit(");\n") + return not always_true(first_tkn) exit_if = deopt_if def error_if( self, tkn: Token, - tkn_iter: Iterator[Token], + tkn_iter: TokenIterator, uop: Uop, - stack: Stack, + storage: Storage, inst: Instruction | None, - ) -> None: + ) -> bool: self.out.emit_at("if ", tkn) - self.out.emit(next(tkn_iter)) + lparen = next(tkn_iter) + self.emit(lparen) + assert(lparen.kind == "LPAREN") + first_tkn = tkn_iter.peek() emit_to(self.out, tkn_iter, "COMMA") label = next(tkn_iter).text next(tkn_iter) # RPAREN next(tkn_iter) # Semi colon self.out.emit(") ") - c_offset = stack.peek_offset() + c_offset = storage.stack.peek_offset() try: offset = -int(c_offset) except ValueError: @@ -130,33 +202,35 @@ def error_if( self.out.emit(";\n") else: self.out.emit("{\n") - stack.flush_locally(self.out) + storage.stack.copy().flush(self.out) self.out.emit("goto ") self.out.emit(label) self.out.emit(";\n") self.out.emit("}\n") + return not always_true(first_tkn) def error_no_pop( self, tkn: Token, - tkn_iter: Iterator[Token], + tkn_iter: TokenIterator, uop: Uop, - stack: Stack, + storage: Storage, inst: Instruction | None, - ) -> None: + ) -> bool: next(tkn_iter) # LPAREN next(tkn_iter) # RPAREN next(tkn_iter) # Semi colon self.out.emit_at("goto error;", tkn) + return False def decref_inputs( self, tkn: Token, - tkn_iter: Iterator[Token], + tkn_iter: TokenIterator, uop: Uop, - stack: Stack, + storage: Storage, inst: Instruction | None, - ) -> None: + ) -> bool: next(tkn_iter) next(tkn_iter) next(tkn_iter) @@ -175,28 +249,30 @@ def decref_inputs( self.out.emit(f"PyStackRef_XCLOSE({var.name});\n") else: self.out.emit(f"PyStackRef_CLOSE({var.name});\n") + return True def sync_sp( self, tkn: Token, - tkn_iter: Iterator[Token], + tkn_iter: TokenIterator, uop: Uop, - stack: Stack, + storage: Storage, inst: Instruction | None, - ) -> None: + ) -> bool: next(tkn_iter) next(tkn_iter) next(tkn_iter) - stack.flush(self.out) + storage.stack.flush(self.out) + return True def py_stack_ref_from_py_object_new( self, tkn: Token, - tkn_iter: Iterator[Token], + tkn_iter: TokenIterator, uop: Uop, - stack: Stack, + storage: Storage, inst: Instruction | None, - ) -> None: + ) -> bool: self.out.emit(tkn) emit_to(self.out, tkn_iter, "SEMI") self.out.emit(";\n") @@ -204,29 +280,116 @@ def py_stack_ref_from_py_object_new( target = uop.deferred_refs[tkn] if target is None: # An assignment we don't handle, such as to a pointer or array. - return + return True # Flush the assignment to the stack. Note that we don't flush the # stack pointer here, and instead are currently relying on initializing # unused portions of the stack to NULL. - stack.flush_single_var(self.out, target, uop.stack.outputs) + storage.stack.flush_single_var(self.out, target, uop.stack.outputs) + return True - def emit_tokens( + def _emit_if( self, + tkn_iter: TokenIterator, uop: Uop, - stack: Stack, + storage: Storage, inst: Instruction | None, - ) -> None: - tkns = uop.body[1:-1] - if not tkns: - return - tkn_iter = iter(tkns) - self.out.start_line() + ) -> tuple[bool, Token, Storage]: + """ Returns (reachable?, closing '}', stack).""" + tkn = next(tkn_iter) + assert (tkn.kind == "LPAREN") + self.out.emit(tkn) + rparen = emit_to(self.out, tkn_iter, "RPAREN") + self.emit(rparen) + if_storage = storage.copy() + reachable, rbrace, if_stack = self._emit_block(tkn_iter, uop, if_storage, inst, True) + maybe_else = tkn_iter.peek() + if maybe_else and maybe_else.kind == "ELSE": + self.emit(rbrace) + self.emit(next(tkn_iter)) + maybe_if = tkn_iter.peek() + if maybe_if and maybe_if.kind == "IF": + self.emit(next(tkn_iter)) + else_reachable, rbrace, stack = self._emit_if(tkn_iter, uop, storage, inst) + else: + else_reachable, rbrace, stack = self._emit_block(tkn_iter, uop, storage, inst, True) + if not reachable: + # Discard the if storage + reachable = else_reachable + elif not else_reachable: + # Discard the else storage + storage = if_storage + else: + storage = storage.merge(if_storage, self.out) + else: + if reachable: + storage = storage.merge(if_storage, self.out) + else: + # Discard the if stack + reachable = True + return reachable, rbrace, storage + + def _emit_block( + self, + tkn_iter: TokenIterator, + uop: Uop, + storage: Storage, + inst: Instruction | None, + emit_first_brace: bool + ) -> tuple[bool, Token, Storage]: + """ Returns (reachable?, closing '}', stack).""" + braces = 1 + out_stores = set(uop.output_stores) + tkn = next(tkn_iter) + reachable = True + if tkn.kind != "LBRACE": + raise analysis_error(f"PEP 7: expected '{{' found {tkn.text}", tkn) + if emit_first_brace: + self.emit(tkn) for tkn in tkn_iter: - if tkn.kind == "IDENTIFIER" and tkn.text in self._replacers: - self._replacers[tkn.text](tkn, tkn_iter, uop, stack, inst) + if tkn.kind == "LBRACE": + self.out.emit(tkn) + braces += 1 + elif tkn.kind == "RBRACE": + braces -= 1 + if braces == 0: + return reachable, tkn, storage + self.out.emit(tkn) + elif tkn.kind == "GOTO": + reachable = False; + self.out.emit(tkn) + elif tkn.kind == "IDENTIFIER": + if tkn.text in self._replacers: + if not self._replacers[tkn.text](tkn, tkn_iter, uop, storage, inst): + reachable = False + else: + if tkn in out_stores: + for out in storage.outputs: + if out.name == tkn.text: + out.defined = True + out.in_memory = False + break + self.out.emit(tkn) + elif tkn.kind == "IF": + self.out.emit(tkn) + if_reachable, rbrace, stack = self._emit_if(tkn_iter, uop, storage, inst) + if reachable: + reachable = if_reachable + self.out.emit(rbrace) else: self.out.emit(tkn) + raise analysis_error("Expecting closing brace. Reached end of file", tkn) + + + def emit_tokens( + self, + uop: Uop, + storage: Storage, + inst: Instruction | None, + ) -> None: + tkn_iter = TokenIterator(uop.body) + self.out.start_line() + self._emit_block(tkn_iter, uop, storage, inst, False) def emit(self, txt: str | Token) -> None: self.out.emit(txt) diff --git a/Tools/cases_generator/optimizer_generator.py b/Tools/cases_generator/optimizer_generator.py index b74f627235ad84..c5c52f0f8f1333 100644 --- a/Tools/cases_generator/optimizer_generator.py +++ b/Tools/cases_generator/optimizer_generator.py @@ -18,11 +18,12 @@ ROOT, write_header, Emitter, + TokenIterator, ) from cwriter import CWriter from typing import TextIO, Iterator from lexer import Token -from stack import Local, Stack, StackError +from stack import Local, Stack, StackError, Storage DEFAULT_OUTPUT = ROOT / "Python/optimizer_cases.c.h" DEFAULT_ABSTRACT_INPUT = (ROOT / "Python/optimizer_bytecodes.c").absolute().as_posix() @@ -65,7 +66,7 @@ def declare_variables(uop: Uop, out: CWriter, skip_inputs: bool) -> None: def decref_inputs( out: CWriter, tkn: Token, - tkn_iter: Iterator[Token], + tkn_iter: TokenIterator, uop: Uop, stack: Stack, inst: Instruction | None, @@ -102,21 +103,28 @@ def write_uop( skip_inputs: bool, ) -> None: locals: dict[str, Local] = {} + prototype = override if override else uop try: - prototype = override if override else uop - is_override = override is not None out.start_line() + peeks: list[Local] = [] for var in reversed(prototype.stack.inputs): code, local = stack.pop(var, extract_bits=True) if not skip_inputs: out.emit(code) + if var.peek: + peeks.append(local) if local.defined: locals[local.name] = local + # Push back the peeks, so that they remain on the logical + # stack, but their values are cached. + while peeks: + stack.push(peeks.pop()) out.emit(stack.define_output_arrays(prototype.stack.outputs)) + storage = Storage.for_uop(stack, prototype, locals) if debug: args = [] for var in prototype.stack.inputs: - if not var.peek or is_override: + if not var.peek or override: args.append(var.name) out.emit(f'DEBUG_PRINTF({", ".join(args)});\n') if override: @@ -130,20 +138,16 @@ def write_uop( out.emit(f"{type}{cache.name} = ({cast})this_instr->operand;\n") if override: emitter = OptimizerEmitter(out) - emitter.emit_tokens(override, stack, None) + emitter.emit_tokens(override, storage, None) else: emit_default(out, uop) - for var in prototype.stack.outputs: - if var.name in locals: - local = locals[var.name] - else: - local = Local.local(var) - stack.push(local) + for output in storage.outputs: + stack.push(output) out.start_line() stack.flush(out, cast_type="_Py_UopsSymbol *", extract_bits=True) except StackError as ex: - raise analysis_error(ex.args[0], uop.body[0]) + raise analysis_error(ex.args[0], prototype.body[0]) SKIPS = ("_EXTENDED_ARG",) diff --git a/Tools/cases_generator/stack.py b/Tools/cases_generator/stack.py index 34bf597f2f552d..bd959c95ae6ce6 100644 --- a/Tools/cases_generator/stack.py +++ b/Tools/cases_generator/stack.py @@ -51,15 +51,23 @@ def unused(defn: StackItem) -> "Local": return Local(defn, False, defn.is_array(), False) @staticmethod - def local(defn: StackItem) -> "Local": + def undefined(defn: StackItem) -> "Local": array = defn.is_array() - return Local(defn, not array, array, True) + return Local(defn, not array, array, False) @staticmethod def redefinition(var: StackItem, prev: "Local") -> "Local": assert var.is_array() == prev.is_array() return Local(var, prev.cached, prev.in_memory, True) + def copy(self) -> "Local": + return Local( + self.item, + self.cached, + self.in_memory, + self.defined + ) + @property def size(self) -> str: return self.item.size @@ -75,6 +83,16 @@ def condition(self) -> str | None: def is_array(self) -> bool: return self.item.is_array() + def __eq__(self, other: object) -> bool: + if not isinstance(other, Local): + return NotImplemented + return ( + self.item is other.item + and self.cached is other.cached + and self.in_memory is other.in_memory + and self.defined is other.defined + ) + @dataclass class StackOffset: @@ -160,6 +178,11 @@ def clear(self) -> None: self.popped = [] self.pushed = [] + def __eq__(self, other: object) -> bool: + if not isinstance(other, StackOffset): + return NotImplemented + return self.to_c() == other.to_c() + class StackError(Exception): pass @@ -285,18 +308,6 @@ def _do_flush( out.emit("assert(WITHIN_STACK_BOUNDS());\n") out.start_line() - def flush_locally( - self, out: CWriter, cast_type: str = "uintptr_t", extract_bits: bool = False - ) -> None: - self._do_flush( - out, - self.variables[:], - self.base_offset.copy(), - self.top_offset.copy(), - cast_type, - extract_bits, - ) - def flush( self, out: CWriter, cast_type: str = "uintptr_t", extract_bits: bool = False ) -> None: @@ -344,6 +355,29 @@ def peek_offset(self) -> str: def as_comment(self) -> str: return f"/* Variables: {[v.name for v in self.variables]}. Base offset: {self.base_offset.to_c()}. Top offset: {self.top_offset.to_c()} */" + def copy(self) -> "Stack": + other = Stack() + other.top_offset = self.top_offset.copy() + other.base_offset = self.base_offset.copy() + other.variables = [var.copy() for var in self.variables] + other.defined = set(self.defined) + return other + + def __eq__(self, other: object) -> bool: + if not isinstance(other, Stack): + return NotImplemented + return ( + self.top_offset == other.top_offset + and self.base_offset == other.base_offset + and self.variables == other.variables + and self.defined == other.defined + ) + + + def merge(self, other:"Stack") -> "Stack": + if self != other: + raise StackError("unequal stacks") + return self def get_stack_effect(inst: Instruction | PseudoInstruction) -> Stack: stack = Stack() @@ -370,3 +404,58 @@ def stacks(inst: Instruction | PseudoInstruction) -> Iterator[StackEffect]: local = Local.unused(var) stack.push(local) return stack + +@dataclass +class Storage: + + stack: Stack + outputs: list[Local] + + def _push_defined_locals(self) -> None: + while self.outputs: + out = self.outputs[0] + if out.defined: + self.stack.push(out) + self.outputs.pop(0) + else: + break + undefined = "" + for out in self.outputs: + if out.defined: + raise StackError( + f"Locals not defined in stack order. " + f"Expected '{out.name}' is defined before '{undefined}'" + ) + else: + undefined = out.name + + def flush(self, out: CWriter) -> None: + self._push_defined_locals() + self.stack.flush(out) + + @staticmethod + def for_uop(stack: Stack, uop: Uop, locals: dict[str, Local]) -> "Storage": + outputs: list[Local] = [] + for var in uop.stack.outputs: + if not var.peek: + if var.name in locals: + local = locals[var.name] + elif var.name == "unused": + local = Local.unused(var) + else: + local = Local.undefined(var) + outputs.append(local) + return Storage(stack, outputs) + + def copy(self) -> "Storage": + return Storage(self.stack.copy(), [ l.copy() for l in self.outputs]) + + def merge(self, other: "Storage", out: CWriter) -> "Storage": + self.stack.merge(other.stack) + if self.outputs != other.outputs: + raise StackError("unequal locals") + return self + + def as_comment(self) -> str: + stack_comment = self.stack.as_comment() + return stack_comment[:-2] + str(self.outputs) + " */" diff --git a/Tools/cases_generator/tier1_generator.py b/Tools/cases_generator/tier1_generator.py index c749896c2cb7f6..41a9373c64d749 100644 --- a/Tools/cases_generator/tier1_generator.py +++ b/Tools/cases_generator/tier1_generator.py @@ -22,10 +22,11 @@ write_header, type_and_null, Emitter, + TokenIterator, ) from cwriter import CWriter from typing import TextIO -from stack import Local, Stack, StackError, get_stack_effect +from stack import Local, Stack, StackError, get_stack_effect, Storage DEFAULT_OUTPUT = ROOT / "Python/generated_cases.c.h" @@ -99,17 +100,8 @@ def write_uop( stack.push(peeks.pop()) if braces: emitter.emit("{\n") - emitter.out.emit(stack.define_output_arrays(uop.stack.outputs)) - outputs: list[Local] = [] - for var in uop.stack.outputs: - if not var.peek: - if var.name in locals: - local = locals[var.name] - elif var.name == "unused": - local = Local.unused(var) - else: - local = Local.local(var) - outputs.append(local) + emitter.emit(stack.define_output_arrays(uop.stack.outputs)) + storage = Storage.for_uop(stack, uop, locals) for cache in uop.caches: if cache.name != "unused": @@ -125,8 +117,9 @@ def write_uop( if inst.family is None: emitter.emit(f"(void){cache.name};\n") offset += cache.size - emitter.emit_tokens(uop, stack, inst) - for output in outputs: + + emitter.emit_tokens(uop, storage, inst) + for output in storage.outputs: if output.name in uop.deferred_refs.values(): # We've already spilled this when emitting tokens output.cached = False diff --git a/Tools/cases_generator/tier2_generator.py b/Tools/cases_generator/tier2_generator.py index b7c70fdad085fd..1f2df958bf32c9 100644 --- a/Tools/cases_generator/tier2_generator.py +++ b/Tools/cases_generator/tier2_generator.py @@ -20,11 +20,13 @@ write_header, type_and_null, Emitter, + TokenIterator, + always_true, ) from cwriter import CWriter from typing import TextIO, Iterator from lexer import Token -from stack import Local, Stack, StackError, get_stack_effect +from stack import Local, Stack, StackError, Storage DEFAULT_OUTPUT = ROOT / "Python/executor_cases.c.h" @@ -52,7 +54,7 @@ def declare_variables(uop: Uop, out: CWriter) -> None: for var in reversed(uop.stack.inputs): stack.pop(var) for var in uop.stack.outputs: - stack.push(Local.unused(var)) + stack.push(Local.undefined(var)) required = set(stack.defined) required.discard("unused") for var in reversed(uop.stack.inputs): @@ -69,85 +71,100 @@ def __init__(self, out: CWriter): def error_if( self, tkn: Token, - tkn_iter: Iterator[Token], + tkn_iter: TokenIterator, uop: Uop, - stack: Stack, + storage: Storage, inst: Instruction | None, - ) -> None: + ) -> bool: self.out.emit_at("if ", tkn) - self.emit(next(tkn_iter)) + lparen = next(tkn_iter) + self.emit(lparen) + assert(lparen.kind == "LPAREN") + first_tkn = next(tkn_iter) + self.out.emit(first_tkn) emit_to(self.out, tkn_iter, "COMMA") label = next(tkn_iter).text next(tkn_iter) # RPAREN next(tkn_iter) # Semi colon self.emit(") JUMP_TO_ERROR();\n") + return first_tkn.text != "true" and first_tkn.text != "1" + def error_no_pop( self, tkn: Token, - tkn_iter: Iterator[Token], + tkn_iter: TokenIterator, uop: Uop, - stack: Stack, + storage: Storage, inst: Instruction | None, - ) -> None: + ) -> bool: next(tkn_iter) # LPAREN next(tkn_iter) # RPAREN next(tkn_iter) # Semi colon self.out.emit_at("JUMP_TO_ERROR();", tkn) + return False def deopt_if( self, tkn: Token, - tkn_iter: Iterator[Token], + tkn_iter: TokenIterator, uop: Uop, - unused: Stack, + storage: Storage, inst: Instruction | None, - ) -> None: + ) -> bool: self.out.emit_at("if ", tkn) - self.emit(next(tkn_iter)) + lparen = next(tkn_iter) + self.emit(lparen) + assert(lparen.kind == "LPAREN") + first_tkn = tkn_iter.peek() emit_to(self.out, tkn_iter, "RPAREN") next(tkn_iter) # Semi colon self.emit(") {\n") self.emit("UOP_STAT_INC(uopcode, miss);\n") self.emit("JUMP_TO_JUMP_TARGET();\n") self.emit("}\n") + return not always_true(first_tkn) def exit_if( # type: ignore[override] self, tkn: Token, - tkn_iter: Iterator[Token], + tkn_iter: TokenIterator, uop: Uop, - unused: Stack, + storage: Storage, inst: Instruction | None, - ) -> None: + ) -> bool: self.out.emit_at("if ", tkn) - self.emit(next(tkn_iter)) + lparen = next(tkn_iter) + self.emit(lparen) + first_tkn = tkn_iter.peek() emit_to(self.out, tkn_iter, "RPAREN") next(tkn_iter) # Semi colon self.emit(") {\n") self.emit("UOP_STAT_INC(uopcode, miss);\n") self.emit("JUMP_TO_JUMP_TARGET();\n") self.emit("}\n") + return not always_true(first_tkn) def oparg( self, tkn: Token, - tkn_iter: Iterator[Token], + tkn_iter: TokenIterator, uop: Uop, - unused: Stack, + storage: Storage, inst: Instruction | None, - ) -> None: + ) -> bool: if not uop.name.endswith("_0") and not uop.name.endswith("_1"): self.emit(tkn) - return + return True amp = next(tkn_iter) if amp.text != "&": self.emit(tkn) self.emit(amp) - return + return True one = next(tkn_iter) assert one.text == "1" self.out.emit_at(uop.name[-1], tkn) + return True def write_uop(uop: Uop, emitter: Emitter, stack: Stack) -> None: @@ -160,19 +177,20 @@ def write_uop(uop: Uop, emitter: Emitter, stack: Stack) -> None: elif uop.properties.const_oparg >= 0: emitter.emit(f"oparg = {uop.properties.const_oparg};\n") emitter.emit(f"assert(oparg == CURRENT_OPARG());\n") + peeks: list[Local] = [] for var in reversed(uop.stack.inputs): code, local = stack.pop(var) emitter.emit(code) + if var.peek: + peeks.append(local) if local.defined: locals[local.name] = local + # Push back the peeks, so that they remain on the logical + # stack, but their values are cached. + while peeks: + stack.push(peeks.pop()) emitter.emit(stack.define_output_arrays(uop.stack.outputs)) - outputs: list[Local] = [] - for var in uop.stack.outputs: - if var.name in locals: - local = locals[var.name] - else: - local = Local.local(var) - outputs.append(local) + storage = Storage.for_uop(stack, uop, locals) for cache in uop.caches: if cache.name != "unused": if cache.size == 4: @@ -181,8 +199,8 @@ def write_uop(uop: Uop, emitter: Emitter, stack: Stack) -> None: type = f"uint{cache.size*16}_t " cast = f"uint{cache.size*16}_t" emitter.emit(f"{type}{cache.name} = ({cast})CURRENT_OPERAND();\n") - emitter.emit_tokens(uop, stack, None) - for output in outputs: + emitter.emit_tokens(uop, storage, None) + for output in storage.outputs: if output.name in uop.deferred_refs.values(): # We've already spilled this when emitting tokens output.cached = False