"""Verify the bytecode description file.

For every uop body this tool looks at each identifier that is immediately
followed by an opening parenthesis (a call-like construct) and reports:

* calls whose name is neither exempt by naming convention nor listed in
  ``NON_ESCAPING_FUNCTIONS`` and that are not wrapped in ``ESCAPING_CALL(...)``;
* ``goto``, flow-control macros and DECREF macros that appear inside an
  ``ESCAPING_CALL(...)`` argument list.

The process exit status is 0 when nothing was reported and 1 otherwise.
"""

# Consolidated end state of Tools/cases_generator/verify.py after the series:
#   [PATCH 1/3] Add verification tool to check for unmarked escaping calls
#   [PATCH 2/3] Better output format
#   [PATCH 3/3] Fix type annotations
#
# PATCH 1/3 also changes Tools/cases_generator/lexer.py so that the macro
# pattern recognises "if defined":
#   macro = r"# *(ifdef|ifndef|undef|define|error|endif|if +defined|if|else|include|#)"

import argparse
import sys

from analyzer import (
    Analysis,
    Instruction,
    Uop,
    analyze_files,
    StackItem,
    analysis_error,
)
from typing import Iterator
from lexer import Token, LPAREN, IDENTIFIER
from generators_common import DEFAULT_INPUT

# Names that may be called without being wrapped in ESCAPING_CALL(...).
NON_ESCAPING_FUNCTIONS = (
    "Py_INCREF",
    "_PyManagedDictPointer_IsValues",
    "_PyObject_GetManagedDict",
    "_PyObject_ManagedDictPointer",
    "_PyObject_InlineValues",
    "_PyDictValues_AddToInsertionOrder",
    "Py_DECREF",
    "Py_XDECREF",
    "_Py_DECREF_SPECIALIZED",
    "DECREF_INPUTS_AND_REUSE_FLOAT",
    "PyUnicode_Append",
    "_PyLong_IsZero",
    "Py_ARRAY_LENGTH",
    "Py_Unicode_GET_LENGTH",
    "PyUnicode_READ_CHAR",
    "_Py_SINGLETON",
    "PyUnicode_GET_LENGTH",
    "_PyLong_IsCompact",
    "_PyLong_IsNonNegativeCompact",
    "_PyLong_CompactValue",
    "_PyLong_DigitCount",
    "_Py_NewRef",
    "_Py_IsImmortal",
    "PyLong_FromLong",
    "_Py_STR",
    "_PyLong_Add",
    "_PyLong_Multiply",
    "_PyLong_Subtract",
    "Py_NewRef",
    "_PyList_ITEMS",
    "_PyTuple_ITEMS",
    "_Py_atomic_load_uintptr_relaxed",
    "_PyFrame_GetCode",
    "_PyThreadState_HasStackSpace",
    "_PyUnicode_Equal",
    "_PyFrame_SetStackPointer",
    "_PyType_HasFeature",
    "PyUnicode_Concat",
    "PySlice_New",
    "_Py_LeaveRecursiveCallPy",
    "maybe_lltrace_resume_frame",
    "_PyUnicode_JoinArray",
    "_PyEval_FrameClearAndPop",
    "_PyFrame_StackPush",
    "PyCell_New",
    "PyFloat_AS_DOUBLE",
    "_PyFrame_PushUnchecked",
    "Py_FatalError",
    "assert",
    "Py_Is",
    "Py_IsTrue",
    "Py_IsNone",
    "Py_IsFalse",
    # The comma after the next entry was missing, which silently fused it
    # with "_PyCode_CODE" into one bogus name and dropped both from the list.
    "_PyFrame_GetStackPointer",
    "_PyCode_CODE",
    "PyCFunction_GET_FLAGS",
    "_PyErr_Occurred",
    "_Py_LeaveRecursiveCallTstate",
    "_Py_EnterRecursiveCallTstateUnchecked",
    "PyStackRef_FromPyObjectSteal",
    "PyStackRef_AsPyObjectBorrow",
    "PyStackRef_AsPyObjectSteal",
    "PyStackRef_CLOSE",
    "PyStackRef_DUP",
    "PyStackRef_CLEAR",
    "PyStackRef_IsNull",
    "PyStackRef_TYPE",
    "PyStackRef_False",
    "PyStackRef_True",
    "PyStackRef_None",
    "PyStackRef_Is",
    "PyStackRef_FromPyObjectNew",
    "PyStackRef_AsPyObjectNew",
    "PyStackRef_FromPyObjectImmortal",
    "STACKREFS_TO_PYOBJECTS",
    "STACKREFS_TO_PYOBJECTS_CLEANUP",
    "CONVERSION_FAILED",
)

# Identifiers reported as "exiting flow control" inside ESCAPING_CALL(...).
FLOW_CONTROL = {
    "ESCAPING_CALL",
    "ERROR_IF",
    "DEOPT_IF",
    "ERROR_NO_POP",
}

# Identifiers reported as a DECREF inside ESCAPING_CALL(...).
DECREFS = {
    "Py_DECREF",
    "Py_XDECREF",
    "Py_CLEAR",
    "DECREF_INPUTS",
}


def error(msg: str, tkn: Token) -> None:
    """Print *msg* followed by the file name and line of *tkn*."""
    print(f"{msg} at {tkn.filename}:{tkn.line}")


def check_escaping_call(tkn_iter: Iterator[Token]) -> int:
    """Check the argument list of one ``ESCAPING_CALL(...)``.

    *tkn_iter* must be positioned immediately after the opening parenthesis,
    which is where the caller in this module leaves it after its lookahead.
    Tokens are consumed up to and including the matching closing parenthesis
    (or to the end of the stream if the parentheses never balance).

    Returns 1 if a ``goto``, a ``FLOW_CONTROL`` identifier or a ``DECREFS``
    identifier was found inside the call, otherwise 0.
    """
    res = 0
    # The opening parenthesis has already been consumed by the caller.  The
    # previous `assert(next(tkn_iter).kind == "LPAREN")` swallowed the first
    # argument token instead, and disappeared entirely under `python -O`.
    parens = 1
    for tkn in tkn_iter:
        if tkn.kind == LPAREN:
            parens += 1
        elif tkn.kind == "RPAREN":
            parens -= 1
            if parens == 0:
                return res
        elif tkn.kind == "GOTO":
            error("`goto` in 'ESCAPING_CALL'", tkn)
            res = 1
        elif tkn.kind == IDENTIFIER:
            if tkn.text in FLOW_CONTROL:
                error("Exiting flow control in 'ESCAPING_CALL'", tkn)
                res = 1
            if tkn.text in DECREFS:
                error("DECREF in 'ESCAPING_CALL'", tkn)
                res = 1
    return res


def is_macro_name(name: str) -> bool:
    """Return True if *name* has no lowercase letters once prefixes are dropped.

    One leading underscore and then one leading ``Py`` are removed before the
    remainder is compared with its upper-cased form.
    """
    if name.startswith("_"):
        name = name[1:]
    if name.startswith("Py"):
        name = name[2:]
    return name == name.upper()


def is_getter(name: str) -> bool:
    """Return True if *name* contains the (case-sensitive) substring ``GET``."""
    return "GET" in name


def _is_exempt_by_name(name: str) -> bool:
    """Return True if *name* is skipped purely because of how it is spelled."""
    return (
        is_macro_name(name)
        or is_getter(name)
        or name.endswith(("Check", "CheckExact"))
        or "backoff_counter" in name
    )


def check_for_unmarked_escapes(uop: Uop) -> int:
    """Report call-like identifiers in *uop* that are not known to be safe.

    Every identifier directly followed by ``(`` is inspected.  The contents of
    ``ESCAPING_CALL(...)`` are handed to :func:`check_escaping_call`; any other
    name must be exempt by spelling or listed in ``NON_ESCAPING_FUNCTIONS``.

    Returns 1 if anything was reported, otherwise 0.
    """
    res = 0
    tkns = iter(uop.body)
    tkn = next(tkns, None)
    while tkn is not None:
        next_tkn = next(tkns, None)
        if (
            tkn.kind != IDENTIFIER
            or next_tkn is None
            or next_tkn.kind != LPAREN
        ):
            # Not a call.  Keep the lookahead token as the next candidate so
            # an identifier that follows another identifier is not skipped,
            # and fall out of the loop (preserving `res`) at end of stream.
            tkn = next_tkn
            continue
        name = tkn.text
        if name == "ESCAPING_CALL":
            if check_escaping_call(tkns):
                res = 1
        elif not _is_exempt_by_name(name) and name not in NON_ESCAPING_FUNCTIONS:
            error(f"Unmarked escaping function '{name}'", tkn)
            res = 1
        tkn = next(tkns, None)
    return res


def verify_uop(uop: Uop) -> int:
    """Run all checks on a single uop; returns 1 on any report, else 0."""
    return check_for_unmarked_escapes(uop)


def verify(analysis: Analysis) -> int:
    """Run :func:`verify_uop` on every uop in *analysis* and OR the results."""
    res = 0
    for uop in analysis.uops.values():
        res |= verify_uop(uop)
    return res


arg_parser = argparse.ArgumentParser(
    description="Verify the bytecode description file.",
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)

arg_parser.add_argument(
    "input", nargs=argparse.REMAINDER, help="Instruction definition file(s)"
)

if __name__ == "__main__":
    args = arg_parser.parse_args()
    if len(args.input) == 0:
        args.input.append(DEFAULT_INPUT)
    sys.exit(verify(analyze_files(args.input)))