diff --git a/llvm/utils/extract_symbols.py b/llvm/utils/extract_symbols.py index 684e124c76259..388723421d660 100755 --- a/llvm/utils/extract_symbols.py +++ b/llvm/utils/extract_symbols.py @@ -53,12 +53,12 @@ def nm_get_symbols(tool, lib): # The -P flag displays the size field for symbols only when applicable, # so the last field is optional. There's no space after the value field, # but \s+ match newline also, so \s+\S* will match the optional size field. - match = re.match("^(\S+)\s+[BDGRSTuVW]\s+\S+\s+\S*$", line) + match = re.match(r"^(\S+)\s+[BDGRSTuVW]\s+\S+\s+\S*$", line) if match: yield (match.group(1), True) # Look for undefined symbols, which have type U and may or may not # (depending on which nm is being used) have value and size. - match = re.match("^(\S+)\s+U\s+(\S+\s+\S*)?$", line) + match = re.match(r"^(\S+)\s+U\s+(\S+\s+\S*)?$", line) if match: yield (match.group(1), False) process.wait() @@ -71,7 +71,7 @@ def readobj_is_32bit_windows(tool, lib): [tool, "--file-header", lib], universal_newlines=True ) for line in output.splitlines(): - match = re.match("Format: (\S+)", line) + match = re.match(r"Format: (\S+)", line) if match: return match.group(1) == "COFF-i386" return False @@ -85,7 +85,7 @@ def should_keep_microsoft_symbol(symbol, calling_convention_decoration): if not "?" in symbol: if calling_convention_decoration: # Remove calling convention decoration from names - match = re.match("[_@]([^@]+)", symbol) + match = re.match(r"[_@]([^@]+)", symbol) if match: symbol = match.group(1) # Discard floating point/SIMD constants. @@ -100,10 +100,10 @@ def should_keep_microsoft_symbol(symbol, calling_convention_decoration): # An anonymous namespace is mangled as ?A(maybe hex number)@. Any symbol # that mentions an anonymous namespace can be discarded, as the anonymous # namespace doesn't exist outside of that translation unit. - elif re.search("\?A(0x\w+)?@", symbol): + elif re.search(r"\?A(0x\w+)?@", symbol): return None # Skip X86GenMnemonicTables functions, they are not exposed from llvm/include/. - elif re.match("\?is[A-Z0-9]*@X86@llvm", symbol): + elif re.match(r"\?is[A-Z0-9]*@X86@llvm", symbol): return None # Keep mangled llvm:: and clang:: function symbols. How we detect these is a # bit of a mess and imprecise, but that avoids having to completely demangle @@ -123,7 +123,7 @@ def should_keep_microsoft_symbol(symbol, calling_convention_decoration): # ::= .+@ (list of types) # ::= .*Z (list of types, varargs) # ::= exceptions are not allowed - elif re.search("(llvm|clang)@@[A-Z][A-Z0-9_]*[A-JQ].+(X|.+@|.*Z)$", symbol): + elif re.search(r"(llvm|clang)@@[A-Z][A-Z0-9_]*[A-JQ].+(X|.+@|.*Z)$", symbol): return symbol return None @@ -140,7 +140,7 @@ def should_keep_itanium_symbol(symbol, calling_convention_decoration): if not symbol.startswith("_") and not symbol.startswith("."): return symbol # Discard manglings that aren't nested names - match = re.match("\.?_Z(T[VTIS])?(N.+)", symbol) + match = re.match(r"\.?_Z(T[VTIS])?(N.+)", symbol) if not match: return None # Demangle the name. If the name is too complex then we don't need to keep @@ -169,19 +169,19 @@ class TooComplexName(Exception): # (name, rest of string) pair. def parse_itanium_name(arg): # Check for a normal name - match = re.match("(\d+)(.+)", arg) + match = re.match(r"(\d+)(.+)", arg) if match: n = int(match.group(1)) name = match.group(1) + match.group(2)[:n] rest = match.group(2)[n:] return name, rest # Check for constructor/destructor names - match = re.match("([CD][123])(.+)", arg) + match = re.match(r"([CD][123])(.+)", arg) if match: return match.group(1), match.group(2) # Assume that a sequence of characters that doesn't end a nesting is an # operator (this is very imprecise, but appears to be good enough) - match = re.match("([^E]+)(.+)", arg) + match = re.match(r"([^E]+)(.+)", arg) if match: return match.group(1), match.group(2) # Anything else: we can't handle it @@ -196,13 +196,13 @@ def skip_itanium_template(arg): tmp = arg[1:] while tmp: # Check for names - match = re.match("(\d+)(.+)", tmp) + match = re.match(r"(\d+)(.+)", tmp) if match: n = int(match.group(1)) tmp = match.group(2)[n:] continue # Check for substitutions - match = re.match("S[A-Z0-9]*_(.+)", tmp) + match = re.match(r"S[A-Z0-9]*_(.+)", tmp) if match: tmp = match.group(1) # Start of a template @@ -231,14 +231,14 @@ def parse_itanium_nested_name(arg): ret = [] # Skip past the N, and possibly a substitution - match = re.match("NS[A-Z0-9]*_(.+)", arg) + match = re.match(r"NS[A-Z0-9]*_(.+)", arg) if match: tmp = match.group(1) else: tmp = arg[1:] # Skip past CV-qualifiers and ref qualifiers - match = re.match("[rVKRO]*(.+)", tmp) + match = re.match(r"[rVKRO]*(.+)", tmp) if match: tmp = match.group(1) @@ -280,19 +280,19 @@ def parse_microsoft_mangling(arg): if arg.startswith("@"): return components # Check for a simple name - match = re.match("(\w+)@(.+)", arg) + match = re.match(r"(\w+)@(.+)", arg) if match: components.append((match.group(1), False)) arg = match.group(2) continue # Check for a special function name - match = re.match("(\?_?\w)(.+)", arg) + match = re.match(r"(\?_?\w)(.+)", arg) if match: components.append((match.group(1), False)) arg = match.group(2) continue # Check for a template name - match = re.match("\?\$(\w+)@[^@]+@(.+)", arg) + match = re.match(r"\?\$(\w+)@[^@]+@(.+)", arg) if match: components.append((match.group(1), True)) arg = match.group(2) @@ -323,7 +323,7 @@ def get_template_name(sym, mangling): if mangling == "microsoft": names = parse_microsoft_mangling(sym) else: - match = re.match("\.?_Z(T[VTIS])?(N.+)", sym) + match = re.match(r"\.?_Z(T[VTIS])?(N.+)", sym) if match: names, _ = parse_itanium_nested_name(match.group(2)) else: