""" Generate Lib/_opcode_metadata.py for RustPython bytecode. This file generates opcode metadata that is compatible with CPython 3.13. """ import itertools import pathlib import re import typing ROOT = pathlib.Path(__file__).parents[1] BYTECODE_FILE = ( ROOT / "crates" / "compiler-core" / "src" / "bytecode" / "instruction.rs" ) OPCODE_METADATA_FILE = ROOT / "Lib" / "_opcode_metadata.py" # Opcodes that needs to be first, regardless of their opcode ID. PRIORITY_OPMAP = { "CACHE", "RESERVED", "RESUME", "INSTRUMENTED_LINE", "ENTER_EXECUTOR", } def to_pascal_case(s: str) -> str: res = re.sub(r"(?<=[a-z0-9])([A-Z])", r"_\1", s) return re.sub(r"(\D)(\d+)$", r"\1_\2", res).upper() class Opcode(typing.NamedTuple): rust_name: str id: int have_oparg: bool @property def cpython_name(self) -> str: return to_pascal_case(self.rust_name) @property def is_instrumented(self): return self.cpython_name.startswith("INSTRUMENTED_") @classmethod def from_str(cls, body: str): raw_variants = re.split(r"(\d+),", body.strip()) raw_variants.remove("") for raw_name, raw_id in itertools.batched(raw_variants, 2, strict=True): have_oparg = "Arg<" in raw_name # Hacky but works name = re.findall(r"\b[A-Z][A-Za-z]*\d*\b(?=\s*[\({=])", raw_name)[0] yield cls(rust_name=name.strip(), id=int(raw_id), have_oparg=have_oparg) def __lt__(self, other: typing.Self) -> bool: sprio, oprio = ( opcode.cpython_name not in PRIORITY_OPMAP for opcode in (self, other) ) return (sprio, self.id) < (oprio, other.id) def extract_enum_body(contents: str, enum_name: str) -> str: res = re.search(f"pub enum {enum_name} " + r"\{(.+?)\n\}", contents, re.DOTALL) if not res: raise ValueError(f"Could not find {enum_name} enum") return "\n".join( line.split("//")[0].strip() # Remove any comment. i.e. "foo // some comment" for line in res.group(1).splitlines() if not line.strip().startswith("//") # Ignore comment lines ) def build_deopts(contents: str) -> dict[str, list[str]]: raw_body = re.search( r"fn deopt\(self\) -> Option(.*)", contents, re.DOTALL ).group(1) body = "\n".join( itertools.takewhile( lambda l: not l.startswith("_ =>"), # Take until reaching fallback filter( lambda l: ( not l.startswith( ("//", "Some(match") ) # Skip comments or start of match ), map(str.strip, raw_body.splitlines()), ), ) ).removeprefix("{") depth = 0 arms = [] buf = [] for char in body: if char == "{": depth += 1 elif char == "}": depth -= 1 if depth == 0 and (char in ("}", ",")): arm = "".join(buf).strip() arms.append(arm) buf = [] else: buf.append(char) # last arm arms.append("".join(buf)) arms = [arm for arm in arms if arm] deopts = {} for arm in arms: *specialized, deopt = map(to_pascal_case, re.findall(r"Self::(\w*)\b", arm)) deopts[deopt] = specialized return deopts contents = BYTECODE_FILE.read_text(encoding="utf-8") deopts = build_deopts(contents) enum_body = "\n".join( extract_enum_body(contents, enum_name) for enum_name in ("Instruction", "PseudoInstruction") ) opcodes = list(Opcode.from_str(enum_body)) have_oparg = min(opcode.id for opcode in opcodes if opcode.have_oparg) - 1 min_instrumented = min(opcode.id for opcode in opcodes if opcode.is_instrumented) # Generate the output file output = """# This file is generated by scripts/generate_opcode_metadata.py # for RustPython bytecode format (CPython 3.14 compatible opcode numbers). # Do not edit! """ output += "\n_specializations = {\n" for key, lst in deopts.items(): output += f' "{key}": [\n' for item in lst: output += f' "{item}",\n' output += " ],\n" output += "}\n" specialized = set(itertools.chain.from_iterable(deopts.values())) output += "\n_specialized_opmap = {\n" for opcode in sorted(opcodes, key=lambda op: op.cpython_name): cpython_name = opcode.cpython_name if cpython_name not in specialized: continue output += f" '{cpython_name}': {opcode.id},\n" output += "}\n" output += "\nopmap = {\n" for opcode in sorted(opcodes): cpython_name = opcode.cpython_name if cpython_name in specialized: continue output += f" '{cpython_name}': {opcode.id},\n" output += "}\n" output += f""" HAVE_ARGUMENT = {have_oparg} MIN_INSTRUMENTED_OPCODE = {min_instrumented} """ OPCODE_METADATA_FILE.write_text(output, encoding="utf-8")