From 0c8947b5563f18c67d37911ac6699a3de8b32081 Mon Sep 17 00:00:00 2001 From: elliot Date: Sat, 18 Jan 2025 15:50:36 -0600 Subject: [PATCH 01/16] init cross version git ignore --- test_crossversion/.gitignore | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 test_crossversion/.gitignore diff --git a/test_crossversion/.gitignore b/test_crossversion/.gitignore new file mode 100644 index 00000000..c4a44ac8 --- /dev/null +++ b/test_crossversion/.gitignore @@ -0,0 +1,3 @@ +templates/ +**/__pycache__/ +*.pyc From e0476fc95a9ca567fbcd7ccdef7c50bd7cc39819 Mon Sep 17 00:00:00 2001 From: elliot Date: Sat, 18 Jan 2025 15:50:51 -0600 Subject: [PATCH 02/16] init automated cross version testing --- test_crossversion/Makefile | 43 ++++++ test_crossversion/USAGE.md | 16 +++ test_crossversion/config/__init__.py | 22 ++++ test_crossversion/prepare_templates.py | 47 +++++++ test_crossversion/serialize_bytecode.py | 165 ++++++++++++++++++++++++ test_crossversion/test_xdis.py | 62 +++++++++ test_crossversion/tox.ini | 13 ++ test_crossversion/tox_prepare.ini | 13 ++ 8 files changed, 381 insertions(+) create mode 100644 test_crossversion/Makefile create mode 100644 test_crossversion/USAGE.md create mode 100644 test_crossversion/config/__init__.py create mode 100644 test_crossversion/prepare_templates.py create mode 100644 test_crossversion/serialize_bytecode.py create mode 100644 test_crossversion/test_xdis.py create mode 100644 test_crossversion/tox.ini create mode 100644 test_crossversion/tox_prepare.ini diff --git a/test_crossversion/Makefile b/test_crossversion/Makefile new file mode 100644 index 00000000..abc5d6ac --- /dev/null +++ b/test_crossversion/Makefile @@ -0,0 +1,43 @@ +.PHONY: clean compile prepare test get_sources + +SOURCE=./templates/source/ +COMPILED=./templates/compiled/ +SERIALIZED=./templates/serialized/ + +# usage +define helptext +Crossversion xdis test usage: + help | usage : show this menu + clean : remove compiled and serialized files + compile : with each tox env, compile all sources in $(SOURCE) to $(COMPILED), then serialize with dis to $(SERIALIZED) + prepare : clean then compile + get_sources : symlink all .py files in ./ -> $(SOURCE) + test : prepare and run tests. with each tox env, serialize pyc's in $(COMPILED) with xdis, then check against corresponding serialized pyc in $(SERIALIZED) +endef +export helptext + +help: + @echo "$$helptext" + +usage: help + +# clean compiled files +clean: + find . -name "*.pyc" -delete + find . -name "__pycache__" -type d -delete + rm -rf $(COMPILED)/* + rm -rf $(SERIALIZED)/* + +# compile sources in templates/source +compile: + tox -c ./tox_prepare.ini + +prepare: clean compile + +# sim link python source files to ./templates/source +get_sources: + cp -f *.py $(SOURCE) + +# main test +test: get_sources prepare + tox diff --git a/test_crossversion/USAGE.md b/test_crossversion/USAGE.md new file mode 100644 index 00000000..9de7121b --- /dev/null +++ b/test_crossversion/USAGE.md @@ -0,0 +1,16 @@ +# Automated crossversion testing +This testing suite is used for automatic testing of differences found between xdis and dis. +This is done by having a way to identically "serialize" important attributes in xdis and dis bytecodes. +We then can check a diff between a serialized xdis and dis bytecode to find if xdis is parsing something incorrectly. +Most tests should be ran using the makefile. + +# System Requirements +- `pyenv` and `pyenv-virtualenv` + - Each version needing to be tested should be installed with pyenv. +- `tox` + +# Usage +## Makefile +Run `make` or `make help` to show the help menu for running and preparing tests. + +To simply run tests, `make test` will copy some sources, prepare template files, and run tests. diff --git a/test_crossversion/config/__init__.py b/test_crossversion/config/__init__.py new file mode 100644 index 00000000..43d0cd91 --- /dev/null +++ b/test_crossversion/config/__init__.py @@ -0,0 +1,22 @@ +from configparser import ConfigParser +from pathlib import Path +from sys import version_info + +# main test root dir +_test_path = Path(__file__).parent.parent + +# system version of python +SYS_VERSION = f"{version_info.major}.{version_info.minor}" + +# template dirs +TEMPLATE_DIR = _test_path / "templates" +TEMPLATE_SOURCE_DIR = TEMPLATE_DIR / "source" +TEMPLATE_COMPILED_DIR = TEMPLATE_DIR / "compiled" +TEMPLATE_SERIALIZED_DIR = TEMPLATE_DIR / "serialized" + +# check dirs and make them if needed +_check_dir = lambda dir: dir.mkdir() if not dir.exists() else True +_check_dir(TEMPLATE_DIR) +_check_dir(TEMPLATE_SOURCE_DIR) +_check_dir(TEMPLATE_COMPILED_DIR) +_check_dir(TEMPLATE_SERIALIZED_DIR) diff --git a/test_crossversion/prepare_templates.py b/test_crossversion/prepare_templates.py new file mode 100644 index 00000000..96d1dc69 --- /dev/null +++ b/test_crossversion/prepare_templates.py @@ -0,0 +1,47 @@ +from py_compile import compile + +from config import ( + SYS_VERSION, + TEMPLATE_COMPILED_DIR, + TEMPLATE_SERIALIZED_DIR, + TEMPLATE_SOURCE_DIR, +) +from serialize_bytecode import serialize_pyc + + +def prepare_templates(): + """ + Compile files in template source dir, then serialize with dis + Intermediary steps are saved in respective folders in templates / / + """ + # create folders to save pyc's + compiled_dir = TEMPLATE_COMPILED_DIR / SYS_VERSION + serialized_dir = TEMPLATE_SERIALIZED_DIR / SYS_VERSION + if not compiled_dir.exists(): + compiled_dir.mkdir() + if not serialized_dir.exists(): + serialized_dir.mkdir() + + # compile and serialize template files + num_source = 0 + for source in TEMPLATE_SOURCE_DIR.glob("*.py"): + + # create paths + pyc_file = compiled_dir / f"{source.stem}_{SYS_VERSION}.pyc" + serialized_file = serialized_dir / f"{source.stem}_{SYS_VERSION}.txt" + + # compile pyc + compile(str(source), str(pyc_file)) + print(f"Compiled {str(source)} -> {str(pyc_file)}") + + # serialize pyc + with serialized_file.open("w") as f: + serialize_pyc(pyc_file, False, f) + print(f"Serialized {str(pyc_file)} -> {str(serialized_file)}") + num_source += 1 + + print(f"{num_source} files compiled and serialized") + + +if __name__ == "__main__": + prepare_templates() diff --git a/test_crossversion/serialize_bytecode.py b/test_crossversion/serialize_bytecode.py new file mode 100644 index 00000000..d4bc83a7 --- /dev/null +++ b/test_crossversion/serialize_bytecode.py @@ -0,0 +1,165 @@ +from __future__ import annotations + +import argparse +import sys +from pathlib import Path +from typing import Callable, TextIO + +import xdis +from xdis import disassemble_file, iscode + +# Util to format shorthand code obj name +# Used so we do not compare memory addrs +_fmt_codeobj = lambda co: f"" + + +def _iter_nested_bytecodes(bytecode, bytecode_constructor: Callable): + """ + iterate over a bytecode and its child bytecodes + bytecode: bytecode object to iterate, will be yielded on first call + bytecode_constructor: constructor to create child bytecodes with + """ + bc_stack = [bytecode] + while bc_stack: + bc = bc_stack.pop() + bc_stack.extend( + bytecode_constructor(obj) for obj in bc.codeobj.co_consts if iscode(obj) + ) + yield bc + + +def _format_headers(bytecode) -> str: + """Format important headers (attrs) of bytecode.""" + + # TODO add an automated way to filter attrs not used in dis that may be present in xdis + # simple solution may just be a header in a serialized pyc stating what is being saved + + # headers of the codeobj to serialize + headers_to_serialize = [ + "co_argcount", + "co_cellvars", + "co_code", + "co_consts", + "co_firstlineno", + "co_flags", + "co_freevars", + "co_kwonlyargcount", + "co_linetable", + # "co_lnotab", # not in dis >3.11, see todo above + "co_name", + "co_names", + "co_nlocals", + "co_posonlyargcount", + "co_stacksize", + "co_varnames", + ] + # default format for each attr + header_fmt = "{name} : {val}" + + # format headers + formatted_headers = [] + for attr in headers_to_serialize: + if not hasattr(bytecode.codeobj, attr): + print(f"Warning: Codeobj missing test_attr {attr}") + continue + val = getattr(bytecode.codeobj, attr) + # filter code objects in co_consts + if attr == "co_consts": + val = [ + f" str: + """Format all instructions in given bytecode.""" + # TODO revisit ignoring argrepr and argvals in tests + # we are ignoring argrepr and val for now, as xdis will sometimes include additional info there + + # default format for each instruction + inst_fmt = "{inst.opcode} {inst.opname} : {inst.arg} {argval}" + insts = [] + for inst in bytecode: + # skip cache + if inst.opname == "CACHE": + continue + # filter and format argvals + if iscode(inst.argval): + argval = _fmt_codeobj(inst.argval) + insts.append(inst_fmt.format(inst=inst, argval=argval)) + else: + insts.append(inst_fmt.format(inst=inst, argval=inst.argval)) + + return "\n".join(insts) + + +def format_bytecode(bytecode) -> str: + """Create complete formatted string of bytecode.""" + outstr = f"BYTECODE {bytecode.codeobj.co_name}\n" + outstr += "ATTRS:\n" + outstr += _format_headers(bytecode) + "\n" + outstr += "INSTS:\n" + outstr += _format_insts(bytecode) + "\n" + return outstr + + +def serialize_pyc( + pyc: Path, use_xdis: bool = False, output_file: TextIO | None = sys.stdout +) -> str: + """Serialize a pyc to text for testing, using dis or xdis.""" + + # create a code object in xdis or dis, and a constructor to make bytecodes with + if use_xdis: + # write to null so no disassembly output + from os import devnull + + with open(devnull, "w") as fnull: + # create xdis code obj + (_, code_object, version_tuple, _, _, is_pypy, _, _) = disassemble_file( + str(pyc), fnull + ) + # get corresponding opcode class + opc = xdis.get_opcode(version_tuple, is_pypy, None) + # create xdis bytecode constructor + bytecode_constructor = lambda codeobj: xdis.Bytecode(codeobj, opc) + else: + import dis + import marshal + + # load code obj + code_object = marshal.loads(pyc.read_bytes()[16:]) + # create dis bytecode constructor + bytecode_constructor = lambda codeobj: dis.Bytecode(codeobj) + + # iter bytecodes + formatted_bytecodes = [] + init_bytecode = bytecode_constructor(code_object) + for bc in _iter_nested_bytecodes(init_bytecode, bytecode_constructor): + formatted_bytecodes.append(format_bytecode(bc)) + + # write formatted bytecodes + full_formatted_bytecode = "\n".join(formatted_bytecodes) + if output_file: + output_file.write(full_formatted_bytecode) + + return full_formatted_bytecode + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(prog="serialize_bytecode") + parser.add_argument( + "-x", + "--use_xdis", + help="Use xdis to serialize bytecode", + action="store_true", + ) + parser.add_argument("pyc", help="PYC file to serialize.") + args = parser.parse_args() + + pyc_path = Path(args.pyc) + assert pyc_path.exists(), "PYC does not exist" + + serialize_pyc(pyc_path, args.use_xdis) diff --git a/test_crossversion/test_xdis.py b/test_crossversion/test_xdis.py new file mode 100644 index 00000000..ce0a0e49 --- /dev/null +++ b/test_crossversion/test_xdis.py @@ -0,0 +1,62 @@ +from __future__ import annotations + +from pathlib import Path +from typing import Iterable + +from config import SYS_VERSION, TEMPLATE_COMPILED_DIR, TEMPLATE_SERIALIZED_DIR +from serialize_bytecode import serialize_pyc + + +class SerializedTestCase: + """Test case for comparing a disassembled xdis and dis pyc, Needs a pyc to + disassemble with xdis then serialize, and a dis serialized pyc txt file.""" + + pyc_path: Path + serialized_txt_path: Path + serialized_dis: str + serialized_xdis: str + message: str + + def __init__(self, pyc: Path, serialized_txt: Path): + self.pyc_path = pyc + self.serialized_txt_path = serialized_txt + self.serialized_dis = serialized_txt.read_text() + self.serialized_xdis = serialize_pyc(pyc, use_xdis=True, output_file=None) + self.message = ( + f"Checking equivalence: {self.pyc_path} <---> {self.serialized_txt_path}" + ) + + +def get_tests_by_version(v: str) -> Iterable[SerializedTestCase]: + """Iterate test cases from Template folder with given version v.""" + compiled_tests_dir = Path(TEMPLATE_COMPILED_DIR / v) + serialized_tests_dir = Path(TEMPLATE_SERIALIZED_DIR / v) + assert compiled_tests_dir.exists() + assert serialized_tests_dir.exists() + + for compiled_test in compiled_tests_dir.glob("*"): + test_stem = compiled_test.stem + serialized_test = Path(serialized_tests_dir / (test_stem + ".txt")) + + # check test case pair + assert serialized_test.exists() and compiled_test.exists() + yield SerializedTestCase(compiled_test, serialized_test) + + +def get_versions() -> Iterable[str]: + """Get test versions by iterating through dirs in template compiled dir.""" + for dir in TEMPLATE_COMPILED_DIR.glob("*"): + if dir.is_dir(): + yield dir.name + + +def test_all_versions(): + """Test each version in compiled template folder.""" + for v in get_versions(): + print(f"=== {SYS_VERSION}: Testing version {v} ===") + for case in get_tests_by_version(v): + assert case.serialized_dis.splitlines() == case.serialized_xdis.splitlines() + + +if __name__ == "__main__": + test_all_versions() diff --git a/test_crossversion/tox.ini b/test_crossversion/tox.ini new file mode 100644 index 00000000..a5bcaf67 --- /dev/null +++ b/test_crossversion/tox.ini @@ -0,0 +1,13 @@ +[tox] +min_version = 4.0 +requires = tox-pyenv-redux +# ENV LIST MUST BE COMMA SEPARATED LIST OF PYTHON VERSIONS +env_list = 3.9, 3.10, 3.11, 3.12, 3.13 + +[testenv] +description = Check all permutations of python dis code objects with xdis code objects. +deps = + -e=file:///{toxinidir}/../. + pytest +commands = + pytest {tty:--color=yes} -s {posargs} ./test_xdis.py diff --git a/test_crossversion/tox_prepare.ini b/test_crossversion/tox_prepare.ini new file mode 100644 index 00000000..f403eafa --- /dev/null +++ b/test_crossversion/tox_prepare.ini @@ -0,0 +1,13 @@ +[tox] +min_version = 4.0 +requires = tox-pyenv-redux +# ENV LIST MUST BE COMMA SEPARATED LIST OF PYTHON VERSIONS +env_list = 3.9, 3.10, 3.11, 3.12, 3.13 + +[testenv] +description = Compile and serialize source templates with dis +deps = + -e=file:///{toxinidir}/../. + pytest # not needed but speeds up env creation +commands = + python ./prepare_templates.py From 445dd0153f1af518fc21ece3e51dff109faed316 Mon Sep 17 00:00:00 2001 From: elliot Date: Sat, 18 Jan 2025 16:27:46 -0600 Subject: [PATCH 03/16] parameterize pytest --- test_crossversion/test_xdis.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/test_crossversion/test_xdis.py b/test_crossversion/test_xdis.py index ce0a0e49..4acd3b2a 100644 --- a/test_crossversion/test_xdis.py +++ b/test_crossversion/test_xdis.py @@ -3,6 +3,8 @@ from pathlib import Path from typing import Iterable +import pytest + from config import SYS_VERSION, TEMPLATE_COMPILED_DIR, TEMPLATE_SERIALIZED_DIR from serialize_bytecode import serialize_pyc @@ -50,13 +52,11 @@ def get_versions() -> Iterable[str]: yield dir.name -def test_all_versions(): - """Test each version in compiled template folder.""" - for v in get_versions(): - print(f"=== {SYS_VERSION}: Testing version {v} ===") - for case in get_tests_by_version(v): - assert case.serialized_dis.splitlines() == case.serialized_xdis.splitlines() +pytest_versions = list(get_versions()) -if __name__ == "__main__": - test_all_versions() +@pytest.mark.parametrize("version", pytest_versions) +def test_version(version): + """Test each version in compiled template folder.""" + for case in get_tests_by_version(version): + assert case.serialized_dis.splitlines() == case.serialized_xdis.splitlines() From 2b0f10a43259d728739210ed951314ccec56695b Mon Sep 17 00:00:00 2001 From: elliot Date: Sat, 18 Jan 2025 17:40:03 -0600 Subject: [PATCH 04/16] improve pytest parameter and testcase repr --- test_crossversion/test_xdis.py | 39 ++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/test_crossversion/test_xdis.py b/test_crossversion/test_xdis.py index 4acd3b2a..e7994184 100644 --- a/test_crossversion/test_xdis.py +++ b/test_crossversion/test_xdis.py @@ -1,10 +1,11 @@ from __future__ import annotations +from itertools import chain + from pathlib import Path from typing import Iterable import pytest - from config import SYS_VERSION, TEMPLATE_COMPILED_DIR, TEMPLATE_SERIALIZED_DIR from serialize_bytecode import serialize_pyc @@ -28,6 +29,19 @@ def __init__(self, pyc: Path, serialized_txt: Path): f"Checking equivalence: {self.pyc_path} <---> {self.serialized_txt_path}" ) + def __str__(self) -> str: + return self.message + + def __repr__(self) -> str: + return self.__str__() + + +def get_versions() -> Iterable[str]: + """Get test versions by iterating through dirs in template compiled dir.""" + for dir in TEMPLATE_COMPILED_DIR.glob("*"): + if dir.is_dir(): + yield dir.name + def get_tests_by_version(v: str) -> Iterable[SerializedTestCase]: """Iterate test cases from Template folder with given version v.""" @@ -45,18 +59,15 @@ def get_tests_by_version(v: str) -> Iterable[SerializedTestCase]: yield SerializedTestCase(compiled_test, serialized_test) -def get_versions() -> Iterable[str]: - """Get test versions by iterating through dirs in template compiled dir.""" - for dir in TEMPLATE_COMPILED_DIR.glob("*"): - if dir.is_dir(): - yield dir.name - - -pytest_versions = list(get_versions()) +# @pytest.mark.parametrize("version", get_versions()) +# def test_version(version): +# """Test each version in compiled template folder.""" +# for case in get_tests_by_version(version): +# assert case.serialized_dis.splitlines() == case.serialized_xdis.splitlines() -@pytest.mark.parametrize("version", pytest_versions) -def test_version(version): - """Test each version in compiled template folder.""" - for case in get_tests_by_version(version): - assert case.serialized_dis.splitlines() == case.serialized_xdis.splitlines() +@pytest.mark.parametrize( + "case", chain.from_iterable(get_tests_by_version(v) for v in get_versions()) +) +def test_case(case: SerializedTestCase): + assert case.serialized_dis.splitlines() == case.serialized_xdis.splitlines() From b867e8f50af3513059a366279ae166cfaeae86a7 Mon Sep 17 00:00:00 2001 From: elliot Date: Tue, 21 Jan 2025 11:22:43 -0600 Subject: [PATCH 05/16] Update makefile and usage for `remake` --- test_crossversion/Makefile | 16 ++++++++-------- test_crossversion/USAGE.md | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/test_crossversion/Makefile b/test_crossversion/Makefile index abc5d6ac..e718531b 100644 --- a/test_crossversion/Makefile +++ b/test_crossversion/Makefile @@ -1,4 +1,4 @@ -.PHONY: clean compile prepare test get_sources +.PHONY: help clean compile prepare get_sources test SOURCE=./templates/source/ COMPILED=./templates/compiled/ @@ -7,7 +7,7 @@ SERIALIZED=./templates/serialized/ # usage define helptext Crossversion xdis test usage: - help | usage : show this menu + help : show this menu clean : remove compiled and serialized files compile : with each tox env, compile all sources in $(SOURCE) to $(COMPILED), then serialize with dis to $(SERIALIZED) prepare : clean then compile @@ -16,28 +16,28 @@ Crossversion xdis test usage: endef export helptext +#: show help menu help: @echo "$$helptext" -usage: help - -# clean compiled files +#: remove compiled and serialized files clean: find . -name "*.pyc" -delete find . -name "__pycache__" -type d -delete rm -rf $(COMPILED)/* rm -rf $(SERIALIZED)/* -# compile sources in templates/source +#: with each tox env, compile all sources in ./templates/source/ to ./templates/compiled/, then serialize with dis to ./templates/serialized/ compile: tox -c ./tox_prepare.ini +#: clean then compile prepare: clean compile -# sim link python source files to ./templates/source +#: copy all .py files in ./ -> ./templates/source/ get_sources: cp -f *.py $(SOURCE) -# main test +#: prepare and run tests. with each tox env, serialize pyc's in ./templates/compiled/ with xdis, then check against corresponding dis serialized pyc in ./templates/serialized/ test: get_sources prepare tox diff --git a/test_crossversion/USAGE.md b/test_crossversion/USAGE.md index 9de7121b..006808a5 100644 --- a/test_crossversion/USAGE.md +++ b/test_crossversion/USAGE.md @@ -11,6 +11,6 @@ Most tests should be ran using the makefile. # Usage ## Makefile -Run `make` or `make help` to show the help menu for running and preparing tests. +Run `make` or `make help` to show the help menu for running and preparing tests, or with `remake`, `remake --tasks`. To simply run tests, `make test` will copy some sources, prepare template files, and run tests. From 98611179a66978584f77c23fb3f8b868600f4fc0 Mon Sep 17 00:00:00 2001 From: elliot Date: Tue, 21 Jan 2025 11:24:29 -0600 Subject: [PATCH 06/16] test refactor and comments move test of existing files to test class --- test_crossversion/test_xdis.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/test_crossversion/test_xdis.py b/test_crossversion/test_xdis.py index e7994184..5821bb38 100644 --- a/test_crossversion/test_xdis.py +++ b/test_crossversion/test_xdis.py @@ -6,7 +6,7 @@ from typing import Iterable import pytest -from config import SYS_VERSION, TEMPLATE_COMPILED_DIR, TEMPLATE_SERIALIZED_DIR +from config import TEMPLATE_COMPILED_DIR, TEMPLATE_SERIALIZED_DIR from serialize_bytecode import serialize_pyc @@ -21,10 +21,14 @@ class SerializedTestCase: message: str def __init__(self, pyc: Path, serialized_txt: Path): + # check test case pair exist + assert pyc.exists() and serialized_txt.exists() self.pyc_path = pyc self.serialized_txt_path = serialized_txt + # read serialized bytecode self.serialized_dis = serialized_txt.read_text() self.serialized_xdis = serialize_pyc(pyc, use_xdis=True, output_file=None) + # debug message self.message = ( f"Checking equivalence: {self.pyc_path} <---> {self.serialized_txt_path}" ) @@ -54,8 +58,6 @@ def get_tests_by_version(v: str) -> Iterable[SerializedTestCase]: test_stem = compiled_test.stem serialized_test = Path(serialized_tests_dir / (test_stem + ".txt")) - # check test case pair - assert serialized_test.exists() and compiled_test.exists() yield SerializedTestCase(compiled_test, serialized_test) From aa0acad799c1297a8e31bbf17eae2d5f6a3da73d Mon Sep 17 00:00:00 2001 From: elliot Date: Tue, 21 Jan 2025 18:25:12 -0600 Subject: [PATCH 07/16] Add version specific test headers and improve test verbosity --- test_crossversion/config/__init__.py | 1 + test_crossversion/serialize_bytecode.py | 77 ++++++++++++++++--------- test_crossversion/test_xdis.py | 6 +- 3 files changed, 53 insertions(+), 31 deletions(-) diff --git a/test_crossversion/config/__init__.py b/test_crossversion/config/__init__.py index 43d0cd91..37d9a2ba 100644 --- a/test_crossversion/config/__init__.py +++ b/test_crossversion/config/__init__.py @@ -7,6 +7,7 @@ # system version of python SYS_VERSION = f"{version_info.major}.{version_info.minor}" +SYS_VERSION_TUPLE = (version_info.major, version_info.minor, version_info.micro) # template dirs TEMPLATE_DIR = _test_path / "templates" diff --git a/test_crossversion/serialize_bytecode.py b/test_crossversion/serialize_bytecode.py index d4bc83a7..c46906d0 100644 --- a/test_crossversion/serialize_bytecode.py +++ b/test_crossversion/serialize_bytecode.py @@ -6,6 +6,9 @@ from typing import Callable, TextIO import xdis + +from config import SYS_VERSION_TUPLE + from xdis import disassemble_file, iscode # Util to format shorthand code obj name @@ -28,13 +31,7 @@ def _iter_nested_bytecodes(bytecode, bytecode_constructor: Callable): yield bc -def _format_headers(bytecode) -> str: - """Format important headers (attrs) of bytecode.""" - - # TODO add an automated way to filter attrs not used in dis that may be present in xdis - # simple solution may just be a header in a serialized pyc stating what is being saved - - # headers of the codeobj to serialize +def _get_headers_to_serialize(bytecode_version: tuple): headers_to_serialize = [ "co_argcount", "co_cellvars", @@ -44,8 +41,6 @@ def _format_headers(bytecode) -> str: "co_flags", "co_freevars", "co_kwonlyargcount", - "co_linetable", - # "co_lnotab", # not in dis >3.11, see todo above "co_name", "co_names", "co_nlocals", @@ -53,28 +48,50 @@ def _format_headers(bytecode) -> str: "co_stacksize", "co_varnames", ] + + if bytecode_version >= (3, 10): + headers_to_serialize.append("co_lines") + if bytecode_version >= (3, 11): + headers_to_serialize.append("co_qualname") + headers_to_serialize.append("co_positions") + return headers_to_serialize + + +def _format_headers(bytecode, bytecode_version: tuple) -> str: + """Format important headers (attrs) of bytecode.""" + # default format for each attr header_fmt = "{name} : {val}" # format headers formatted_headers = [] - for attr in headers_to_serialize: - if not hasattr(bytecode.codeobj, attr): - print(f"Warning: Codeobj missing test_attr {attr}") + for attr_name in _get_headers_to_serialize(bytecode_version): + # check for missing attrs + if not hasattr(bytecode.codeobj, attr_name): + print(f"Warning: Codeobj missing test_attr {attr_name}") continue - val = getattr(bytecode.codeobj, attr) - # filter code objects in co_consts - if attr == "co_consts": + + attr_val = getattr(bytecode.codeobj, attr_name) + + # handle const attrs and some callables + if attr_name == "co_consts": + # filter code objects in co_consts val = [ - f" str: +def _format_insts(bytecode, bytecode_version: tuple) -> str: """Format all instructions in given bytecode.""" # TODO revisit ignoring argrepr and argvals in tests # we are ignoring argrepr and val for now, as xdis will sometimes include additional info there @@ -86,23 +103,25 @@ def _format_insts(bytecode) -> str: # skip cache if inst.opname == "CACHE": continue + # filter and format argvals if iscode(inst.argval): argval = _fmt_codeobj(inst.argval) - insts.append(inst_fmt.format(inst=inst, argval=argval)) else: - insts.append(inst_fmt.format(inst=inst, argval=inst.argval)) + argval = inst.argval + + insts.append(inst_fmt.format(inst=inst, argval=argval)) return "\n".join(insts) -def format_bytecode(bytecode) -> str: +def format_bytecode(bytecode, bytecode_version: tuple) -> str: """Create complete formatted string of bytecode.""" outstr = f"BYTECODE {bytecode.codeobj.co_name}\n" outstr += "ATTRS:\n" - outstr += _format_headers(bytecode) + "\n" + outstr += _format_headers(bytecode, bytecode_version) + "\n" outstr += "INSTS:\n" - outstr += _format_insts(bytecode) + "\n" + outstr += _format_insts(bytecode, bytecode_version) + "\n" return outstr @@ -113,19 +132,22 @@ def serialize_pyc( # create a code object in xdis or dis, and a constructor to make bytecodes with if use_xdis: - # write to null so no disassembly output + # using xdis from os import devnull + # write to null so no disassembly output with open(devnull, "w") as fnull: # create xdis code obj (_, code_object, version_tuple, _, _, is_pypy, _, _) = disassemble_file( - str(pyc), fnull + str(pyc), fnull, asm_format="classic" ) # get corresponding opcode class opc = xdis.get_opcode(version_tuple, is_pypy, None) # create xdis bytecode constructor bytecode_constructor = lambda codeobj: xdis.Bytecode(codeobj, opc) + bytecode_version = version_tuple else: + # using dis import dis import marshal @@ -133,12 +155,13 @@ def serialize_pyc( code_object = marshal.loads(pyc.read_bytes()[16:]) # create dis bytecode constructor bytecode_constructor = lambda codeobj: dis.Bytecode(codeobj) + bytecode_version = SYS_VERSION_TUPLE - # iter bytecodes + # iter bytecodes and create list of formatted bytecodes strings formatted_bytecodes = [] init_bytecode = bytecode_constructor(code_object) for bc in _iter_nested_bytecodes(init_bytecode, bytecode_constructor): - formatted_bytecodes.append(format_bytecode(bc)) + formatted_bytecodes.append(format_bytecode(bc, bytecode_version)) # write formatted bytecodes full_formatted_bytecode = "\n".join(formatted_bytecodes) diff --git a/test_crossversion/test_xdis.py b/test_crossversion/test_xdis.py index 5821bb38..2d147722 100644 --- a/test_crossversion/test_xdis.py +++ b/test_crossversion/test_xdis.py @@ -6,7 +6,7 @@ from typing import Iterable import pytest -from config import TEMPLATE_COMPILED_DIR, TEMPLATE_SERIALIZED_DIR +from config import SYS_VERSION, TEMPLATE_COMPILED_DIR, TEMPLATE_SERIALIZED_DIR from serialize_bytecode import serialize_pyc @@ -29,9 +29,7 @@ def __init__(self, pyc: Path, serialized_txt: Path): self.serialized_dis = serialized_txt.read_text() self.serialized_xdis = serialize_pyc(pyc, use_xdis=True, output_file=None) # debug message - self.message = ( - f"Checking equivalence: {self.pyc_path} <---> {self.serialized_txt_path}" - ) + self.message = f"{SYS_VERSION}: Checking equivalence: {self.pyc_path} <---> {self.serialized_txt_path}" def __str__(self) -> str: return self.message From d20e3b0af4b88b01ac762a9895e1afab89e0e4c5 Mon Sep 17 00:00:00 2001 From: rocky Date: Thu, 23 Jan 2025 09:45:50 -0500 Subject: [PATCH 08/16] Add tox to "dev" dependencies --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 10fbecba..f74b78d0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,6 +54,7 @@ Downloads = "https://github.com/rocky/python-xdis/releases" dev = [ "pre-commit", "pytest", + "tox", ] [project.scripts] From 83a624569974d4302161b46af853839c3bd24b98 Mon Sep 17 00:00:00 2001 From: elliot Date: Thu, 23 Jan 2025 18:51:53 -0600 Subject: [PATCH 09/16] add .python-version to git ignore --- test_crossversion/.gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/test_crossversion/.gitignore b/test_crossversion/.gitignore index c4a44ac8..ca5e974f 100644 --- a/test_crossversion/.gitignore +++ b/test_crossversion/.gitignore @@ -1,3 +1,4 @@ templates/ **/__pycache__/ *.pyc +.python-version From b03ee2261233f9a9096139fe23acd7089d1f84c0 Mon Sep 17 00:00:00 2001 From: elliot Date: Thu, 23 Jan 2025 18:53:17 -0600 Subject: [PATCH 10/16] update makefile and remove dependency tox-pyenv-redux --- test_crossversion/Makefile | 26 ++++++++++++++++---------- test_crossversion/tox.ini | 1 - test_crossversion/tox_prepare.ini | 1 - 3 files changed, 16 insertions(+), 12 deletions(-) diff --git a/test_crossversion/Makefile b/test_crossversion/Makefile index e718531b..9ade38fc 100644 --- a/test_crossversion/Makefile +++ b/test_crossversion/Makefile @@ -1,4 +1,4 @@ -.PHONY: help clean compile prepare get_sources test +.PHONY: help clean get_sources setup_pyenv compile prepare test SOURCE=./templates/source/ COMPILED=./templates/compiled/ @@ -9,9 +9,10 @@ define helptext Crossversion xdis test usage: help : show this menu clean : remove compiled and serialized files - compile : with each tox env, compile all sources in $(SOURCE) to $(COMPILED), then serialize with dis to $(SERIALIZED) - prepare : clean then compile get_sources : symlink all .py files in ./ -> $(SOURCE) + setup_pyenv : setup local pyenv versions to be used by tox + compile : with each tox env, compile all sources in $(SOURCE) to $(COMPILED), then serialize with dis to $(SERIALIZED) + prepare : fully prepare test environment and compile test files test : prepare and run tests. with each tox env, serialize pyc's in $(COMPILED) with xdis, then check against corresponding serialized pyc in $(SERIALIZED) endef export helptext @@ -27,17 +28,22 @@ clean: rm -rf $(COMPILED)/* rm -rf $(SERIALIZED)/* +#: copy all .py files in ./ -> ./templates/source/ +get_sources: + cp -f *.py $(SOURCE) + +.python-version: + tox --listenvs | xargs pyenv local +#: setup local pyenv versions to be used by tox +setup_pyenv: .python-version + #: with each tox env, compile all sources in ./templates/source/ to ./templates/compiled/, then serialize with dis to ./templates/serialized/ compile: tox -c ./tox_prepare.ini -#: clean then compile -prepare: clean compile - -#: copy all .py files in ./ -> ./templates/source/ -get_sources: - cp -f *.py $(SOURCE) +#: fully prepare tests +prepare: clean get_sources setup_pyenv compile #: prepare and run tests. with each tox env, serialize pyc's in ./templates/compiled/ with xdis, then check against corresponding dis serialized pyc in ./templates/serialized/ -test: get_sources prepare +test: prepare tox diff --git a/test_crossversion/tox.ini b/test_crossversion/tox.ini index a5bcaf67..91b03e6f 100644 --- a/test_crossversion/tox.ini +++ b/test_crossversion/tox.ini @@ -1,6 +1,5 @@ [tox] min_version = 4.0 -requires = tox-pyenv-redux # ENV LIST MUST BE COMMA SEPARATED LIST OF PYTHON VERSIONS env_list = 3.9, 3.10, 3.11, 3.12, 3.13 diff --git a/test_crossversion/tox_prepare.ini b/test_crossversion/tox_prepare.ini index f403eafa..bebba9f2 100644 --- a/test_crossversion/tox_prepare.ini +++ b/test_crossversion/tox_prepare.ini @@ -1,6 +1,5 @@ [tox] min_version = 4.0 -requires = tox-pyenv-redux # ENV LIST MUST BE COMMA SEPARATED LIST OF PYTHON VERSIONS env_list = 3.9, 3.10, 3.11, 3.12, 3.13 From 9c257e569b6c7ce1af5a46f937fce834719321ad Mon Sep 17 00:00:00 2001 From: elliot Date: Thu, 30 Jan 2025 18:06:33 -0600 Subject: [PATCH 11/16] update logging --- test_crossversion/prepare_templates.py | 20 +++++++++++++++++--- test_crossversion/serialize_bytecode.py | 10 +++++++--- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/test_crossversion/prepare_templates.py b/test_crossversion/prepare_templates.py index 96d1dc69..66fff3ae 100644 --- a/test_crossversion/prepare_templates.py +++ b/test_crossversion/prepare_templates.py @@ -1,3 +1,5 @@ +import argparse +import logging from py_compile import compile from config import ( @@ -25,23 +27,35 @@ def prepare_templates(): # compile and serialize template files num_source = 0 for source in TEMPLATE_SOURCE_DIR.glob("*.py"): - # create paths pyc_file = compiled_dir / f"{source.stem}_{SYS_VERSION}.pyc" serialized_file = serialized_dir / f"{source.stem}_{SYS_VERSION}.txt" # compile pyc compile(str(source), str(pyc_file)) - print(f"Compiled {str(source)} -> {str(pyc_file)}") + logging.info(f"Compiled {str(source)} -> {str(pyc_file)}") # serialize pyc with serialized_file.open("w") as f: serialize_pyc(pyc_file, False, f) - print(f"Serialized {str(pyc_file)} -> {str(serialized_file)}") + logging.info(f"Serialized {str(pyc_file)} -> {str(serialized_file)}") num_source += 1 print(f"{num_source} files compiled and serialized") if __name__ == "__main__": + parser = argparse.ArgumentParser(prog="prepare_templates") + parser.add_argument( + "-V", "--verbose", action="store_true", help="Use verbose output" + ) + args = parser.parse_args() + + # setup logger + logging.basicConfig( + format="%(levelname)s: %(message)s", + level=logging.DEBUG if args.verbose else None, + ) + + # compile and serialize templates prepare_templates() diff --git a/test_crossversion/serialize_bytecode.py b/test_crossversion/serialize_bytecode.py index c46906d0..6dfa1fcf 100644 --- a/test_crossversion/serialize_bytecode.py +++ b/test_crossversion/serialize_bytecode.py @@ -1,14 +1,14 @@ from __future__ import annotations import argparse +import logging import sys from pathlib import Path from typing import Callable, TextIO -import xdis - from config import SYS_VERSION_TUPLE +import xdis from xdis import disassemble_file, iscode # Util to format shorthand code obj name @@ -68,7 +68,7 @@ def _format_headers(bytecode, bytecode_version: tuple) -> str: for attr_name in _get_headers_to_serialize(bytecode_version): # check for missing attrs if not hasattr(bytecode.codeobj, attr_name): - print(f"Warning: Codeobj missing test_attr {attr_name}") + logging.warning(f"Codeobj missing test_attr {attr_name}") continue attr_val = getattr(bytecode.codeobj, attr_name) @@ -182,7 +182,11 @@ def serialize_pyc( parser.add_argument("pyc", help="PYC file to serialize.") args = parser.parse_args() + # verify pyc path pyc_path = Path(args.pyc) assert pyc_path.exists(), "PYC does not exist" + # setup logger + logging.basicConfig(format="%(levelname)s: %(message)s") + serialize_pyc(pyc_path, args.use_xdis) From 545a27b887458a07589a3ba307bc50e4a55740f7 Mon Sep 17 00:00:00 2001 From: elliot Date: Thu, 30 Jan 2025 18:09:06 -0600 Subject: [PATCH 12/16] improve test verbosity and run tox prepare in parallel --- test_crossversion/Makefile | 4 ++-- test_crossversion/tox.ini | 2 +- test_crossversion/tox_prepare.ini | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/test_crossversion/Makefile b/test_crossversion/Makefile index 9ade38fc..3b8cda79 100644 --- a/test_crossversion/Makefile +++ b/test_crossversion/Makefile @@ -39,11 +39,11 @@ setup_pyenv: .python-version #: with each tox env, compile all sources in ./templates/source/ to ./templates/compiled/, then serialize with dis to ./templates/serialized/ compile: - tox -c ./tox_prepare.ini + tox p -c ./tox_prepare.ini #: fully prepare tests prepare: clean get_sources setup_pyenv compile #: prepare and run tests. with each tox env, serialize pyc's in ./templates/compiled/ with xdis, then check against corresponding dis serialized pyc in ./templates/serialized/ test: prepare - tox + tox r -c ./tox.ini diff --git a/test_crossversion/tox.ini b/test_crossversion/tox.ini index 91b03e6f..8f34b3d1 100644 --- a/test_crossversion/tox.ini +++ b/test_crossversion/tox.ini @@ -9,4 +9,4 @@ deps = -e=file:///{toxinidir}/../. pytest commands = - pytest {tty:--color=yes} -s {posargs} ./test_xdis.py + pytest {tty:--color=yes} {posargs} ./test_xdis.py diff --git a/test_crossversion/tox_prepare.ini b/test_crossversion/tox_prepare.ini index bebba9f2..3e4efb8a 100644 --- a/test_crossversion/tox_prepare.ini +++ b/test_crossversion/tox_prepare.ini @@ -9,4 +9,4 @@ deps = -e=file:///{toxinidir}/../. pytest # not needed but speeds up env creation commands = - python ./prepare_templates.py + python ./prepare_templates.py {posargs} From c1606c58a806e107b102e15b5e6b13fba93fdb10 Mon Sep 17 00:00:00 2001 From: elliot Date: Thu, 30 Jan 2025 18:09:27 -0600 Subject: [PATCH 13/16] remove co_positions from some tests as not fully supported in xdis --- test_crossversion/serialize_bytecode.py | 2 +- test_crossversion/test_xdis.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/test_crossversion/serialize_bytecode.py b/test_crossversion/serialize_bytecode.py index 6dfa1fcf..62308846 100644 --- a/test_crossversion/serialize_bytecode.py +++ b/test_crossversion/serialize_bytecode.py @@ -53,7 +53,7 @@ def _get_headers_to_serialize(bytecode_version: tuple): headers_to_serialize.append("co_lines") if bytecode_version >= (3, 11): headers_to_serialize.append("co_qualname") - headers_to_serialize.append("co_positions") + # headers_to_serialize.append("co_positions"), not fully supported in xdis return headers_to_serialize diff --git a/test_crossversion/test_xdis.py b/test_crossversion/test_xdis.py index 2d147722..9e7d6cc3 100644 --- a/test_crossversion/test_xdis.py +++ b/test_crossversion/test_xdis.py @@ -1,14 +1,14 @@ from __future__ import annotations from itertools import chain - from pathlib import Path from typing import Iterable -import pytest from config import SYS_VERSION, TEMPLATE_COMPILED_DIR, TEMPLATE_SERIALIZED_DIR from serialize_bytecode import serialize_pyc +import pytest + class SerializedTestCase: """Test case for comparing a disassembled xdis and dis pyc, Needs a pyc to From 557ee34b8e7d6a050d5b9e2e803b163b14c5d861 Mon Sep 17 00:00:00 2001 From: elliot Date: Thu, 30 Jan 2025 18:32:54 -0600 Subject: [PATCH 14/16] fix co_lines parsing of co_linetable Co-authored-by: jdw170000 --- xdis/codetype/code310.py | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/xdis/codetype/code310.py b/xdis/codetype/code310.py index d79c4fd8..ed1edec7 100644 --- a/xdis/codetype/code310.py +++ b/xdis/codetype/code310.py @@ -163,24 +163,32 @@ def co_lines(self): either be a positive integer, or None Parsing implementation adapted from: https://github.com/python/cpython/blob/3.10/Objects/lnotab_notes.txt + The algorithm presented in the lnotab_notes.txt file is slightly inaccurate. The first linetable entry will have a line delta of 0, and should be yielded instead of skipped. + This implementation follows the lineiter definition in https://github.com/python/cpython/blob/3.10/Objects/codeobject.c#L1030. """ - line = self.co_firstlineno end_offset = 0 + line = self.co_firstlineno + # co_linetable is pairs of (offset_delta: unsigned byte, line_delta: signed byte) for offset_delta, line_delta in struct.iter_unpack('=Bb', self.co_linetable): assert isinstance(line_delta, int) assert isinstance(offset_delta, int) - if line_delta == 0: # No change to line number, just accumulate changes to end - end_offset += offset_delta - continue + start_offset = end_offset - end_offset = start_offset + offset_delta - if line_delta == -128: # No valid line number -- skip entry - continue - line += line_delta - if end_offset == start_offset: # Empty range, omit. + end_offset += offset_delta + + # line_delta of -128 signifies an instruction range that is not associated with any line + if line_delta != -128: + line += line_delta + display_line = line + else: + display_line = None + + # omit empty ranges + if start_offset == end_offset: continue - yield start_offset, end_offset, line + + yield start_offset, end_offset, display_line def encode_lineno_tab(self): """ From 165bf1dc286e5f4e12b423b60a65d5481c2afaa7 Mon Sep 17 00:00:00 2001 From: elliot Date: Mon, 3 Feb 2025 13:35:10 -0600 Subject: [PATCH 15/16] fix 3.10 line number comments and format code --- xdis/codetype/code310.py | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/xdis/codetype/code310.py b/xdis/codetype/code310.py index ed1edec7..70ddf052 100644 --- a/xdis/codetype/code310.py +++ b/xdis/codetype/code310.py @@ -14,11 +14,10 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +import struct import types from copy import deepcopy -import struct - from xdis.codetype.code38 import Code38 from xdis.cross_types import UnicodeForPython3 from xdis.version_info import PYTHON_VERSION_TRIPLE, version_tuple_to_str @@ -124,17 +123,18 @@ def check(self): for field, fieldtype in self.fieldtypes.items(): val = getattr(self, field) if isinstance(fieldtype, tuple): - assert ( - type(val) in fieldtype - ), "%s should be one of the types %s; is type %s" % ( - field, - fieldtype, - type(val), + assert type(val) in fieldtype, ( + "%s should be one of the types %s; is type %s" + % ( + field, + fieldtype, + type(val), + ) ) else: - assert isinstance( - val, fieldtype - ), "%s should have type %s; is type %s" % (field, fieldtype, type(val)) + assert isinstance(val, fieldtype), ( + "%s should have type %s; is type %s" % (field, fieldtype, type(val)) + ) pass pass @@ -164,26 +164,28 @@ def co_lines(self): Parsing implementation adapted from: https://github.com/python/cpython/blob/3.10/Objects/lnotab_notes.txt The algorithm presented in the lnotab_notes.txt file is slightly inaccurate. The first linetable entry will have a line delta of 0, and should be yielded instead of skipped. - This implementation follows the lineiter definition in https://github.com/python/cpython/blob/3.10/Objects/codeobject.c#L1030. + This implementation follows the `lineiter_next` definition in https://github.com/python/cpython/blob/10a2a9b3bcf237fd6183f84941632cda59395319/Objects/codeobject.c#L1029C1-L1062C2, + and the `advance` function in https://github.com/python/cpython/blob/10a2a9b3bcf237fd6183f84941632cda59395319/Objects/codeobject.c#L1140-L1155. """ + end_offset = 0 line = self.co_firstlineno # co_linetable is pairs of (offset_delta: unsigned byte, line_delta: signed byte) - for offset_delta, line_delta in struct.iter_unpack('=Bb', self.co_linetable): + for offset_delta, line_delta in struct.iter_unpack("=Bb", self.co_linetable): assert isinstance(line_delta, int) assert isinstance(offset_delta, int) - + start_offset = end_offset end_offset += offset_delta - + # line_delta of -128 signifies an instruction range that is not associated with any line if line_delta != -128: line += line_delta display_line = line else: display_line = None - + # omit empty ranges if start_offset == end_offset: continue From f22aed37ed5c2c4a2ce980ea4da5d28b8cb7c45b Mon Sep 17 00:00:00 2001 From: elliot Date: Mon, 3 Feb 2025 17:02:02 -0600 Subject: [PATCH 16/16] add "headers" and "serialize_insts" options to bytecode serialization to define which parts of bytecode should be serialized --- test_crossversion/prepare_templates.py | 16 +---- test_crossversion/serialize_bytecode.py | 82 +++++++++++++++++-------- 2 files changed, 58 insertions(+), 40 deletions(-) diff --git a/test_crossversion/prepare_templates.py b/test_crossversion/prepare_templates.py index 66fff3ae..9d4ee36f 100644 --- a/test_crossversion/prepare_templates.py +++ b/test_crossversion/prepare_templates.py @@ -2,13 +2,8 @@ import logging from py_compile import compile -from config import ( - SYS_VERSION, - TEMPLATE_COMPILED_DIR, - TEMPLATE_SERIALIZED_DIR, - TEMPLATE_SOURCE_DIR, -) from serialize_bytecode import serialize_pyc +from config import SYS_VERSION, TEMPLATE_COMPILED_DIR, TEMPLATE_SERIALIZED_DIR, TEMPLATE_SOURCE_DIR def prepare_templates(): @@ -46,16 +41,11 @@ def prepare_templates(): if __name__ == "__main__": parser = argparse.ArgumentParser(prog="prepare_templates") - parser.add_argument( - "-V", "--verbose", action="store_true", help="Use verbose output" - ) + parser.add_argument("-V", "--verbose", action="store_true", help="Use verbose output") args = parser.parse_args() # setup logger - logging.basicConfig( - format="%(levelname)s: %(message)s", - level=logging.DEBUG if args.verbose else None, - ) + logging.basicConfig(format="%(levelname)s: %(message)s", level=logging.DEBUG if args.verbose else None) # compile and serialize templates prepare_templates() diff --git a/test_crossversion/serialize_bytecode.py b/test_crossversion/serialize_bytecode.py index 62308846..6f10f85f 100644 --- a/test_crossversion/serialize_bytecode.py +++ b/test_crossversion/serialize_bytecode.py @@ -11,23 +11,24 @@ import xdis from xdis import disassemble_file, iscode + # Util to format shorthand code obj name # Used so we do not compare memory addrs -_fmt_codeobj = lambda co: f"" +def _fmt_codeobj(co): + return f"" def _iter_nested_bytecodes(bytecode, bytecode_constructor: Callable): """ iterate over a bytecode and its child bytecodes - bytecode: bytecode object to iterate, will be yielded on first call - bytecode_constructor: constructor to create child bytecodes with + + :param bytecode: bytecode object to iterate, will be yielded on first call + :param bytecode_constructor: constructor to create child bytecodes with """ bc_stack = [bytecode] while bc_stack: bc = bc_stack.pop() - bc_stack.extend( - bytecode_constructor(obj) for obj in bc.codeobj.co_consts if iscode(obj) - ) + bc_stack.extend(bytecode_constructor(obj) for obj in bc.codeobj.co_consts if iscode(obj)) yield bc @@ -57,15 +58,22 @@ def _get_headers_to_serialize(bytecode_version: tuple): return headers_to_serialize -def _format_headers(bytecode, bytecode_version: tuple) -> str: - """Format important headers (attrs) of bytecode.""" +def _format_headers(bytecode, bytecode_version: tuple, headers_to_serialize: list[str] | None) -> str: + """ + Format important headers (attrs) of bytecode. + + :param bytecode: bytecode object + :param bytecode_version: bytecode version tuple to track version specific headers + :param headers: list bytecode headers that we want to specifically format, excluding the other headers. By default, tests all params. + """ # default format for each attr header_fmt = "{name} : {val}" # format headers formatted_headers = [] - for attr_name in _get_headers_to_serialize(bytecode_version): + headers = headers_to_serialize if headers_to_serialize is not None else _get_headers_to_serialize(bytecode_version) + for attr_name in headers: # check for missing attrs if not hasattr(bytecode.codeobj, attr_name): logging.warning(f"Codeobj missing test_attr {attr_name}") @@ -76,10 +84,7 @@ def _format_headers(bytecode, bytecode_version: tuple) -> str: # handle const attrs and some callables if attr_name == "co_consts": # filter code objects in co_consts - val = [ - f" str: return "\n".join(insts) -def format_bytecode(bytecode, bytecode_version: tuple) -> str: - """Create complete formatted string of bytecode.""" +def format_bytecode(bytecode, bytecode_version: tuple, headers_to_serialize: list[str] | None = None, serialize_insts: bool = True) -> str: + """ + Create complete formatted string of bytecode. + + :param bytecode: bytecode object + :param bytecode_version: tuple of bytecode version to track version specific formatting + :param headers: list of bytecode headers we want to format in output. If None or not defined, we format all params by default. + :param serialize_insts: bool to determine if we serialize instructions or ignore them and dont output. + """ + outstr = f"BYTECODE {bytecode.codeobj.co_name}\n" outstr += "ATTRS:\n" - outstr += _format_headers(bytecode, bytecode_version) + "\n" - outstr += "INSTS:\n" - outstr += _format_insts(bytecode, bytecode_version) + "\n" + outstr += _format_headers(bytecode, bytecode_version, headers_to_serialize) + "\n" + if serialize_insts: + outstr += "INSTS:\n" + outstr += _format_insts(bytecode, bytecode_version) + "\n" return outstr -def serialize_pyc( - pyc: Path, use_xdis: bool = False, output_file: TextIO | None = sys.stdout -) -> str: - """Serialize a pyc to text for testing, using dis or xdis.""" +def serialize_pyc(pyc: Path, use_xdis: bool = False, output_file: TextIO | None = sys.stdout, headers: list[str] | None = None, serialize_insts: bool = True) -> str: + """ + Serialize a pyc to text for testing, using dis or xdis. + + :param pyc: path of pyc file + :param use_xdis: boolean if we serialize with xdis, default use dis (meaning pyc must be same version as running python) + :param output_file: file to write output to + :param headers: list of bytecode headers we want to format in output. Default is None, where we format all params. + :param serialize_insts: bool to determine if we format instructions or ignore them and dont output save. + """ # create a code object in xdis or dis, and a constructor to make bytecodes with if use_xdis: @@ -138,9 +158,7 @@ def serialize_pyc( # write to null so no disassembly output with open(devnull, "w") as fnull: # create xdis code obj - (_, code_object, version_tuple, _, _, is_pypy, _, _) = disassemble_file( - str(pyc), fnull, asm_format="classic" - ) + (_, code_object, version_tuple, _, _, is_pypy, _, _) = disassemble_file(str(pyc), fnull, asm_format="classic") # get corresponding opcode class opc = xdis.get_opcode(version_tuple, is_pypy, None) # create xdis bytecode constructor @@ -161,7 +179,7 @@ def serialize_pyc( formatted_bytecodes = [] init_bytecode = bytecode_constructor(code_object) for bc in _iter_nested_bytecodes(init_bytecode, bytecode_constructor): - formatted_bytecodes.append(format_bytecode(bc, bytecode_version)) + formatted_bytecodes.append(format_bytecode(bc, bytecode_version, headers, serialize_insts)) # write formatted bytecodes full_formatted_bytecode = "\n".join(formatted_bytecodes) @@ -179,6 +197,16 @@ def serialize_pyc( help="Use xdis to serialize bytecode", action="store_true", ) + parser.add_argument( + "--headers", + help="List of specific code object params to test, defaults to all parameters. Should be 'co_*', for example, 'co_lines'", + nargs="*", + ) + parser.add_argument( + "--skip_insts", + help="Do not test accuracy of instructions", + action="store_false", + ) parser.add_argument("pyc", help="PYC file to serialize.") args = parser.parse_args() @@ -189,4 +217,4 @@ def serialize_pyc( # setup logger logging.basicConfig(format="%(levelname)s: %(message)s") - serialize_pyc(pyc_path, args.use_xdis) + serialize_pyc(pyc_path, args.use_xdis, headers=args.headers if args.headers else None, serialize_insts=args.skip_insts)