From 10fec20120c98815e47ebe0efefc32816b92c31c Mon Sep 17 00:00:00 2001 From: rocky Date: Tue, 17 Jun 2025 07:35:34 -0400 Subject: [PATCH 1/2] Start formatting stack manip opcodes... These are things like SWAP, COPY, DUP_TOP, ROT_XXX --- xdis/opcodes/format/basic.py | 12 ++++++-- xdis/opcodes/format/extended.py | 54 +++++++++++++++++++++++++-------- xdis/opcodes/opcode_311.py | 32 ++++++++++++++++++- 3 files changed, 82 insertions(+), 16 deletions(-) diff --git a/xdis/opcodes/format/basic.py b/xdis/opcodes/format/basic.py index 407a21aa..6efbf61f 100644 --- a/xdis/opcodes/format/basic.py +++ b/xdis/opcodes/format/basic.py @@ -1,4 +1,4 @@ -# (C) Copyright 2023 by Rocky Bernstein +# (C) Copyright 2023, 2025 by Rocky Bernstein # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License @@ -17,7 +17,6 @@ Routines for formatting opcodes. """ - def format_extended_arg(arg): return str(arg * (1 << 16)) @@ -47,7 +46,7 @@ def format_MAKE_FUNCTION_10_27(argc: int) -> str: # Up until 3.7 -def format_RAISE_VARARGS_older(argc): +def format_RAISE_VARARGS_older(argc) -> str: assert 0 <= argc <= 3 if argc == 0: return "reraise" @@ -57,6 +56,12 @@ def format_RAISE_VARARGS_older(argc): return "exception, parameter" elif argc == 3: return "exception, parameter, traceback" + return "" + +def format_ROT_TWO(_: int) -> str: + # We add a space at the end as a sentinal to use in get_instruction_tos_str() + return "TOS, TOS1 = TOS1, TOS" + opcode_arg_fmt_base = opcode_arg_fmt34 = { @@ -65,4 +70,5 @@ def format_RAISE_VARARGS_older(argc): "CALL_FUNCTION_VAR_KW": format_CALL_FUNCTION_pos_name_encoded, "EXTENDED_ARG": format_extended_arg, "RAISE_VARARGS": format_RAISE_VARARGS_older, + "ROT_TWO": format_ROT_TWO, } diff --git a/xdis/opcodes/format/extended.py b/xdis/opcodes/format/extended.py index 2ff51eb5..0cc8edb9 100644 --- a/xdis/opcodes/format/extended.py +++ b/xdis/opcodes/format/extended.py @@ -1,4 +1,4 @@ -# (C) Copyright 2023-2024 by Rocky Bernstein +# (C) Copyright 2023-2025 by Rocky Bernstein # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License @@ -16,11 +16,15 @@ """ Routines for formatting opcodes. """ + +import re from typing import List, Optional, Tuple from xdis.instruction import Instruction from xdis.opcodes.format.basic import format_IS_OP, format_RAISE_VARARGS_older +NULL_EXTENDED_OP = "", None + def extended_format_binary_op( opc, instructions: List[Instruction], fmt_str: str @@ -108,11 +112,7 @@ def extended_format_infix_binary_op( instructions[j].opcode in opc.operator_set and instructions[i].opcode in opc.operator_set ): - arg2 = ( - instructions[j].tos_str - if instructions[j].tos_str is not None - else instructions[j].argrepr - ) + arg2 = get_instruction_tos_str(instructions[j]) start_offset = instructions[j].start_offset return f"{arg2}{op_str}{arg1}", start_offset elif instructions[j].start_offset is not None: @@ -265,7 +265,7 @@ def extended_format_ATTR( instr1.tos_str or instr1.opcode in opc.NAME_OPS | opc.CONST_OPS | opc.LOCAL_OPS | opc.FREE_OPS ): - base = get_instruction_arg(instr1) + base = get_instruction_tos_str(instr1) return ( f"{base}.{instructions[0].argrepr}", @@ -448,6 +448,21 @@ def extended_format_COMPARE_OP( ) +def extended_format_DUP_TOP( + opc, instructions: List[Instruction] +) -> Tuple[str, Optional[int]]: + """Try to extract TOS value and show that surrounded in a "push() ". + The trailing space at the used as a sentinal for `get_instruction_tos_str()` + which tries to remove the push() part when the operand value string is needed. + """ + + # We add a space at the end as a sentinal to use in get_instruction_tos_str() + if instructions[1].optype not in ['jrel', 'jabs']: + return extended_format_unary_op(opc, instructions, "push(%s) ") + else: + return NULL_EXTENDED_OP + + def extended_format_CALL_FUNCTION(opc, instructions) -> Tuple[str, Optional[int]]: """call_function_inst should be a "CALL_FUNCTION" instruction. Look in `instructions` to see if we can find a method name. If not we'll @@ -468,7 +483,7 @@ def extended_format_CALL_FUNCTION(opc, instructions) -> Tuple[str, Optional[int] assert i is not None if i >= len(instructions) - 1: - return "", None + return NULL_EXTENDED_OP fn_inst = instructions[i + 1] if fn_inst.opcode in opc.operator_set: @@ -480,7 +495,7 @@ def extended_format_CALL_FUNCTION(opc, instructions) -> Tuple[str, Optional[int] arglist.reverse() s = f'{fn_name}({", ".join(arglist)})' return s, start_offset - return "", None + return NULL_EXTENDED_OP def extended_format_IMPORT_FROM( @@ -493,7 +508,8 @@ def extended_format_IMPORT_FROM( instructions[i].start_offset, instructions, 1 ) if i is None: - return "", None + return NULL_EXTENDED_OP + module_name = get_instruction_arg(instructions[i]) if module_name.startswith("import_module("): module_name = module_name[len("import_module(") : -1] @@ -647,7 +663,7 @@ def extended_format_CALL_METHOD(opc, instructions) -> Tuple[str, Optional[int]]: arglist, arg_count, first_arg = get_arglist(instructions, 0, arg_count) if first_arg is None or first_arg >= len(instructions) - 1: - return "", None + return NULL_EXTENDED_OP fn_inst = instructions[first_arg + 1] if fn_inst.opcode in opc.operator_set and arglist is not None: @@ -657,7 +673,8 @@ def extended_format_CALL_METHOD(opc, instructions) -> Tuple[str, Optional[int]]: arglist.reverse() s = f'{fn_name}({", ".join(arglist)})' return s, start_offset - return "", None + return NULL_EXTENDED_OP + def extended_format_RAISE_VARARGS_older( @@ -771,6 +788,18 @@ def get_instruction_arg(inst: Instruction, argval=None) -> str: return inst.tos_str if inst.tos_str is not None else argval +def get_instruction_tos_str(inst: Instruction) -> str: + if inst.tos_str is not None: + argval = inst.tos_str + argval_without_push = re.match(r"^push\((.+)\) ", argval) + if argval_without_push: + # remove surrounding "push(...)" string + argval = argval_without_push.group(1) + else: + argval = inst.argrepr + return argval + + def get_instruction_index_from_offset( target_offset: int, instructions: List[Instruction], start_index: int = 1 ) -> Optional[int]: @@ -858,6 +887,7 @@ def skip_cache(instructions: List[Instruction], i: int) -> int: "BUILD_TUPLE": extended_format_BUILD_TUPLE, "CALL_FUNCTION": extended_format_CALL_FUNCTION, "COMPARE_OP": extended_format_COMPARE_OP, + "DUP_TOP": extended_format_DUP_TOP, "IMPORT_FROM": extended_format_IMPORT_FROM, "IMPORT_NAME": extended_format_IMPORT_NAME, "INPLACE_ADD": extended_format_INPLACE_ADD, diff --git a/xdis/opcodes/opcode_311.py b/xdis/opcodes/opcode_311.py index d63f3636..65f71a49 100644 --- a/xdis/opcodes/opcode_311.py +++ b/xdis/opcodes/opcode_311.py @@ -24,6 +24,7 @@ from typing import Dict, List, Optional, Tuple import xdis.opcodes.opcode_310 as opcode_310 +from xdis.instruction import Instruction from xdis.opcodes.base import ( binary_op, def_op, @@ -244,10 +245,38 @@ def extended_format_BINARY_OP(opc, instructions) -> Tuple[str, Optional[int]]: return extended_format_binary_op(opc, instructions, f"%s {opname} %s") -def format_BINARY_OP(arg) -> str: +def extended_format_SWAP( + opc, instructions: List[Instruction] +) -> Tuple[str, Optional[int]]: + """call_function_inst should be a "SWAP" instruction. See if + `we can find the two instructions to be swapped. If not we'll + return None. + + """ + # From opcode description: argc indicates the total number of + # positional and keyword arguments. Sometimes the function name + # is in the stack arg positions back. + # From opcode description: arg_count indicates the total number of + # positional and keyword arguments. + + swap_instr = instructions[0] + i = swap_instr.argval + # s = "" + + if (i is None or not (0 < i < len(instructions))): + return "", None + + # To be continued + return "", None + +def format_BINARY_OP(arg: int) -> str: return _nb_ops[arg][1] +def format_SWAP_OP(arg: int) -> str: + return f"TOS <-> TOS{arg-1}" + + opcode_arg_fmt311 = opcode_arg_fmt310.copy() del opcode_arg_fmt311["CALL_FUNCTION"] del opcode_arg_fmt311["CALL_FUNCTION_KW"] @@ -257,6 +286,7 @@ def format_BINARY_OP(arg) -> str: **opcode_arg_fmt310, **{ "BINARY_OP": format_BINARY_OP, + "SWAP": format_SWAP_OP, }, } From adde8c98415caf4e5a69b31714a5689554a3d274 Mon Sep 17 00:00:00 2001 From: rocky Date: Wed, 18 Jun 2025 11:18:11 -0400 Subject: [PATCH 2/2] More formatting * Add 3.11+ COPY formatting * Add ROT_THREE and ROT_FOUR * expand use of NULL_EXTENDED_OP --- xdis/opcodes/format/basic.py | 10 ++++++++++ xdis/opcodes/format/extended.py | 30 +++++++++++++++--------------- xdis/opcodes/opcode_311.py | 25 +++++++++++++++++++++++-- 3 files changed, 48 insertions(+), 17 deletions(-) diff --git a/xdis/opcodes/format/basic.py b/xdis/opcodes/format/basic.py index 6efbf61f..08abbaca 100644 --- a/xdis/opcodes/format/basic.py +++ b/xdis/opcodes/format/basic.py @@ -58,6 +58,14 @@ def format_RAISE_VARARGS_older(argc) -> str: return "exception, parameter, traceback" return "" +def format_ROT_FOUR(_: int) -> str: + return "TOS, TOS1, TOS2, TOS3 = TOS1, TOS2, TOS3, TOS" + + +def format_ROT_THREE(_: int) -> str: + return "TOS, TOS1, TOS2 = TOS1, TOS2, TOS" + + def format_ROT_TWO(_: int) -> str: # We add a space at the end as a sentinal to use in get_instruction_tos_str() return "TOS, TOS1 = TOS1, TOS" @@ -70,5 +78,7 @@ def format_ROT_TWO(_: int) -> str: "CALL_FUNCTION_VAR_KW": format_CALL_FUNCTION_pos_name_encoded, "EXTENDED_ARG": format_extended_arg, "RAISE_VARARGS": format_RAISE_VARARGS_older, + "ROT_FOUR": format_ROT_FOUR, + "ROT_THREE": format_ROT_THREE, "ROT_TWO": format_ROT_TWO, } diff --git a/xdis/opcodes/format/extended.py b/xdis/opcodes/format/extended.py index 0cc8edb9..03310510 100644 --- a/xdis/opcodes/format/extended.py +++ b/xdis/opcodes/format/extended.py @@ -60,7 +60,7 @@ def extended_format_binary_op( arg1_start_offset, instructions, 1 ) if i is None: - return "", None + return NULL_EXTENDED_OP j = skip_cache(instructions, i + 1) stack_inst2 = instructions[j] if ( @@ -79,7 +79,7 @@ def extended_format_binary_op( return fmt_str % (arg2, arg1), start_offset else: return fmt_str % ("...", arg1), None - return "", None + return NULL_EXTENDED_OP def extended_format_infix_binary_op( @@ -103,7 +103,7 @@ def extended_format_infix_binary_op( if arg1_start_offset is not None: i = get_instruction_index_from_offset(arg1_start_offset, instructions, 1) if i is None: - return "", None + return NULL_EXTENDED_OP j = i + 1 # 3.11+ has CACHE instructions while instructions[j].opname == "CACHE": @@ -129,7 +129,7 @@ def extended_format_infix_binary_op( return f"{arg2}{op_str}{arg1}", start_offset else: return f"...{op_str}{arg1}", None - return "", None + return NULL_EXTENDED_OP def extended_format_store_op( @@ -142,7 +142,7 @@ def extended_format_store_op( # are more complicated, so let's not try to figure this out. # This kind of things is best left for a decompiler. if inst.is_jump_target: - return "", None + return NULL_EXTENDED_OP prev_inst = instructions[1] start_offset = prev_inst.offset @@ -198,7 +198,7 @@ def extended_format_ternary_op( if arg1_start_offset is not None: i = get_instruction_index_from_offset(arg1_start_offset, instructions, 1) if i is None: - return "", None + return NULL_EXTENDED_OP j = skip_cache(instructions, i + 1) stack_inst2 = instructions[j] if ( @@ -229,7 +229,7 @@ def extended_format_ternary_op( return fmt_str % (arg2, arg1, arg3), start_offset else: return fmt_str % ("...", "...", "..."), None - return "", None + return NULL_EXTENDED_OP def extended_format_STORE_SUBSCR( @@ -251,7 +251,7 @@ def extended_format_unary_op( return fmt_str % stack_arg.tos_str, start_offset if stack_arg.opcode in opc.operator_set: return fmt_str % stack_arg.argrepr, start_offset - return "", None + return NULL_EXTENDED_OP def extended_format_ATTR( @@ -271,7 +271,7 @@ def extended_format_ATTR( f"{base}.{instructions[0].argrepr}", instr1.start_offset, ) - return "", None + return NULL_EXTENDED_OP def extended_format_BINARY_ADD( @@ -372,7 +372,7 @@ def extended_format_build_tuple_or_list( return f"{left_delim}{args_str},{right_delim}", instructions[i].start_offset else: return f"{left_delim}{args_str}{right_delim}", instructions[i].start_offset - return "", None + return NULL_EXTENDED_OP def extended_format_BUILD_CONST_KEY_MAP(opc, instructions): @@ -393,7 +393,7 @@ def extended_format_BUILD_CONST_KEY_MAP(opc, instructions): arg_pairs.append(f"{key_values[i]}: {arglist[i]}") args_str = ", ".join(arg_pairs) return "{" + args_str + "}", instructions[i].start_offset - return "", None + return NULL_EXTENDED_OP def extended_format_BUILD_LIST( @@ -426,7 +426,7 @@ def extended_format_BUILD_SLICE( if instructions[0].argval == 0: # Degenerate case return "set()", instructions[0].start_offset - return "", None + return NULL_EXTENDED_OP def extended_format_BUILD_TUPLE( @@ -479,7 +479,7 @@ def extended_format_CALL_FUNCTION(opc, instructions) -> Tuple[str, Optional[int] arglist, arg_count, i = get_arglist(instructions, 0, arg_count) if arglist is None: - return "", None + return NULL_EXTENDED_OP assert i is not None if i >= len(instructions) - 1: @@ -791,9 +791,9 @@ def get_instruction_arg(inst: Instruction, argval=None) -> str: def get_instruction_tos_str(inst: Instruction) -> str: if inst.tos_str is not None: argval = inst.tos_str - argval_without_push = re.match(r"^push\((.+)\) ", argval) + argval_without_push = re.match(r"^(?:push|copy)\((.+)\) ", argval) if argval_without_push: - # remove surrounding "push(...)" string + # remove surrounding "push(...) or copy(...)" string argval = argval_without_push.group(1) else: argval = inst.argrepr diff --git a/xdis/opcodes/opcode_311.py b/xdis/opcodes/opcode_311.py index 65f71a49..979678ff 100644 --- a/xdis/opcodes/opcode_311.py +++ b/xdis/opcodes/opcode_311.py @@ -36,7 +36,11 @@ store_op, update_pj3, ) -from xdis.opcodes.format.extended import extended_format_binary_op +from xdis.opcodes.format.extended import ( + NULL_EXTENDED_OP, + extended_format_binary_op, + extended_format_unary_op, +) from xdis.opcodes.opcode_310 import opcode_arg_fmt310, opcode_extended_fmt310 version_tuple = (3, 11) @@ -245,6 +249,21 @@ def extended_format_BINARY_OP(opc, instructions) -> Tuple[str, Optional[int]]: return extended_format_binary_op(opc, instructions, f"%s {opname} %s") +def extended_format_COPY_OP( + opc, instructions: List[Instruction] +) -> Tuple[str, Optional[int]]: + """Try to extract TOS value and show that surrounded in a "push() ". + The trailing space at the used as a sentinal for `get_instruction_tos_str()` + which tries to remove the push() part when the operand value string is needed. + """ + + # We add a space at the end as a sentinal to use in get_instruction_tos_str() + if instructions[1].optype not in ["jrel", "jabs"]: + return extended_format_unary_op(opc, instructions, "copy(%s) ") + else: + return NULL_EXTENDED_OP + + def extended_format_SWAP( opc, instructions: List[Instruction] ) -> Tuple[str, Optional[int]]: @@ -263,12 +282,13 @@ def extended_format_SWAP( i = swap_instr.argval # s = "" - if (i is None or not (0 < i < len(instructions))): + if i is None or not (0 < i < len(instructions)): return "", None # To be continued return "", None + def format_BINARY_OP(arg: int) -> str: return _nb_ops[arg][1] @@ -294,6 +314,7 @@ def format_SWAP_OP(arg: int) -> str: **opcode_extended_fmt310, **{ "BINARY_OP": extended_format_BINARY_OP, + "COPY": extended_format_COPY_OP, }, }