diff --git a/xdis/opcodes/format/basic.py b/xdis/opcodes/format/basic.py index 407a21aa..08abbaca 100644 --- a/xdis/opcodes/format/basic.py +++ b/xdis/opcodes/format/basic.py @@ -1,4 +1,4 @@ -# (C) Copyright 2023 by Rocky Bernstein +# (C) Copyright 2023, 2025 by Rocky Bernstein # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License @@ -17,7 +17,6 @@ Routines for formatting opcodes. """ - def format_extended_arg(arg): return str(arg * (1 << 16)) @@ -47,7 +46,7 @@ def format_MAKE_FUNCTION_10_27(argc: int) -> str: # Up until 3.7 -def format_RAISE_VARARGS_older(argc): +def format_RAISE_VARARGS_older(argc) -> str: assert 0 <= argc <= 3 if argc == 0: return "reraise" @@ -57,6 +56,20 @@ def format_RAISE_VARARGS_older(argc): return "exception, parameter" elif argc == 3: return "exception, parameter, traceback" + return "" + +def format_ROT_FOUR(_: int) -> str: + return "TOS, TOS1, TOS2, TOS3 = TOS1, TOS2, TOS3, TOS" + + +def format_ROT_THREE(_: int) -> str: + return "TOS, TOS1, TOS2 = TOS1, TOS2, TOS" + + +def format_ROT_TWO(_: int) -> str: + # Unlike the "push(...) " and "copy(...) " formats, no trailing-space sentinel is added here for get_instruction_tos_str() + return "TOS, TOS1 = TOS1, TOS" + opcode_arg_fmt_base = opcode_arg_fmt34 = { @@ -65,4 +78,7 @@ def format_RAISE_VARARGS_older(argc): "CALL_FUNCTION_VAR_KW": format_CALL_FUNCTION_pos_name_encoded, "EXTENDED_ARG": format_extended_arg, "RAISE_VARARGS": format_RAISE_VARARGS_older, + "ROT_FOUR": format_ROT_FOUR, + "ROT_THREE": format_ROT_THREE, + "ROT_TWO": format_ROT_TWO, } diff --git a/xdis/opcodes/format/extended.py b/xdis/opcodes/format/extended.py index 2ff51eb5..03310510 100644 --- a/xdis/opcodes/format/extended.py +++ b/xdis/opcodes/format/extended.py @@ -1,4 +1,4 @@ -# (C) Copyright 2023-2024 by Rocky Bernstein +# (C) Copyright 2023-2025 by Rocky Bernstein # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License
@@ -16,11 +16,15 @@ """ Routines for formatting opcodes. """ + +import re from typing import List, Optional, Tuple from xdis.instruction import Instruction from xdis.opcodes.format.basic import format_IS_OP, format_RAISE_VARARGS_older +NULL_EXTENDED_OP = "", None + def extended_format_binary_op( opc, instructions: List[Instruction], fmt_str: str @@ -56,7 +60,7 @@ def extended_format_binary_op( arg1_start_offset, instructions, 1 ) if i is None: - return "", None + return NULL_EXTENDED_OP j = skip_cache(instructions, i + 1) stack_inst2 = instructions[j] if ( @@ -75,7 +79,7 @@ def extended_format_binary_op( return fmt_str % (arg2, arg1), start_offset else: return fmt_str % ("...", arg1), None - return "", None + return NULL_EXTENDED_OP def extended_format_infix_binary_op( @@ -99,7 +103,7 @@ def extended_format_infix_binary_op( if arg1_start_offset is not None: i = get_instruction_index_from_offset(arg1_start_offset, instructions, 1) if i is None: - return "", None + return NULL_EXTENDED_OP j = i + 1 # 3.11+ has CACHE instructions while instructions[j].opname == "CACHE": @@ -108,11 +112,7 @@ def extended_format_infix_binary_op( instructions[j].opcode in opc.operator_set and instructions[i].opcode in opc.operator_set ): - arg2 = ( - instructions[j].tos_str - if instructions[j].tos_str is not None - else instructions[j].argrepr - ) + arg2 = get_instruction_tos_str(instructions[j]) start_offset = instructions[j].start_offset return f"{arg2}{op_str}{arg1}", start_offset elif instructions[j].start_offset is not None: @@ -129,7 +129,7 @@ def extended_format_infix_binary_op( return f"{arg2}{op_str}{arg1}", start_offset else: return f"...{op_str}{arg1}", None - return "", None + return NULL_EXTENDED_OP def extended_format_store_op( @@ -142,7 +142,7 @@ def extended_format_store_op( # are more complicated, so let's not try to figure this out. # This kind of things is best left for a decompiler. 
if inst.is_jump_target: - return "", None + return NULL_EXTENDED_OP prev_inst = instructions[1] start_offset = prev_inst.offset @@ -198,7 +198,7 @@ def extended_format_ternary_op( if arg1_start_offset is not None: i = get_instruction_index_from_offset(arg1_start_offset, instructions, 1) if i is None: - return "", None + return NULL_EXTENDED_OP j = skip_cache(instructions, i + 1) stack_inst2 = instructions[j] if ( @@ -229,7 +229,7 @@ def extended_format_ternary_op( return fmt_str % (arg2, arg1, arg3), start_offset else: return fmt_str % ("...", "...", "..."), None - return "", None + return NULL_EXTENDED_OP def extended_format_STORE_SUBSCR( @@ -251,7 +251,7 @@ def extended_format_unary_op( return fmt_str % stack_arg.tos_str, start_offset if stack_arg.opcode in opc.operator_set: return fmt_str % stack_arg.argrepr, start_offset - return "", None + return NULL_EXTENDED_OP def extended_format_ATTR( @@ -265,13 +265,13 @@ def extended_format_ATTR( instr1.tos_str or instr1.opcode in opc.NAME_OPS | opc.CONST_OPS | opc.LOCAL_OPS | opc.FREE_OPS ): - base = get_instruction_arg(instr1) + base = get_instruction_tos_str(instr1) return ( f"{base}.{instructions[0].argrepr}", instr1.start_offset, ) - return "", None + return NULL_EXTENDED_OP def extended_format_BINARY_ADD( @@ -372,7 +372,7 @@ def extended_format_build_tuple_or_list( return f"{left_delim}{args_str},{right_delim}", instructions[i].start_offset else: return f"{left_delim}{args_str}{right_delim}", instructions[i].start_offset - return "", None + return NULL_EXTENDED_OP def extended_format_BUILD_CONST_KEY_MAP(opc, instructions): @@ -393,7 +393,7 @@ def extended_format_BUILD_CONST_KEY_MAP(opc, instructions): arg_pairs.append(f"{key_values[i]}: {arglist[i]}") args_str = ", ".join(arg_pairs) return "{" + args_str + "}", instructions[i].start_offset - return "", None + return NULL_EXTENDED_OP def extended_format_BUILD_LIST( @@ -426,7 +426,7 @@ def extended_format_BUILD_SLICE( if instructions[0].argval == 0: # Degenerate case 
return "set()", instructions[0].start_offset - return "", None + return NULL_EXTENDED_OP def extended_format_BUILD_TUPLE( @@ -448,6 +448,21 @@ def extended_format_COMPARE_OP( ) +def extended_format_DUP_TOP( + opc, instructions: List[Instruction] +) -> Tuple[str, Optional[int]]: + """Try to extract TOS value and show that surrounded in a "push() ". + The trailing space is used as a sentinel for `get_instruction_tos_str()` + which tries to remove the push() part when the operand value string is needed. + """ + + # We add a space at the end as a sentinel to use in get_instruction_tos_str() + if instructions[1].optype not in ['jrel', 'jabs']: + return extended_format_unary_op(opc, instructions, "push(%s) ") + else: + return NULL_EXTENDED_OP + + def extended_format_CALL_FUNCTION(opc, instructions) -> Tuple[str, Optional[int]]: """call_function_inst should be a "CALL_FUNCTION" instruction. Look in `instructions` to see if we can find a method name. If not we'll @@ -464,11 +479,11 @@ def extended_format_CALL_FUNCTION(opc, instructions) -> Tuple[str, Optional[int] arglist, arg_count, i = get_arglist(instructions, 0, arg_count) if arglist is None: - return "", None + return NULL_EXTENDED_OP assert i is not None if i >= len(instructions) - 1: - return "", None + return NULL_EXTENDED_OP fn_inst = instructions[i + 1] if fn_inst.opcode in opc.operator_set: @@ -480,7 +495,7 @@ def extended_format_CALL_FUNCTION(opc, instructions) -> Tuple[str, Optional[int] arglist.reverse() s = f'{fn_name}({", ".join(arglist)})' return s, start_offset - return "", None + return NULL_EXTENDED_OP def extended_format_IMPORT_FROM( @@ -493,7 +508,8 @@ def extended_format_IMPORT_FROM( instructions[i].start_offset, instructions, 1 ) if i is None: - return "", None + return NULL_EXTENDED_OP + module_name = get_instruction_arg(instructions[i]) if module_name.startswith("import_module("): module_name = module_name[len("import_module(") : -1] @@ -647,7 +663,7 @@ def extended_format_CALL_METHOD(opc,
instructions) -> Tuple[str, Optional[int]]: arglist, arg_count, first_arg = get_arglist(instructions, 0, arg_count) if first_arg is None or first_arg >= len(instructions) - 1: - return "", None + return NULL_EXTENDED_OP fn_inst = instructions[first_arg + 1] if fn_inst.opcode in opc.operator_set and arglist is not None: @@ -657,7 +673,8 @@ def extended_format_CALL_METHOD(opc, instructions) -> Tuple[str, Optional[int]]: arglist.reverse() s = f'{fn_name}({", ".join(arglist)})' return s, start_offset - return "", None + return NULL_EXTENDED_OP + def extended_format_RAISE_VARARGS_older( @@ -771,6 +788,18 @@ def get_instruction_arg(inst: Instruction, argval=None) -> str: return inst.tos_str if inst.tos_str is not None else argval +def get_instruction_tos_str(inst: Instruction) -> str: + if inst.tos_str is not None: + argval = inst.tos_str + argval_without_push = re.match(r"^(?:push|copy)\((.+)\) ", argval) + if argval_without_push: + # remove surrounding "push(...) or copy(...)" string + argval = argval_without_push.group(1) + else: + argval = inst.argrepr + return argval + + def get_instruction_index_from_offset( target_offset: int, instructions: List[Instruction], start_index: int = 1 ) -> Optional[int]: @@ -858,6 +887,7 @@ def skip_cache(instructions: List[Instruction], i: int) -> int: "BUILD_TUPLE": extended_format_BUILD_TUPLE, "CALL_FUNCTION": extended_format_CALL_FUNCTION, "COMPARE_OP": extended_format_COMPARE_OP, + "DUP_TOP": extended_format_DUP_TOP, "IMPORT_FROM": extended_format_IMPORT_FROM, "IMPORT_NAME": extended_format_IMPORT_NAME, "INPLACE_ADD": extended_format_INPLACE_ADD, diff --git a/xdis/opcodes/opcode_311.py b/xdis/opcodes/opcode_311.py index d63f3636..979678ff 100644 --- a/xdis/opcodes/opcode_311.py +++ b/xdis/opcodes/opcode_311.py @@ -24,6 +24,7 @@ from typing import Dict, List, Optional, Tuple import xdis.opcodes.opcode_310 as opcode_310 +from xdis.instruction import Instruction from xdis.opcodes.base import ( binary_op, def_op, @@ -35,7 +36,11 @@ 
store_op, update_pj3, ) -from xdis.opcodes.format.extended import extended_format_binary_op +from xdis.opcodes.format.extended import ( + NULL_EXTENDED_OP, + extended_format_binary_op, + extended_format_unary_op, +) from xdis.opcodes.opcode_310 import opcode_arg_fmt310, opcode_extended_fmt310 version_tuple = (3, 11) @@ -244,10 +249,54 @@ def extended_format_BINARY_OP(opc, instructions) -> Tuple[str, Optional[int]]: return extended_format_binary_op(opc, instructions, f"%s {opname} %s") -def format_BINARY_OP(arg) -> str: +def extended_format_COPY_OP( + opc, instructions: List[Instruction] +) -> Tuple[str, Optional[int]]: + """Try to extract TOS value and show that surrounded in a "copy() ". + The trailing space is used as a sentinel for `get_instruction_tos_str()` + which tries to remove the copy() part when the operand value string is needed. + """ + + # We add a space at the end as a sentinel to use in get_instruction_tos_str() + if instructions[1].optype not in ["jrel", "jabs"]: + return extended_format_unary_op(opc, instructions, "copy(%s) ") + else: + return NULL_EXTENDED_OP + + +def extended_format_SWAP( + opc, instructions: List[Instruction] +) -> Tuple[str, Optional[int]]: + """instructions[0] should be a "SWAP" instruction. See if + we can find the two instructions to be swapped. If not, we'll + return ("", None). + + """ + # From opcode description: SWAP(i) swaps the top of the stack with the + # i-th element, so the operand is a stack position, not an argument count. + # format_SWAP_OP() renders this as "TOS <-> TOS{i-1}". + # Finding the instructions that produced the two swapped entries is + # not implemented yet; for now this always returns ("", None).
+ + swap_instr = instructions[0] + i = swap_instr.argval + # s = "" + + if i is None or not (0 < i < len(instructions)): + return "", None + + # To be continued + return "", None + + +def format_BINARY_OP(arg: int) -> str: return _nb_ops[arg][1] +def format_SWAP_OP(arg: int) -> str: + return f"TOS <-> TOS{arg-1}" + + opcode_arg_fmt311 = opcode_arg_fmt310.copy() del opcode_arg_fmt311["CALL_FUNCTION"] del opcode_arg_fmt311["CALL_FUNCTION_KW"] @@ -257,6 +306,7 @@ def format_BINARY_OP(arg) -> str: **opcode_arg_fmt310, **{ "BINARY_OP": format_BINARY_OP, + "SWAP": format_SWAP_OP, }, } @@ -264,6 +314,7 @@ def format_BINARY_OP(arg) -> str: **opcode_extended_fmt310, **{ "BINARY_OP": extended_format_BINARY_OP, + "COPY": extended_format_COPY_OP, }, }