Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 19 additions & 3 deletions xdis/opcodes/format/basic.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# (C) Copyright 2023 by Rocky Bernstein
# (C) Copyright 2023, 2025 by Rocky Bernstein
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
Expand All @@ -17,7 +17,6 @@
Routines for formatting opcodes.
"""


def format_extended_arg(arg):
    """Return ``arg`` multiplied by 2**16, rendered with ``str()``."""
    scaled = arg * 0x10000
    return str(scaled)

Expand Down Expand Up @@ -47,7 +46,7 @@ def format_MAKE_FUNCTION_10_27(argc: int) -> str:


# Up until 3.7
def format_RAISE_VARARGS_older(argc):
def format_RAISE_VARARGS_older(argc) -> str:
assert 0 <= argc <= 3
if argc == 0:
return "reraise"
Expand All @@ -57,6 +56,20 @@ def format_RAISE_VARARGS_older(argc):
return "exception, parameter"
elif argc == 3:
return "exception, parameter, traceback"
return ""

def format_ROT_FOUR(_: int) -> str:
    """Return a fixed assignment-style description of ROT_FOUR.

    The operand is ignored; the text is the same for every call.
    """
    targets = "TOS, TOS1, TOS2, TOS3"
    sources = "TOS1, TOS2, TOS3, TOS"
    return f"{targets} = {sources}"


def format_ROT_THREE(_: int) -> str:
    """Return a fixed assignment-style description of ROT_THREE.

    The operand is ignored; the text is the same for every call.
    """
    targets = "TOS, TOS1, TOS2"
    sources = "TOS1, TOS2, TOS"
    return f"{targets} = {sources}"


def format_ROT_TWO(_: int) -> str:
    """Return a fixed assignment-style description of ROT_TWO.

    The operand is ignored.  The returned text has no trailing whitespace.
    """
    targets = "TOS, TOS1"
    sources = "TOS1, TOS"
    return f"{targets} = {sources}"



opcode_arg_fmt_base = opcode_arg_fmt34 = {
Expand All @@ -65,4 +78,7 @@ def format_RAISE_VARARGS_older(argc):
"CALL_FUNCTION_VAR_KW": format_CALL_FUNCTION_pos_name_encoded,
"EXTENDED_ARG": format_extended_arg,
"RAISE_VARARGS": format_RAISE_VARARGS_older,
"ROT_FOUR": format_ROT_FOUR,
"ROT_THREE": format_ROT_THREE,
"ROT_TWO": format_ROT_TWO,
}
80 changes: 55 additions & 25 deletions xdis/opcodes/format/extended.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# (C) Copyright 2023-2024 by Rocky Bernstein
# (C) Copyright 2023-2025 by Rocky Bernstein
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
Expand All @@ -16,11 +16,15 @@
"""
Routines for formatting opcodes.
"""

import re
from typing import List, Optional, Tuple

from xdis.instruction import Instruction
from xdis.opcodes.format.basic import format_IS_OP, format_RAISE_VARARGS_older

# Result returned by the extended formatters in this module when no extended
# description could be built: an empty string paired with no start offset.
NULL_EXTENDED_OP = "", None


def extended_format_binary_op(
opc, instructions: List[Instruction], fmt_str: str
Expand Down Expand Up @@ -56,7 +60,7 @@ def extended_format_binary_op(
arg1_start_offset, instructions, 1
)
if i is None:
return "", None
return NULL_EXTENDED_OP
j = skip_cache(instructions, i + 1)
stack_inst2 = instructions[j]
if (
Expand All @@ -75,7 +79,7 @@ def extended_format_binary_op(
return fmt_str % (arg2, arg1), start_offset
else:
return fmt_str % ("...", arg1), None
return "", None
return NULL_EXTENDED_OP


def extended_format_infix_binary_op(
Expand All @@ -99,7 +103,7 @@ def extended_format_infix_binary_op(
if arg1_start_offset is not None:
i = get_instruction_index_from_offset(arg1_start_offset, instructions, 1)
if i is None:
return "", None
return NULL_EXTENDED_OP
j = i + 1
# 3.11+ has CACHE instructions
while instructions[j].opname == "CACHE":
Expand All @@ -108,11 +112,7 @@ def extended_format_infix_binary_op(
instructions[j].opcode in opc.operator_set
and instructions[i].opcode in opc.operator_set
):
arg2 = (
instructions[j].tos_str
if instructions[j].tos_str is not None
else instructions[j].argrepr
)
arg2 = get_instruction_tos_str(instructions[j])
start_offset = instructions[j].start_offset
return f"{arg2}{op_str}{arg1}", start_offset
elif instructions[j].start_offset is not None:
Expand All @@ -129,7 +129,7 @@ def extended_format_infix_binary_op(
return f"{arg2}{op_str}{arg1}", start_offset
else:
return f"...{op_str}{arg1}", None
return "", None
return NULL_EXTENDED_OP


def extended_format_store_op(
Expand All @@ -142,7 +142,7 @@ def extended_format_store_op(
# are more complicated, so let's not try to figure this out.
# This kind of thing is best left to a decompiler.
if inst.is_jump_target:
return "", None
return NULL_EXTENDED_OP

prev_inst = instructions[1]
start_offset = prev_inst.offset
Expand Down Expand Up @@ -198,7 +198,7 @@ def extended_format_ternary_op(
if arg1_start_offset is not None:
i = get_instruction_index_from_offset(arg1_start_offset, instructions, 1)
if i is None:
return "", None
return NULL_EXTENDED_OP
j = skip_cache(instructions, i + 1)
stack_inst2 = instructions[j]
if (
Expand Down Expand Up @@ -229,7 +229,7 @@ def extended_format_ternary_op(
return fmt_str % (arg2, arg1, arg3), start_offset
else:
return fmt_str % ("...", "...", "..."), None
return "", None
return NULL_EXTENDED_OP


def extended_format_STORE_SUBSCR(
Expand All @@ -251,7 +251,7 @@ def extended_format_unary_op(
return fmt_str % stack_arg.tos_str, start_offset
if stack_arg.opcode in opc.operator_set:
return fmt_str % stack_arg.argrepr, start_offset
return "", None
return NULL_EXTENDED_OP


def extended_format_ATTR(
Expand All @@ -265,13 +265,13 @@ def extended_format_ATTR(
instr1.tos_str
or instr1.opcode in opc.NAME_OPS | opc.CONST_OPS | opc.LOCAL_OPS | opc.FREE_OPS
):
base = get_instruction_arg(instr1)
base = get_instruction_tos_str(instr1)

return (
f"{base}.{instructions[0].argrepr}",
instr1.start_offset,
)
return "", None
return NULL_EXTENDED_OP


def extended_format_BINARY_ADD(
Expand Down Expand Up @@ -372,7 +372,7 @@ def extended_format_build_tuple_or_list(
return f"{left_delim}{args_str},{right_delim}", instructions[i].start_offset
else:
return f"{left_delim}{args_str}{right_delim}", instructions[i].start_offset
return "", None
return NULL_EXTENDED_OP


def extended_format_BUILD_CONST_KEY_MAP(opc, instructions):
Expand All @@ -393,7 +393,7 @@ def extended_format_BUILD_CONST_KEY_MAP(opc, instructions):
arg_pairs.append(f"{key_values[i]}: {arglist[i]}")
args_str = ", ".join(arg_pairs)
return "{" + args_str + "}", instructions[i].start_offset
return "", None
return NULL_EXTENDED_OP


def extended_format_BUILD_LIST(
Expand Down Expand Up @@ -426,7 +426,7 @@ def extended_format_BUILD_SLICE(
if instructions[0].argval == 0:
# Degenerate case
return "set()", instructions[0].start_offset
return "", None
return NULL_EXTENDED_OP


def extended_format_BUILD_TUPLE(
Expand All @@ -448,6 +448,21 @@ def extended_format_COMPARE_OP(
)


def extended_format_DUP_TOP(
    opc, instructions: List[Instruction]
) -> Tuple[str, Optional[int]]:
    """Try to describe DUP_TOP as ``push(<value>) ``.

    The trailing space in the format string is used as a sentinel by
    `get_instruction_tos_str()`, which removes the ``push()`` wrapper when
    only the operand value string is needed.

    Returns ``NULL_EXTENDED_OP`` when ``instructions[1]`` is a relative or
    absolute jump; otherwise returns whatever `extended_format_unary_op()`
    produces for the ``"push(%s) "`` format.
    """
    neighbor = instructions[1]
    if neighbor.optype in ("jrel", "jabs"):
        return NULL_EXTENDED_OP

    # Keep the space after the closing parenthesis: it is the sentinel
    # that get_instruction_tos_str() looks for.
    return extended_format_unary_op(opc, instructions, "push(%s) ")


def extended_format_CALL_FUNCTION(opc, instructions) -> Tuple[str, Optional[int]]:
"""call_function_inst should be a "CALL_FUNCTION" instruction. Look in
`instructions` to see if we can find a method name. If not we'll
Expand All @@ -464,11 +479,11 @@ def extended_format_CALL_FUNCTION(opc, instructions) -> Tuple[str, Optional[int]
arglist, arg_count, i = get_arglist(instructions, 0, arg_count)

if arglist is None:
return "", None
return NULL_EXTENDED_OP

assert i is not None
if i >= len(instructions) - 1:
return "", None
return NULL_EXTENDED_OP

fn_inst = instructions[i + 1]
if fn_inst.opcode in opc.operator_set:
Expand All @@ -480,7 +495,7 @@ def extended_format_CALL_FUNCTION(opc, instructions) -> Tuple[str, Optional[int]
arglist.reverse()
s = f'{fn_name}({", ".join(arglist)})'
return s, start_offset
return "", None
return NULL_EXTENDED_OP


def extended_format_IMPORT_FROM(
Expand All @@ -493,7 +508,8 @@ def extended_format_IMPORT_FROM(
instructions[i].start_offset, instructions, 1
)
if i is None:
return "", None
return NULL_EXTENDED_OP

module_name = get_instruction_arg(instructions[i])
if module_name.startswith("import_module("):
module_name = module_name[len("import_module(") : -1]
Expand Down Expand Up @@ -647,7 +663,7 @@ def extended_format_CALL_METHOD(opc, instructions) -> Tuple[str, Optional[int]]:
arglist, arg_count, first_arg = get_arglist(instructions, 0, arg_count)

if first_arg is None or first_arg >= len(instructions) - 1:
return "", None
return NULL_EXTENDED_OP

fn_inst = instructions[first_arg + 1]
if fn_inst.opcode in opc.operator_set and arglist is not None:
Expand All @@ -657,7 +673,8 @@ def extended_format_CALL_METHOD(opc, instructions) -> Tuple[str, Optional[int]]:
arglist.reverse()
s = f'{fn_name}({", ".join(arglist)})'
return s, start_offset
return "", None
return NULL_EXTENDED_OP



def extended_format_RAISE_VARARGS_older(
Expand Down Expand Up @@ -771,6 +788,18 @@ def get_instruction_arg(inst: Instruction, argval=None) -> str:
return inst.tos_str if inst.tos_str is not None else argval


def get_instruction_tos_str(inst: Instruction) -> str:
    """Return the operand text for ``inst`` without a push()/copy() wrapper.

    ``inst.tos_str`` is used when it is not None; otherwise ``inst.argrepr``
    is returned unchanged.  When ``tos_str`` consists entirely of
    ``push(...) `` or ``copy(...) `` (note the trailing sentinel space),
    only the text inside the parentheses is returned.
    """
    tos_str = inst.tos_str
    if tos_str is None:
        return inst.argrepr

    # The wrapper must span the whole string.  A prefix-only match would also
    # fire on an ordinary expression such as "push(a) + b" and silently drop
    # everything after the first operand.
    wrapped = re.fullmatch(r"(?:push|copy)\((.+)\) ", tos_str)
    return wrapped.group(1) if wrapped else tos_str


def get_instruction_index_from_offset(
target_offset: int, instructions: List[Instruction], start_index: int = 1
) -> Optional[int]:
Expand Down Expand Up @@ -858,6 +887,7 @@ def skip_cache(instructions: List[Instruction], i: int) -> int:
"BUILD_TUPLE": extended_format_BUILD_TUPLE,
"CALL_FUNCTION": extended_format_CALL_FUNCTION,
"COMPARE_OP": extended_format_COMPARE_OP,
"DUP_TOP": extended_format_DUP_TOP,
"IMPORT_FROM": extended_format_IMPORT_FROM,
"IMPORT_NAME": extended_format_IMPORT_NAME,
"INPLACE_ADD": extended_format_INPLACE_ADD,
Expand Down
55 changes: 53 additions & 2 deletions xdis/opcodes/opcode_311.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from typing import Dict, List, Optional, Tuple

import xdis.opcodes.opcode_310 as opcode_310
from xdis.instruction import Instruction
from xdis.opcodes.base import (
binary_op,
def_op,
Expand All @@ -35,7 +36,11 @@
store_op,
update_pj3,
)
from xdis.opcodes.format.extended import extended_format_binary_op
from xdis.opcodes.format.extended import (
NULL_EXTENDED_OP,
extended_format_binary_op,
extended_format_unary_op,
)
from xdis.opcodes.opcode_310 import opcode_arg_fmt310, opcode_extended_fmt310

version_tuple = (3, 11)
Expand Down Expand Up @@ -244,10 +249,54 @@ def extended_format_BINARY_OP(opc, instructions) -> Tuple[str, Optional[int]]:
return extended_format_binary_op(opc, instructions, f"%s {opname} %s")


def format_BINARY_OP(arg) -> str:
def extended_format_COPY_OP(
    opc, instructions: List[Instruction]
) -> Tuple[str, Optional[int]]:
    """Try to describe ``COPY 1`` as ``copy(<value>) ``.

    The trailing space in the format string is used as a sentinel by
    `get_instruction_tos_str()`, which removes the ``copy()`` wrapper when
    only the operand value string is needed.

    Returns ``NULL_EXTENDED_OP`` when the COPY operand is not 1 or when
    ``instructions[1]`` is a relative or absolute jump; otherwise returns
    whatever `extended_format_unary_op()` produces for ``"copy(%s) "``.
    """
    # COPY n duplicates the n-th stack entry.  Only for n == 1 is that the
    # top of stack, i.e. the value extended_format_unary_op() presumably
    # describes from the neighboring instruction; for deeper copies that
    # text would name the wrong value, so no description is attempted.
    if instructions[0].argval != 1:
        return NULL_EXTENDED_OP

    if instructions[1].optype in ("jrel", "jabs"):
        return NULL_EXTENDED_OP

    # Keep the space after the closing parenthesis: it is the sentinel
    # that get_instruction_tos_str() looks for.
    return extended_format_unary_op(opc, instructions, "copy(%s) ")


def extended_format_SWAP(
    opc, instructions: List[Instruction]
) -> Tuple[str, Optional[int]]:
    """Placeholder extended formatter for a SWAP instruction.

    ``instructions[0]`` is taken to be the SWAP instruction and its
    ``argval`` is checked against the length of ``instructions``.  No
    description is built yet: every path returns ``("", None)``.
    """
    depth = instructions[0].argval

    usable = depth is not None and 0 < depth < len(instructions)
    if not usable:
        return "", None

    # TODO: locate the two swapped values and describe the exchange.
    return "", None


def format_BINARY_OP(arg: int) -> str:
    """Return the second field of entry ``arg`` in the ``_nb_ops`` table.

    NOTE(review): that field is presumably the operator's display text;
    confirm against the ``_nb_ops`` definition.
    """
    op_entry = _nb_ops[arg]
    return op_entry[1]


def format_SWAP_OP(arg: int) -> str:
    """Describe a SWAP operand as an exchange of TOS with ``TOS<arg - 1>``."""
    partner_depth = arg - 1
    return "TOS <-> TOS" + str(partner_depth)


opcode_arg_fmt311 = opcode_arg_fmt310.copy()
del opcode_arg_fmt311["CALL_FUNCTION"]
del opcode_arg_fmt311["CALL_FUNCTION_KW"]
Expand All @@ -257,13 +306,15 @@ def format_BINARY_OP(arg) -> str:
**opcode_arg_fmt310,
**{
"BINARY_OP": format_BINARY_OP,
"SWAP": format_SWAP_OP,
},
}

# Extended formatters for this version: everything from the 3.10 table,
# plus entries for BINARY_OP and COPY.  Both names refer to the same dict.
opcode_extended_fmt311 = {
    **opcode_extended_fmt310,
    "BINARY_OP": extended_format_BINARY_OP,
    "COPY": extended_format_COPY_OP,
}
opcode_extended_fmt = opcode_extended_fmt311

Expand Down