Fix execution paths generated by group-size detector

`detect_missing_tx_field_validations` and `search_paths` already address the issues with the generation of execution paths.
https://github.com/crytic/tealer/blob/c590caacfb33aa1f68498b2cb596701e7dd2fd18/tealer/detectors/utils.py#L51-L229

`group-size` detector implements a function to generate execution paths. This function does not address the recently uncovered issues.

https://github.com/crytic/tealer/blob/c590caacfb33aa1f68498b2cb596701e7dd2fd18/tealer/detectors/groupsize.py#L119-L164

The `group-size` could be updated to use the `detect_missing_tx_field_validations`

```py
"""Detector for finding execution paths missing GroupSize check."""

from typing import List, TYPE_CHECKING
from functools import lru_cache

from tealer.detectors.abstract_detector import (
    AbstractDetector,
    DetectorClassification,
    DetectorType,
)
from tealer.teal.basic_blocks import BasicBlock
from tealer.teal.instructions.instructions import (
    Gtxn,
    Gtxna,
    Gtxnas,
    Gtxns,
    Gtxnsa,
    Gtxnsas,
)
from tealer.teal.teal import Teal

from tealer.utils.algorand_constants import MAX_GROUP_SIZE
from tealer.utils.analyses import is_int_push_ins
from tealer.analyses.utils.stack_ast_builder import construct_stack_ast, UnknownStackValue
from tealer.detectors.utils import (
    detect_missing_tx_field_validations,
    detector_terminal_description
)

if TYPE_CHECKING:
    from tealer.utils.output import SupportedOutput
    from tealer.teal.instructions.instructions import Instruction
    from tealer.teal.context.block_transaction_context import BlockTransactionContext


class MissingGroupSize(AbstractDetector):  # pylint: disable=too-few-public-methods
   # ...

    @lru_cache(maxsize=None)
    @staticmethod
    def _accessed_using_absolute_index(bb: BasicBlock) -> bool:
        """Return True if a instruction in bb access a field using absolute index

        a. gtxn t f, gtxna t f i, gtxnas t f,
        b. gtxns f, gtxnsa f i, gtxnsas f

        Instructions in (a) take transaction index as a immediate argument.
        Return True if bb contains any one of those instructions.

        Instructions in (b) take transaction index from the stack.
        `gtxns f` and `gtxnsa f i` take only one argument and it is the transaction index.
        `gtxnsas f` takes two arguments and transaction index is the first argument.
        Return True if the transaction index is pushed by an int instruction.
        """
        stack_gtxns_ins: List["Instruction"] = []
        for ins in bb.instructions:
            if isinstance(ins, (Gtxn, Gtxna, Gtxnas)):
                return True
            if isinstance(ins, (Gtxns, Gtxnsa, Gtxnsas)):
                stack_gtxns_ins.append(ins)
        if not stack_gtxns_ins:
            return False
        ast_values = construct_stack_ast(bb)
        for ins in stack_gtxns_ins:
            index_value = ast_values[ins].args[0]
            if isinstance(index_value, UnknownStackValue):
                continue
            is_int, _ = is_int_push_ins(index_value.instruction)
            if is_int:
                return True
        return False


    def detect(self) -> "SupportedOutput":
        """Detect execution paths with missing GroupSize check.

        Returns:
            ExecutionPaths instance containing the list of vulnerable execution
            paths along with name, check, impact, confidence and other detector
            information.
        """

        def checks_group_size(block_ctx: "BlockTransactionContext") -> bool:
            # return True if group-size is checked in the path
            # otherwise return False
            return MAX_GROUP_SIZE not in block_ctx.group_sizes
        
        paths_without_check: List[List[BasicBlock]] = detect_missing_tx_field_validations(
            self.teal.bbs[0], checks_group_size
        )

        # paths_without_check contain all the execution paths not checking the GroupSize
        # Only report paths which also use absolute_index

        paths_to_report: List[List[BasicBlock]] = []
        for path in paths_without_check:
            for bb in path:
                if self._accessed_using_absolute_index(bb):
                    paths_to_report.append(path)
                    break

        description = detector_terminal_description(self)
        filename = "missing_group_size"

        results = self.generate_result(paths_to_report, description, filename)
        construct_stack_ast.cache_clear()
        return results
```

	def detect_missing_tx_field_validations(
	entry_block: "BasicBlock", checks_field: Callable[["BlockTransactionContext"], bool]
	) -> List[List["BasicBlock"]]:
	"""search for execution paths lacking validations on transaction fields.

	`tealer.analyses.dataflow` calculates possible values a transaction field can have to successfully complete execution.
	Information is computed at block level. Each block's context(`BlockTransactionContext`)
	will have information on possible values for a transaction field such that
	- Execution reaches to the first instruction of block `B`
	- Execution reaches exit instruction of block `B` without failing.

	The goal of current detectors is to find if a given transaction field(s) can have a certain vulnerable value(s).
	e.g: is_updatable detector checks if the transaction can be UpdateApplication type transaction. Here, the vulnerable value for
	transaction type is "UpdateApplication". if "UpdateApplication" is a possible value then the contract is considered vulnerable and is reported.

	In order to find whether the given contract is vulnerable, it is sufficient to check the leaf blocks: whether transaction field can
	have target value and execution reaches end of a leaf block.
	However, The current output format requires "a execution path" from entry block to leaf block. And when a leaf block is vulnerable,
	it does not mean that all execution paths ending at that leaf block are vulnerable. It only means that atleast one path ending at
	that leaf block is vulnerable.

	For this reason, a traversal over the CFG is performed to enumerate execution paths. And for each block in the path it is checked
	whether transaction field(s) can have the target vulnerable value.

	Args:
	entry_block: entry basic block of the CFG
	checks_field: Given a block context, should return True if the target field cannot have the vulnerable value
	or else False.

	e.g: For is_updatable detector, vulnerable value is `UpdateApplication`.
	if `block_ctx.transaction_types` can have `UpdateApplication`, this method will
	return false or else returns True.
	Returns:
	Returns a list of vulnerable execution paths: none of the blocks in the path check the fields.
	"""

	def search_paths(
	bb: "BasicBlock",
	current_path: List["BasicBlock"],
	paths_without_check: List[List["BasicBlock"]],
	current_call_stack: List[Tuple[Optional["BasicBlock"], "Subroutine"]],
	current_subroutine_executed: List[List["BasicBlock"]],
	) -> None:
	"""
	Args:
	current_call_stack: list of callsub blocks and called subroutine along the current path.
	e.g current_callsub_blocks = [(Bi, S1), (Bj, S2), (Bk, S3), ...]
	=> 1. Bi, Bj, Bk, .. all end with callsub instruction.
	2. Bi called subroutine S1. S1 contains block Bj which called subroutine S2.
	S2 contains Bk which calls S3.
	S1 calls S2, S2 calls S3, ...
	current_subroutine_executed: list of subroutine basic blocks that were executed in the path.
	e.g if call stack is [(Bi, S1), (Bj, S2), (Bk, S3)]
	1. current_subroutine_executed[0] contains basic blocks of S1 that were executed before call to S2.
	2. current_subroutine_executed[1] contains basic blocks of S2 that were executed before call to S3.
	3. current_subroutine_executed[2] contains basic blocks of S3 that were executed upto this call.

	Main purpose of this argument is to identify loops. A loop contains a back edge to a previously executed block.
	if presence of loop is checked using `return True if bb in current_path else False`, detector misses paths that
	does not contain a loop.
	Mainly, when a subroutine is called twice in a path, the second call is treated as a back edge because the subroutine's blocks
	will be present in the current path (added in the first call.)
	e.g
	```
	callsub Sa // B0
	callsub Sa // B1
	int 1 // B2
	return
	Sa: // B3
	callsub Sb
	retsub // B4
	Sb: // B5
	retsub
	```
	before executing B1, current_path = [B0, B3, B5, B4], bb = B1
	after executing B1, current_path = [B0, B3, B5, B4, B1], bb = B3

	B3 is already present in the current_path and so is treated as a loop even though it's not.

	In order to identify loops properly, basic blocks that are executed as part of the current subroutines are tracked.
	For above example,
	At the start of the function
	bb = B0, current_path = [], current_call_stack = [(None, Main)], current_subroutine_executed = [[]]

	At the end of the function before calling search_paths again
	bb = B0, current_path = [B0], current_call_stack = [(None, Main)], current_subroutine_executed = [[B0]]

	# B0 calls Sa. Calling a subroutine adds a item to current_call_stack and current_subroutine_executed.
	Start
	bb = B3, current_path = [B0], current_call_stack = [(None, Main), (B0, Sa)], current_subroutine_executed = [[B0], []]
	End
	bb = B3, current_path = [B0, B3], current_call_stack = [(None, Main), (B0, Sa)], current_subroutine_executed = [[B0], [B3]]

	# B3 calls Sb. Calling a subroutine adds a item to current_call_stack and current_subroutine_executed.
	Start
	bb = B5, current_path = [B0, B3], current_call_stack = [(None, Main), (B0, Sa), (B3, Sb)], current_subroutine_executed = [[B0], [B3], []]
	End
	bb = B5, current_path = [B0, B3, B5], current_call_stack = [(None, Main), (B0, Sa), (B3, Sb)], current_subroutine_executed = [[B0], [B3], [B5]]

	# B5 returns the execution from Sb. Returning from a subroutine removes a item from current_call_stack, current_subroutine_executed.
	Start
	bb = B4, current_path = [B0, B3, B5], current_call_stack = [(None, Main), (B0, Sa)], current_subroutine_executed = [[B0], [B3]]
	End
	bb = B4, current_path = [B0, B3, B5, B4], current_call_stack = [(None, Main), (B0, Sa)], current_subroutine_executed = [[B0], [B3, B4]]

	Start
	bb = B1, current_path = [B0, B3, B5, B4], current_call_stack = [(None, Main)], current_subroutine_executed = [[B0]]
	End
	bb = B1, current_path = [B0, B3, B5, B4, B1], current_call_stack = [(None, Main)], current_subroutine_executed = [[B0, B1]]

	....
	"""
	# check for loops
	if bb in current_subroutine_executed[-1]:
	logger_detectors.debug(
	f"Loop Path: current_full_path = {current_path}\n, current_subroutine_executed = {current_subroutine_executed[-1]}, current_block: {repr(bb)}"
	)
	return

	if validated_in_block(bb, checks_field):
	logger_detectors.debug(
	f"Validated Path: current_full_path = {current_path}\n, current_block: {repr(bb)}"
	)
	return

	current_path = current_path + [bb]

	# leaf block
	if len(bb.next) == 0:
	logger_detectors.debug(f"Vulnerable Path: current_full_path = {current_path}")
	paths_without_check.append(current_path)
	return

	# do we need to make a copy of lists in [:-1]???
	current_subroutine_executed = current_subroutine_executed[:-1] + [
	current_subroutine_executed[-1] + [bb]
	]
	if isinstance(bb.exit_instr, Callsub):
	teal = bb.teal
	assert teal is not None
	called_subroutine = teal.subroutines[bb.exit_instr.label]
	# check for recursion
	already_called_subroutines = [frame[1] for frame in current_call_stack]
	if called_subroutine in already_called_subroutines:
	# recursion
	return
	current_call_stack = current_call_stack + [(bb, called_subroutine)]
	current_subroutine_executed = current_subroutine_executed + [[]]

	if isinstance(bb.exit_instr, Retsub):
	# if this block is retsub then it returns execution to the return
	# point of callsub block. return point is the next instruction after the callsub
	# instruction.
	(callsub_block, _) = current_call_stack[-1]
	assert callsub_block is not None
	return_point = callsub_block.sub_return_point
	if return_point is not None and return_point in bb.next:
	search_paths(
	return_point,
	current_path,
	paths_without_check,
	current_call_stack[:-1], # returning from a subroutine
	current_subroutine_executed[:-1],
	)
	else:
	for next_bb in bb.next:
	search_paths(
	next_bb,
	current_path,
	paths_without_check,
	current_call_stack,
	current_subroutine_executed,
	)

	teal = entry_block.teal
	assert teal is not None
	paths_without_check: List[List["BasicBlock"]] = []
	search_paths(entry_block, [], paths_without_check, [(None, teal.main)], [[]])
	return paths_without_check

	def _check_groupsize(
	self,
	bb: BasicBlock,
	current_path: List[BasicBlock],
	paths_without_check: List[List[BasicBlock]],
	used_abs_index: bool,
	) -> None:
	"""Find execution paths with missing GroupSize check.

	This function recursively explores the Control Flow Graph(CFG) of the
	contract and reports execution paths with missing GroupSize
	check.

	This function is "in place", modifies arguments with the data it is
	supposed to return.

	Args:
	bb: Current basic block being checked(whose execution is simulated.)
	current_path: Current execution path being explored.
	paths_without_check:
	Execution paths with missing GroupSize check. This is a
	"in place" argument. Vulnerable paths found by this function are
	appended to this list.
	used_abs_index: Should be True if absolute index in `current_path`.
	"""

	# check for loops
	if bb in current_path:
	return

	if MAX_GROUP_SIZE not in bb.transaction_context.group_sizes:
	# GroupSize is checked
	return

	if not used_abs_index:
	used_abs_index = self._accessed_using_absolute_index(bb)

	current_path = current_path + [bb]
	if not bb.next:
	# leaf block
	if used_abs_index:
	# accessed a field using absolute index in this path.
	paths_without_check.append(current_path)
	return
	for next_bb in bb.next:
	self._check_groupsize(next_bb, current_path, paths_without_check, used_abs_index)

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Fix execution paths generated by group-size detector #160

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Fix execution paths generated by group-size detector #160

Description

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions