From a1da8b3e331a03ae6bb71b7ca98ba808e3f323c8 Mon Sep 17 00:00:00 2001 From: Daniel Ibarrola Sanchez Date: Mon, 29 Dec 2025 15:36:29 +0000 Subject: [PATCH] Add retry mechanism for workflow runs --- culprit_finder/README.md | 1 + culprit_finder/src/culprit_finder/cli.py | 9 ++++ .../src/culprit_finder/culprit_finder.py | 31 ++++++++++--- culprit_finder/tests/test_cli.py | 2 + culprit_finder/tests/test_culprit_finder.py | 46 +++++++++++++++++-- 5 files changed, 78 insertions(+), 11 deletions(-) diff --git a/culprit_finder/README.md b/culprit_finder/README.md index 8edea27..5aae23a 100644 --- a/culprit_finder/README.md +++ b/culprit_finder/README.md @@ -57,6 +57,7 @@ culprit-finder --repo --start --end --workflow - `--start`: The full or short SHA of the last known **good** commit. - `--end`: The full or short SHA of the first known **bad** commit. - `--workflow`: The filename of the GitHub Actions workflow to run (e.g., `ci.yml`, `tests.yaml`). +- `--retry`: (Optional) Number of times to retry the workflow run if it fails (default: 0). - `--clear-cache`: (Optional) Deletes the local state file before execution to start a fresh bisection. ### State Persistence and Resuming diff --git a/culprit_finder/src/culprit_finder/cli.py b/culprit_finder/src/culprit_finder/cli.py index 37d7668..75b9266 100644 --- a/culprit_finder/src/culprit_finder/cli.py +++ b/culprit_finder/src/culprit_finder/cli.py @@ -55,6 +55,13 @@ def main() -> None: action="store_true", help="Deletes the local state file before execution", ) + parser.add_argument( + "--retry", + required=False, + help="Number of times to retry the workflow run if it fails (default: 0).", + default=0, + type=int, + ) args = parser.parse_args() @@ -107,6 +114,7 @@ def main() -> None: logging.info("Start commit: %s", args.start) logging.info("End commit: %s", args.end) logging.info("Workflow: %s", args.workflow) + logging.info("Retries: %s", args.retry) has_culprit_finder_workflow = any( wf["path"] == ".github/workflows/culprit_finder.yml" @@ -124,6 +132,7 @@ def main() -> None: github_client=gh_client, state=state, state_persister=state_persister, + retries=args.retry, ) try: diff --git a/culprit_finder/src/culprit_finder/culprit_finder.py b/culprit_finder/src/culprit_finder/culprit_finder.py index ce29408..d98dc66 100644 --- a/culprit_finder/src/culprit_finder/culprit_finder.py +++ b/culprit_finder/src/culprit_finder/culprit_finder.py @@ -27,6 +27,7 @@ def __init__( github_client: github.GithubClient, state: culprit_finder_state.CulpritFinderState, state_persister: culprit_finder_state.StatePersister, + retries: int = 0, ): """ Initializes the CulpritFinder instance. @@ -40,6 +41,7 @@ def __init__( github_client: The GithubClient instance used to interact with GitHub. state: The CulpritFinderState object containing the current bisection state. state_persister: The StatePersister object used to save the bisection state. + retries: Number of times to retry workflow runs in case of failure. """ self._repo = repo self._start_sha = start_sha @@ -50,6 +52,7 @@ def __init__( self._gh_client = github_client self._state = state self._state_persister = state_persister + self._retries = retries def _wait_for_workflow_completion( self, @@ -145,14 +148,28 @@ def _test_commit( inputs, ) - run = self._wait_for_workflow_completion( - workflow_to_trigger, - branch_name, - commit_sha, - previous_run_id, - ) + run: github.Run | None = None + for attempt in range(self._retries + 1): + run = self._wait_for_workflow_completion( + workflow_to_trigger, + branch_name, + commit_sha, + previous_run_id, + ) + + if run and run["conclusion"] == "success": + return True + + if attempt < self._retries: + logging.info( + "Retrying workflow for commit %s (attempt %d/%d)", + commit_sha, + attempt + 1, + self._retries, + ) + if not run: - logging.error("Workflow failed to complete") + logging.error("Workflow failed to complete for commit %s", commit_sha) return False return run["conclusion"] == "success" diff --git a/culprit_finder/tests/test_cli.py b/culprit_finder/tests/test_cli.py index 23e5c09..b278890 100644 --- a/culprit_finder/tests/test_cli.py +++ b/culprit_finder/tests/test_cli.py @@ -235,6 +235,7 @@ def test_cli_success( workflow_file="test.yml", has_culprit_finder_workflow=has_culprit_workflow, github_client=mock_gh_client_instance, + retries=0, state=expected_state, state_persister=patches["state_persister_inst"], ) @@ -310,6 +311,7 @@ def test_cli_state_management( state=existing_state, github_client=mock_gh_client_instance, state_persister=patches["state_persister_inst"], + retries=0, ) else: # If not exists or discarded, new state created diff --git a/culprit_finder/tests/test_culprit_finder.py b/culprit_finder/tests/test_culprit_finder.py index f2a7fd7..ae9f944 100644 --- a/culprit_finder/tests/test_culprit_finder.py +++ b/culprit_finder/tests/test_culprit_finder.py @@ -24,9 +24,8 @@ def mock_state_persister(mocker): @pytest.fixture -def finder(request, mock_gh_client, mock_state_persister): - """Returns a CulpritFinder instance for testing.""" - state: culprit_finder_state.CulpritFinderState = { +def mock_state() -> culprit_finder_state.CulpritFinderState: + return { "repo": "test_repo", "workflow": "test_workflow", "original_start": "original_start_sha", @@ -35,6 +34,11 @@ def finder(request, mock_gh_client, mock_state_persister): "current_bad": "", "cache": {}, } + + +@pytest.fixture +def finder(request, mock_gh_client, mock_state_persister, mock_state): + """Returns a CulpritFinder instance for testing.""" has_culprit_finder_workflow = getattr(request, "param", True) return culprit_finder.CulpritFinder( repo=REPO, @@ -43,7 +47,7 @@ def finder(request, mock_gh_client, mock_state_persister): workflow_file=WORKFLOW_FILE, has_culprit_finder_workflow=has_culprit_finder_workflow, github_client=mock_gh_client, - state=state, + state=mock_state, state_persister=mock_state_persister, ) @@ -91,6 +95,40 @@ def test_wait_for_workflow_completion_success(mocker, finder, mock_gh_client): assert call_args[0][0] == workflow +def test_test_commit_with_retries( + mocker, mock_gh_client, mock_state, mock_state_persister +): + """Tests that _test_commit retries the specified number of times on failure.""" + mocker.patch("culprit_finder.culprit_finder.github") + + finder = culprit_finder.CulpritFinder( + repo=REPO, + start_sha="start_sha", + end_sha="end_sha", + workflow_file=WORKFLOW_FILE, + has_culprit_finder_workflow=True, + github_client=mock_gh_client, + state=mock_state, + state_persister=mock_state_persister, + retries=2, + ) + + branch = "test-branch" + commit_sha = "sha1" + + mock_wait = mocker.patch.object(finder, "_wait_for_workflow_completion") + mock_wait.side_effect = [ + {"conclusion": "failure"}, # Initial attempt + {"conclusion": "failure"}, # First retry + {"conclusion": "success"}, # Second retry + ] + + is_good = finder._test_commit(commit_sha, branch) + + assert is_good is True + assert mock_wait.call_count == 3 + + @pytest.mark.parametrize("finder", [True, False], indirect=True) def test_test_commit_success(mocker, finder, mock_gh_client): """Tests that _test_commit triggers the workflow and returns True on success."""