From 6f3afc04af285147ae398432deb35ff5321f8897 Mon Sep 17 00:00:00 2001 From: Daniel Ibarrola Sanchez Date: Thu, 11 Dec 2025 15:22:02 +0000 Subject: [PATCH] Bisect a specific job --- culprit_finder/README.md | 2 + culprit_finder/src/culprit_finder/cli.py | 7 + .../src/culprit_finder/culprit_finder.py | 42 ++++++ culprit_finder/src/culprit_finder/github.py | 54 +++++++- culprit_finder/tests/test_cli.py | 2 + culprit_finder/tests/test_culprit_finder.py | 124 +++++++++++++++++- 6 files changed, 226 insertions(+), 5 deletions(-) diff --git a/culprit_finder/README.md b/culprit_finder/README.md index 8edea27..eee0a41 100644 --- a/culprit_finder/README.md +++ b/culprit_finder/README.md @@ -57,6 +57,8 @@ culprit-finder --repo --start --end --workflow - `--start`: The full or short SHA of the last known **good** commit. - `--end`: The full or short SHA of the first known **bad** commit. - `--workflow`: The filename of the GitHub Actions workflow to run (e.g., `ci.yml`, `tests.yaml`). +- `--job` (Optional): The specific job name within the workflow to monitor for pass/fail. +If not provided, the tool checks the overall workflow conclusion. - `--clear-cache`: (Optional) Deletes the local state file before execution to start a fresh bisection. ### State Persistence and Resuming diff --git a/culprit_finder/src/culprit_finder/cli.py b/culprit_finder/src/culprit_finder/cli.py index 37d7668..3d9f751 100644 --- a/culprit_finder/src/culprit_finder/cli.py +++ b/culprit_finder/src/culprit_finder/cli.py @@ -50,6 +50,11 @@ def main() -> None: required=True, help="Workflow filename (e.g., build_and_test.yml)", ) + parser.add_argument( + "--job", + required=False, + help="The specific job name within the workflow to monitor for pass/fail", + ) parser.add_argument( "--clear-cache", action="store_true", @@ -107,6 +112,7 @@ def main() -> None: logging.info("Start commit: %s", args.start) logging.info("End commit: %s", args.end) logging.info("Workflow: %s", args.workflow) + logging.info("Job: %s", args.job) has_culprit_finder_workflow = any( wf["path"] == ".github/workflows/culprit_finder.yml" @@ -124,6 +130,7 @@ def main() -> None: github_client=gh_client, state=state, state_persister=state_persister, + job=args.job, ) try: diff --git a/culprit_finder/src/culprit_finder/culprit_finder.py b/culprit_finder/src/culprit_finder/culprit_finder.py index ce29408..aa66d3e 100644 --- a/culprit_finder/src/culprit_finder/culprit_finder.py +++ b/culprit_finder/src/culprit_finder/culprit_finder.py @@ -27,6 +27,7 @@ def __init__( github_client: github.GithubClient, state: culprit_finder_state.CulpritFinderState, state_persister: culprit_finder_state.StatePersister, + job: str | None = None, ): """ Initializes the CulpritFinder instance. @@ -40,6 +41,7 @@ def __init__( github_client: The GithubClient instance used to interact with GitHub. state: The CulpritFinderState object containing the current bisection state. state_persister: The StatePersister object used to save the bisection state. + job: The specific job name within the workflow to monitor for pass/fail. """ self._repo = repo self._start_sha = start_sha @@ -50,6 +52,7 @@ def __init__( self._gh_client = github_client self._state = state self._state_persister = state_persister + self._job = job def _wait_for_workflow_completion( self, @@ -105,6 +108,40 @@ def _wait_for_workflow_completion( time.sleep(poll_interval) raise TimeoutError("Timed out waiting for workflow to complete") + def _get_target_job(self, jobs: list[github.Job]) -> github.Job: + """ + Finds a specific job in the list, handling nested caller/called names. + + Args: + jobs: A list of Job objects from a workflow run. + + Returns: + The Job object that matches the target job name. + + Raises: + ValueError: If the specified job is not found in the workflow run. + """ + + def get_job_name(name: str) -> str: + if self._has_culprit_finder_workflow: + # when calling a workflow from another workflow, the job name is + # in the format "Caller Job Name / Called Job Name" + return name.split("/")[-1].strip() + return name + + target_job = next( + (job for job in jobs if get_job_name(job["name"]) == self._job), None + ) + if target_job: + return target_job + + logging.error( + "Job %s not found, jobs in workflow %s", + self._job, + self._workflow_file, + ) + raise ValueError(f"Job {self._job} not found in workflow {self._workflow_file}") + def _test_commit( self, commit_sha: str, @@ -155,6 +192,11 @@ def _test_commit( logging.error("Workflow failed to complete") return False + if self._job: + jobs = self._gh_client.get_run_jobs(run["databaseId"]) + target_job = self._get_target_job(jobs) + return target_job["conclusion"] == "success" + return run["conclusion"] == "success" def run_bisection(self) -> github.Commit | None: diff --git a/culprit_finder/src/culprit_finder/github.py b/culprit_finder/src/culprit_finder/github.py index 02a6760..a23ef22 100644 --- a/culprit_finder/src/culprit_finder/github.py +++ b/culprit_finder/src/culprit_finder/github.py @@ -6,7 +6,7 @@ import json import logging import time -from typing import Optional, TypedDict +from typing import Optional, TypedDict, NotRequired class Commit(TypedDict): @@ -19,6 +19,22 @@ class Workflow(TypedDict): path: str +class Job(TypedDict): + """Represents a GitHub Actions workflow job. + + Attributes: + name: The name of the job. + databaseId: The unique identifier for the job in the GitHub database. + conclusion: The conclusion of the job (e.g., "success", "failure", "cancelled"). + status: The current status of the job (e.g., "completed", "in_progress", "queued"). + """ + + name: str + databaseId: int + conclusion: str + status: str + + class Run(TypedDict): """Represents a GitHub Actions workflow run. @@ -37,6 +53,7 @@ class Run(TypedDict): conclusion: Optional[str] databaseId: int url: str + jobs: NotRequired[list[Job]] class GithubClient: @@ -246,3 +263,38 @@ def get_workflows(self) -> list[Workflow]: cmd = ["workflow", "list", "--json", "path,name", "--repo", self.repo] workflows = self._run_command(cmd) return json.loads(workflows) + + def get_run(self, run_id: str) -> Run: + """ + Retrieves detailed information about a specific workflow run. + + Args: + run_id: The unique database ID or number of the workflow run. + + Returns: + A Run object containing metadata such as head SHA, status, and conclusion. + """ + cmd = [ + "run", + "view", + run_id, + "--json", + "headSha,status,createdAt,conclusion,databaseId,url,jobs", + "--repo", + self.repo, + ] + run = self._run_command(cmd) + return json.loads(run) + + def get_run_jobs(self, run_id: int) -> list[Job]: + """ + Retrieves the list of jobs for a specific workflow run. + + Args: + run_id: The database ID of the workflow run. + + Returns: + A list of Job objects. Returns an empty list if no jobs are found. + """ + run = self.get_run(str(run_id)) + return run.get("jobs", []) diff --git a/culprit_finder/tests/test_cli.py b/culprit_finder/tests/test_cli.py index 23e5c09..6316cdb 100644 --- a/culprit_finder/tests/test_cli.py +++ b/culprit_finder/tests/test_cli.py @@ -235,6 +235,7 @@ def test_cli_success( workflow_file="test.yml", has_culprit_finder_workflow=has_culprit_workflow, github_client=mock_gh_client_instance, + job=None, state=expected_state, state_persister=patches["state_persister_inst"], ) @@ -310,6 +311,7 @@ def test_cli_state_management( state=existing_state, github_client=mock_gh_client_instance, state_persister=patches["state_persister_inst"], + job=None, ) else: # If not exists or discarded, new state created diff --git a/culprit_finder/tests/test_culprit_finder.py b/culprit_finder/tests/test_culprit_finder.py index f2a7fd7..2651b58 100644 --- a/culprit_finder/tests/test_culprit_finder.py +++ b/culprit_finder/tests/test_culprit_finder.py @@ -5,6 +5,7 @@ import re import pytest from datetime import datetime, timezone +import random WORKFLOW_FILE = "test_workflow.yml" CULPRIT_WORKFLOW = "culprit_finder.yml" @@ -24,9 +25,8 @@ def mock_state_persister(mocker): @pytest.fixture -def finder(request, mock_gh_client, mock_state_persister): - """Returns a CulpritFinder instance for testing.""" - state: culprit_finder_state.CulpritFinderState = { +def mock_state() -> culprit_finder_state.CulpritFinderState: + return { "repo": "test_repo", "workflow": "test_workflow", "original_start": "original_start_sha", @@ -35,6 +35,11 @@ def finder(request, mock_gh_client, mock_state_persister): "current_bad": "", "cache": {}, } + + +@pytest.fixture +def finder(request, mock_gh_client, mock_state_persister, mock_state): + """Returns a CulpritFinder instance for testing.""" has_culprit_finder_workflow = getattr(request, "param", True) return culprit_finder.CulpritFinder( repo=REPO, @@ -43,7 +48,7 @@ def finder(request, mock_gh_client, mock_state_persister): workflow_file=WORKFLOW_FILE, has_culprit_finder_workflow=has_culprit_finder_workflow, github_client=mock_gh_client, - state=state, + state=mock_state, state_persister=mock_state_persister, ) @@ -134,6 +139,117 @@ def test_test_commit_failure(mocker, finder, mock_gh_client): assert finder._test_commit("sha", "branch") is False +def _create_job(name: str, conclusion: str) -> github.Job: + return { + "name": name, + "conclusion": conclusion, + "status": "completed", + "databaseId": random.randint(1, 10000), + } + + +@pytest.mark.parametrize("has_culprit_workflow", [True, False]) +def test_test_commit_with_specific_job( + mocker, mock_gh_client, has_culprit_workflow, mock_state, mock_state_persister +): + """Tests that _test_commit checks a specific job when the job parameter is set.""" + finder = culprit_finder.CulpritFinder( + repo=REPO, + start_sha="start_sha", + end_sha="end_sha", + workflow_file=WORKFLOW_FILE, + has_culprit_finder_workflow=has_culprit_workflow, + github_client=mock_gh_client, + job="test-job", + state=mock_state, + state_persister=mock_state_persister, + ) + + branch = "test-branch" + commit_sha = "sha1" + run_id = 123 + + mock_wait = mocker.patch.object(finder, "_wait_for_workflow_completion") + mock_wait.return_value = {"conclusion": "failure", "databaseId": run_id} + + prefix = "Caller Job / " if has_culprit_workflow else "" + + mock_gh_client.get_run_jobs.return_value = [ + _create_job(f"{prefix}test-job", "success"), + _create_job(f"{prefix}other-job", "failure"), + ] + + is_good = finder._test_commit(commit_sha, branch) + + assert is_good is True + mock_gh_client.get_run_jobs.assert_called_once_with(run_id) + + if has_culprit_workflow: + expected_workflow = CULPRIT_WORKFLOW + expected_inputs = {"workflow-to-debug": WORKFLOW_FILE} + else: + expected_workflow = WORKFLOW_FILE + expected_inputs = {} + + mock_gh_client.trigger_workflow.assert_called_once_with( + expected_workflow, + branch, + expected_inputs, + ) + + +@pytest.mark.parametrize("has_culprit_workflow", [True, False]) +def test_find_job( + mock_gh_client, has_culprit_workflow, mock_state, mock_state_persister +): + """Tests that _find_job correctly finds a job with or without culprit workflow.""" + finder = culprit_finder.CulpritFinder( + repo=REPO, + start_sha="start_sha", + end_sha="end_sha", + workflow_file=WORKFLOW_FILE, + has_culprit_finder_workflow=has_culprit_workflow, + github_client=mock_gh_client, + job="target-job", + state=mock_state, + state_persister=mock_state_persister, + ) + + prefix = "Caller Job / " if has_culprit_workflow else "" + jobs = [ + _create_job(f"{prefix}other-job", "success"), + _create_job(f"{prefix}target-job", "failure"), + _create_job(f"{prefix}another-job", "success"), + ] + + job = finder._get_target_job(jobs) + + assert job == jobs[1] + + +def test_find_job_not_found(mock_gh_client, mock_state, mock_state_persister): + """Tests that _find_job raises ValueError when the job is not found.""" + finder = culprit_finder.CulpritFinder( + repo=REPO, + start_sha="start_sha", + end_sha="end_sha", + workflow_file=WORKFLOW_FILE, + has_culprit_finder_workflow=False, + github_client=mock_gh_client, + job="missing-job", + state=mock_state, + state_persister=mock_state_persister, + ) + + jobs = [ + _create_job("job1", "success"), + _create_job("job2", "success"), + ] + + with pytest.raises(ValueError, match="Job missing-job not found in workflow"): + finder._get_target_job(jobs) + + def _create_commit(sha: str, message: str) -> github.Commit: return {"sha": sha, "message": message}