Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions culprit_finder/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ culprit-finder --repo <OWNER/REPO> --start <GOOD_SHA> --end <BAD_SHA> --workflow
- `--start`: The full or short SHA of the last known **good** commit.
- `--end`: The full or short SHA of the first known **bad** commit.
- `--workflow`: The filename of the GitHub Actions workflow to run (e.g., `ci.yml`, `tests.yaml`).
- `--job` (Optional): The specific job name within the workflow to monitor for pass/fail.
If not provided, the tool checks the overall workflow conclusion.

### Example

Expand All @@ -66,6 +68,7 @@ culprit-finder
--start a1b2c3d
--end e5f6a7b
--workflow build_and_test.yml
--job unit-tests
```


Expand Down
7 changes: 7 additions & 0 deletions culprit_finder/src/culprit_finder/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,11 @@ def main() -> None:
required=True,
help="Workflow filename (e.g., build_and_test.yml)",
)
parser.add_argument(
"--job",
required=False,
help="The specific job name within the workflow to monitor for pass/fail",
)

args = parser.parse_args()

Expand All @@ -65,6 +70,7 @@ def main() -> None:
logging.info("Start commit: %s", args.start)
logging.info("End commit: %s", args.end)
logging.info("Workflow: %s", args.workflow)
logging.info("Job: %s", args.job)

has_culprit_finder_workflow = any(
wf["path"] == ".github/workflows/culprit_finder.yml"
Expand All @@ -80,6 +86,7 @@ def main() -> None:
workflow_file=args.workflow,
has_culprit_finder_workflow=has_culprit_finder_workflow,
github_client=gh_client,
job=args.job,
)
culprit_commit = finder.run_bisection()
if culprit_commit:
Expand Down
42 changes: 42 additions & 0 deletions culprit_finder/src/culprit_finder/culprit_finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ def __init__(
workflow_file: str,
has_culprit_finder_workflow: bool,
github_client: github.GithubClient,
job: str | None = None,
):
"""
Initializes the CulpritFinder instance.
Expand All @@ -34,6 +35,7 @@ def __init__(
end_sha: The SHA of the first known bad commit.
workflow_file: The name of the workflow file to test (e.g., 'build.yml').
has_culprit_finder_workflow: Whether the repo being tested has a Culprit Finder workflow.
job: The specific job name within the workflow to monitor for pass/fail.
"""
self._repo = repo
self._start_sha = start_sha
Expand All @@ -42,6 +44,7 @@ def __init__(
self._workflow_file = workflow_file
self._has_culprit_finder_workflow = has_culprit_finder_workflow
self._gh_client = github_client
self._job = job

def _wait_for_workflow_completion(
self,
Expand Down Expand Up @@ -97,6 +100,40 @@ def _wait_for_workflow_completion(
time.sleep(poll_interval)
raise TimeoutError("Timed out waiting for workflow to complete")

def _get_target_job(self, jobs: list[github.Job]) -> github.Job:
    """
    Finds the job named by ``self._job`` in the job list of a workflow run.

    When the repository has a Culprit Finder workflow, the job name reported
    for a called workflow has the form "Caller Job Name / Called Job Name", so
    only the last "/"-separated segment (whitespace-stripped) is compared
    against the requested name. Otherwise the job name is compared verbatim.

    Args:
        jobs: A list of Job objects from a workflow run.

    Returns:
        The first Job object whose name matches the target job name.

    Raises:
        ValueError: If the specified job is not found in the workflow run.
    """

    def get_job_name(name: str) -> str:
        if self._has_culprit_finder_workflow:
            # when calling a workflow from another workflow, the job name is
            # in the format "Caller Job Name / Called Job Name"
            return name.rsplit("/", 1)[-1].strip()
        return name

    for job in jobs:
        if get_job_name(job["name"]) == self._job:
            return job

    # List what the run actually contained so a typo in --job is easy to spot.
    available = ", ".join(job["name"] for job in jobs) or "<none>"
    logging.error(
        "Job %s not found in workflow %s; jobs in run: %s",
        self._job,
        self._workflow_file,
        available,
    )
    raise ValueError(f"Job {self._job} not found in workflow {self._workflow_file}")

def _test_commit(
self,
commit_sha: str,
Expand Down Expand Up @@ -147,6 +184,11 @@ def _test_commit(
logging.error("Workflow failed to complete")
return False

if self._job:
jobs = self._gh_client.get_run_jobs(run["databaseId"])
target_job = self._get_target_job(jobs)
return target_job["conclusion"] == "success"

return run["conclusion"] == "success"

def run_bisection(self) -> github.Commit | None:
Expand Down
55 changes: 54 additions & 1 deletion culprit_finder/src/culprit_finder/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import subprocess
import json
import logging
from typing import Optional, TypedDict
from typing import Optional, TypedDict, NotRequired


class Commit(TypedDict):
Expand All @@ -18,6 +18,22 @@ class Workflow(TypedDict):
path: str


class Job(TypedDict):
    """A single job inside a GitHub Actions workflow run."""

    # Name of the job.
    name: str
    # Unique identifier of the job in the GitHub database.
    databaseId: int
    # Outcome of the job (e.g., "success", "failure", "cancelled").
    conclusion: str
    # Current state of the job (e.g., "completed", "in_progress", "queued").
    status: str


class Run(TypedDict):
"""Represents a GitHub Actions workflow run.

Expand All @@ -28,6 +44,7 @@ class Run(TypedDict):
conclusion: The conclusion of the workflow run if completed (e.g., "success", "failure", "cancelled"). Optional.
databaseId: The unique identifier for the workflow run in the GitHub database.
url: The URL to the workflow run on GitHub.
jobs: A list of jobs associated with the workflow run. Optional.
"""

headSha: str
Expand All @@ -36,6 +53,7 @@ class Run(TypedDict):
conclusion: Optional[str]
databaseId: int
url: str
jobs: NotRequired[list[Job]]


class GithubClient:
Expand Down Expand Up @@ -223,3 +241,38 @@ def get_workflows(self) -> list[Workflow]:
cmd = ["workflow", "list", "--json", "path,name", "--repo", self.repo]
workflows = self._run_command(cmd)
return json.loads(workflows)

def get_run(self, run_id: str | int) -> Run:
    """
    Retrieves detailed information about a specific workflow run.

    Runs the CLI command ``run view <run_id> --json ... --repo <repo>`` through
    ``self._run_command`` and parses its JSON output.

    Args:
        run_id: The unique database ID or number of the workflow run. Integer
            IDs (such as ``Run["databaseId"]``) are accepted and converted to
            a string before being placed on the command line.

    Returns:
        A Run object containing metadata such as head SHA, status, and conclusion,
        plus the run's jobs.
    """
    cmd = [
        "run",
        "view",
        # Command arguments must be strings; coerce so int IDs work too.
        str(run_id),
        "--json",
        "headSha,status,createdAt,conclusion,databaseId,url,jobs",
        "--repo",
        self.repo,
    ]
    return json.loads(self._run_command(cmd))

def get_run_jobs(self, run_id: int) -> list[Job]:
    """
    Retrieves the list of jobs for a specific workflow run.

    Args:
        run_id: The database ID of the workflow run.

    Returns:
        A list of Job objects. Returns an empty list if the run data has no
        "jobs" entry or if that entry is null/empty.
    """
    run = self.get_run(str(run_id))
    # `.get("jobs", [])` would leak None when the key is present but null.
    return run.get("jobs") or []
1 change: 1 addition & 0 deletions culprit_finder/tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,7 @@ def test_cli_success(
workflow_file="test.yml",
has_culprit_finder_workflow=has_culprit_workflow,
github_client=mock_gh_client_instance,
job=None,
)
mock_finder.return_value.run_bisection.assert_called_once()

Expand Down
102 changes: 102 additions & 0 deletions culprit_finder/tests/test_culprit_finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import re
import pytest
from datetime import datetime, timezone
import random

WORKFLOW_FILE = "test_workflow.yml"
CULPRIT_WORKFLOW = "culprit_finder.yml"
Expand Down Expand Up @@ -116,6 +117,107 @@ def test_test_commit_failure(mocker, finder, mock_gh_client):
assert finder._test_commit("sha", "branch") is False


def _create_job(name: str, conclusion: str) -> github.Job:
    """Builds a completed Job dict with the given name and conclusion.

    The ``databaseId`` is a random integer between 1 and 10000 (inclusive).
    """
    job: github.Job = dict(
        name=name,
        conclusion=conclusion,
        status="completed",
        databaseId=random.randint(1, 10000),
    )
    return job


@pytest.mark.parametrize("has_culprit_workflow", [True, False])
def test_test_commit_with_specific_job(mocker, mock_gh_client, has_culprit_workflow):
    """Checks that `_test_commit` judges a commit by the configured job.

    The run as a whole concludes with "failure" while the monitored job
    concludes with "success", so the commit must be reported as good.
    """
    target_branch = "test-branch"
    sha = "sha1"
    workflow_run_id = 123
    # With the Culprit Finder workflow, job names carry a caller prefix.
    name_prefix = "Caller Job / " if has_culprit_workflow else ""

    finder = culprit_finder.CulpritFinder(
        repo=REPO,
        start_sha="start_sha",
        end_sha="end_sha",
        workflow_file=WORKFLOW_FILE,
        has_culprit_finder_workflow=has_culprit_workflow,
        github_client=mock_gh_client,
        job="test-job",
    )
    mocker.patch.object(
        finder,
        "_wait_for_workflow_completion",
        return_value={"conclusion": "failure", "databaseId": workflow_run_id},
    )
    mock_gh_client.get_run_jobs.return_value = [
        _create_job(f"{name_prefix}{job_name}", job_conclusion)
        for job_name, job_conclusion in (
            ("test-job", "success"),
            ("other-job", "failure"),
        )
    ]

    assert finder._test_commit(sha, target_branch) is True
    mock_gh_client.get_run_jobs.assert_called_once_with(workflow_run_id)

    expected_workflow, expected_inputs = (
        (CULPRIT_WORKFLOW, {"workflow-to-debug": WORKFLOW_FILE})
        if has_culprit_workflow
        else (WORKFLOW_FILE, {})
    )
    mock_gh_client.trigger_workflow.assert_called_once_with(
        expected_workflow,
        target_branch,
        expected_inputs,
    )


@pytest.mark.parametrize("has_culprit_workflow", [True, False])
def test_find_job(mock_gh_client, has_culprit_workflow):
    """Checks that `_get_target_job` returns the matching job, with or without the caller prefix."""
    # With the Culprit Finder workflow, job names carry a caller prefix.
    name_prefix = "Caller Job / " if has_culprit_workflow else ""
    candidates = [
        _create_job(name_prefix + job_name, job_conclusion)
        for job_name, job_conclusion in (
            ("other-job", "success"),
            ("target-job", "failure"),
            ("another-job", "success"),
        )
    ]

    finder = culprit_finder.CulpritFinder(
        repo=REPO,
        start_sha="start_sha",
        end_sha="end_sha",
        workflow_file=WORKFLOW_FILE,
        has_culprit_finder_workflow=has_culprit_workflow,
        github_client=mock_gh_client,
        job="target-job",
    )

    assert finder._get_target_job(candidates) == candidates[1]


def test_find_job_not_found(mock_gh_client):
    """Checks that `_get_target_job` raises ValueError when no job has the requested name."""
    available_jobs = [_create_job(job_name, "success") for job_name in ("job1", "job2")]

    finder = culprit_finder.CulpritFinder(
        repo=REPO,
        start_sha="start_sha",
        end_sha="end_sha",
        workflow_file=WORKFLOW_FILE,
        has_culprit_finder_workflow=False,
        github_client=mock_gh_client,
        job="missing-job",
    )

    with pytest.raises(ValueError, match="Job missing-job not found in workflow"):
        finder._get_target_job(available_jobs)


def _create_commit(sha: str, message: str) -> github.Commit:
    """Builds a Commit dict from the given SHA and commit message."""
    commit: github.Commit = dict(sha=sha, message=message)
    return commit

Expand Down