google-ml-infra · danielibarrola · Dec 11, 2025
diff --git a/culprit_finder/README.md b/culprit_finder/README.md
@@ -57,6 +57,8 @@ culprit-finder --repo <OWNER/REPO> --start <GOOD_SHA> --end <BAD_SHA> --workflow
 - `--start`: The full or short SHA of the last known **good** commit.
 - `--end`: The full or short SHA of the first known **bad** commit.
 - `--workflow`: The filename of the GitHub Actions workflow to run (e.g., `ci.yml`, `tests.yaml`).
+- `--job` (Optional): The name of a specific job within the workflow whose conclusion decides pass/fail.
+  If not provided, the tool checks the overall workflow conclusion.
 
 ### Example
 
@@ -66,6 +68,7 @@ culprit-finder
 --start a1b2c3d
 --end e5f6g7h
 --workflow build_and_test.yml
+--job unit-tests
 ```
 
 

diff --git a/culprit_finder/src/culprit_finder/cli.py b/culprit_finder/src/culprit_finder/cli.py
@@ -49,6 +49,11 @@ def main() -> None:
     required=True,
     help="Workflow filename (e.g., build_and_test.yml)",
   )
+  parser.add_argument(
+    "--job",
+    required=False,
+    help="The specific job name within the workflow to monitor for pass/fail",
+  )
 
   args = parser.parse_args()
 
@@ -65,6 +70,7 @@ def main() -> None:
   logging.info("Start commit: %s", args.start)
   logging.info("End commit: %s", args.end)
   logging.info("Workflow: %s", args.workflow)
+  logging.info("Job: %s", args.job)
 
   has_culprit_finder_workflow = any(
     wf["path"] == ".github/workflows/culprit_finder.yml"
@@ -80,6 +86,7 @@ def main() -> None:
     workflow_file=args.workflow,
     has_culprit_finder_workflow=has_culprit_finder_workflow,
     github_client=gh_client,
+    job=args.job,
   )
   culprit_commit = finder.run_bisection()
   if culprit_commit:

diff --git a/culprit_finder/src/culprit_finder/culprit_finder.py b/culprit_finder/src/culprit_finder/culprit_finder.py
@@ -24,6 +24,7 @@ def __init__(
     workflow_file: str,
     has_culprit_finder_workflow: bool,
     github_client: github.GithubClient,
+    job: str | None = None,
   ):
     """
     Initializes the CulpritFinder instance.
@@ -34,6 +35,7 @@ def __init__(
         end_sha: The SHA of the first known bad commit.
         workflow_file: The name of the workflow file to test (e.g., 'build.yml').
         has_culprit_finder_workflow: Whether the repo being tested has a Culprit Finder workflow.
+        job: The specific job name within the workflow to monitor for pass/fail.
     """
     self._repo = repo
     self._start_sha = start_sha
@@ -42,6 +44,7 @@ def __init__(
     self._workflow_file = workflow_file
     self._has_culprit_finder_workflow = has_culprit_finder_workflow
     self._gh_client = github_client
+    self._job = job
 
   def _wait_for_workflow_completion(
     self,
@@ -97,6 +100,40 @@ def _wait_for_workflow_completion(
       time.sleep(poll_interval)
     raise TimeoutError("Timed out waiting for workflow to complete")
 
+  def _get_target_job(self, jobs: list[github.Job]) -> github.Job:
+    """
+    Finds the job named by `self._job`, handling nested caller/called names.
+
+    Args:
+        jobs: A list of Job objects from a workflow run.
+
+    Returns:
+        The first Job whose (normalized) name equals the target job name.
+
+    Raises:
+        ValueError: If the specified job is not found in the workflow run.
+    """
+
+    def get_job_name(name: str) -> str:
+      if self._has_culprit_finder_workflow:
+        # when calling a workflow from another workflow, the job name is
+        # in the format "Caller Job Name / Called Job Name"
+        return name.split("/")[-1].strip()
+      return name
+
+    available = [get_job_name(job["name"]) for job in jobs]
+    for job, name in zip(jobs, available):
+      if name == self._job:
+        return job
+
+    logging.error(
+      "Job %s not found in workflow %s, available jobs: %s",
+      self._job,
+      self._workflow_file,
+      ", ".join(available) or "<none>",
+    )
+    raise ValueError(f"Job {self._job} not found in workflow {self._workflow_file}")
+
   def _test_commit(
     self,
     commit_sha: str,
@@ -147,6 +184,11 @@ def _test_commit(
       logging.error("Workflow failed to complete")
       return False
 
+    if self._job:
+      jobs = self._gh_client.get_run_jobs(run["databaseId"])
+      target_job = self._get_target_job(jobs)
+      return target_job["conclusion"] == "success"
+
     return run["conclusion"] == "success"
 
   def run_bisection(self) -> github.Commit | None:

diff --git a/culprit_finder/src/culprit_finder/github.py b/culprit_finder/src/culprit_finder/github.py
@@ -5,7 +5,7 @@
 import subprocess
 import json
 import logging
-from typing import Optional, TypedDict
+from typing import Optional, TypedDict, NotRequired
 
 
 class Commit(TypedDict):
@@ -18,6 +18,22 @@ class Workflow(TypedDict):
   path: str
 
 
+class Job(TypedDict):
+  """Represents one job of a GitHub Actions workflow run (an item of `Run.jobs`).
+
+  Attributes:
+      name: The job's display name (called-workflow jobs appear as "Caller / Called").
+      databaseId: The unique identifier for the job in the GitHub database.
+      conclusion: The conclusion of the job (e.g., "success", "failure", "cancelled").
+      status: The current status of the job (e.g., "completed", "in_progress", "queued").
+  """
+
+  name: str
+  databaseId: int
+  conclusion: str
+  status: str
+
+
 class Run(TypedDict):
   """Represents a GitHub Actions workflow run.
 
@@ -28,6 +44,7 @@ class Run(TypedDict):
       conclusion: The conclusion of the workflow run if completed (e.g., "success", "failure", "cancelled"). Optional.
       databaseId: The unique identifier for the workflow run in the GitHub database.
       url: The URL to the workflow run on GitHub.
+      jobs: A list of jobs associated with the workflow run. Optional.
   """
 
   headSha: str
@@ -36,6 +53,7 @@ class Run(TypedDict):
   conclusion: Optional[str]
   databaseId: int
   url: str
+  jobs: NotRequired[list[Job]]
 
 
 class GithubClient:
@@ -223,3 +241,38 @@ def get_workflows(self) -> list[Workflow]:
     cmd = ["workflow", "list", "--json", "path,name", "--repo", self.repo]
     workflows = self._run_command(cmd)
     return json.loads(workflows)
+
+  def get_run(self, run_id: str) -> Run:
+    """
+    Fetches a single workflow run and decodes the JSON reply.
+
+    Args:
+        run_id: The unique database ID or number of the workflow run.
+
+    Returns:
+        A Run object containing metadata such as head SHA, status, and conclusion.
+    """
+    fields = (
+      "headSha",
+      "status",
+      "createdAt",
+      "conclusion",
+      "databaseId",
+      "url",
+      "jobs",
+    )
+    cmd = ["run", "view", run_id, "--json", ",".join(fields), "--repo", self.repo]
+    return json.loads(self._run_command(cmd))
+
+  def get_run_jobs(self, run_id: int) -> list[Job]:
+    """
+    Returns the jobs that belong to a given workflow run.
+
+    Args:
+        run_id: The database ID of the workflow run.
+
+    Returns:
+        The run's Job objects, or an empty list when the run has no "jobs" key.
+    """
+    run_details = self.get_run(str(run_id))
+    return run_details["jobs"] if "jobs" in run_details else []
diff --git a/culprit_finder/tests/test_cli.py b/culprit_finder/tests/test_cli.py
@@ -195,6 +195,7 @@ def test_cli_success(
     workflow_file="test.yml",
     has_culprit_finder_workflow=has_culprit_workflow,
     github_client=mock_gh_client_instance,
+    job=None,
   )
   mock_finder.return_value.run_bisection.assert_called_once()
 

diff --git a/culprit_finder/tests/test_culprit_finder.py b/culprit_finder/tests/test_culprit_finder.py
@@ -4,6 +4,7 @@
 import re
 import pytest
 from datetime import datetime, timezone
+import random
 
 WORKFLOW_FILE = "test_workflow.yml"
 CULPRIT_WORKFLOW = "culprit_finder.yml"
@@ -116,6 +117,107 @@ def test_test_commit_failure(mocker, finder, mock_gh_client):
   assert finder._test_commit("sha", "branch") is False
 
 
+def _create_job(name: str, conclusion: str) -> github.Job:
+  """Builds a completed Job with a random databaseId for use in tests."""
+  # The id value is arbitrary; no visible test asserts on a specific one.
+  job_id = random.randint(1, 10000)
+  return github.Job(
+    name=name, conclusion=conclusion, status="completed", databaseId=job_id
+  )
+
+
+@pytest.mark.parametrize("has_culprit_workflow", [True, False])
+def test_test_commit_with_specific_job(mocker, mock_gh_client, has_culprit_workflow):
+  """Tests that _test_commit judges by the target job, not the run, when job is set."""
+  finder = culprit_finder.CulpritFinder(
+    repo=REPO,
+    start_sha="start_sha",
+    end_sha="end_sha",
+    workflow_file=WORKFLOW_FILE,
+    has_culprit_finder_workflow=has_culprit_workflow,
+    github_client=mock_gh_client,
+    job="test-job",
+  )
+
+  branch = "test-branch"
+  commit_sha = "sha1"
+  run_id = 123
+
+  mock_wait = mocker.patch.object(finder, "_wait_for_workflow_completion")
+  mock_wait.return_value = {"conclusion": "failure", "databaseId": run_id}
+
+  prefix = "Caller Job / " if has_culprit_workflow else ""
+
+  mock_gh_client.get_run_jobs.return_value = [
+    _create_job(f"{prefix}test-job", "success"),
+    _create_job(f"{prefix}other-job", "failure"),
+  ]
+
+  is_good = finder._test_commit(commit_sha, branch)
+
+  assert is_good is True
+  mock_gh_client.get_run_jobs.assert_called_once_with(run_id)
+
+  if has_culprit_workflow:
+    expected_workflow = CULPRIT_WORKFLOW
+    expected_inputs = {"workflow-to-debug": WORKFLOW_FILE}
+  else:
+    expected_workflow = WORKFLOW_FILE
+    expected_inputs = {}
+
+  mock_gh_client.trigger_workflow.assert_called_once_with(
+    expected_workflow,
+    branch,
+    expected_inputs,
+  )
+
+
+@pytest.mark.parametrize("has_culprit_workflow", [True, False])
+def test_find_job(mock_gh_client, has_culprit_workflow):
+  """Tests that _get_target_job finds a job with or without culprit workflow."""
+  finder = culprit_finder.CulpritFinder(
+    repo=REPO,
+    start_sha="start_sha",
+    end_sha="end_sha",
+    workflow_file=WORKFLOW_FILE,
+    has_culprit_finder_workflow=has_culprit_workflow,
+    github_client=mock_gh_client,
+    job="target-job",
+  )
+
+  prefix = "Caller Job / " if has_culprit_workflow else ""
+  jobs = [
+    _create_job(f"{prefix}other-job", "success"),
+    _create_job(f"{prefix}target-job", "failure"),
+    _create_job(f"{prefix}another-job", "success"),
+  ]
+
+  job = finder._get_target_job(jobs)
+
+  assert job == jobs[1]
+
+
+def test_find_job_not_found(mock_gh_client):
+  """Tests that _get_target_job raises ValueError when the job is not found."""
+  finder = culprit_finder.CulpritFinder(
+    repo=REPO,
+    start_sha="start_sha",
+    end_sha="end_sha",
+    workflow_file=WORKFLOW_FILE,
+    has_culprit_finder_workflow=False,
+    github_client=mock_gh_client,
+    job="missing-job",
+  )
+
+  jobs = [
+    _create_job("job1", "success"),
+    _create_job("job2", "success"),
+  ]
+
+  with pytest.raises(ValueError, match="Job missing-job not found in workflow"):
+    finder._get_target_job(jobs)
+
+
 def _create_commit(sha: str, message: str) -> github.Commit:
   return {"sha": sha, "message": message}