google-ml-infra · danielibarrola · Dec 26, 2025 · Jan 7, 2026
diff --git a/culprit_finder/README.md b/culprit_finder/README.md
@@ -47,12 +47,19 @@ set -o history
 After installation, you can run the tool using the `culprit-finder` command.
 
 ```shell
-culprit-finder --repo <OWNER/REPO> --start <GOOD_SHA> --end <BAD_SHA> --workflow <WORKFLOW_FILE>
+culprit-finder [URL] --repo <OWNER/REPO> --start <GOOD_SHA> --end <BAD_SHA> --workflow <WORKFLOW_FILE> [FLAGS]
 ```
 
 
 ### Arguments
 
+- `URL`: (Optional) A GitHub Actions Run URL (e.g., `https://github.com/owner/repo/actions/runs/12345`).
+If provided, the tool infers the repository, the workflow file, and either the start or the end SHA from the
+run's outcome (a successful run supplies the start SHA; any other outcome supplies the end SHA).
+- `--repo`: The target GitHub repository in the format `owner/repo`. (Optional if URL is provided).
+- `--start`: The full or short SHA of the last known **good** commit. (Optional if inferred from a successful URL run).
+- `--end`: The full or short SHA of the first known **bad** commit. (Optional if inferred from a failed URL run).
+- `--workflow`: The filename of the GitHub Actions workflow to run (e.g., `ci.yml`, `tests.yaml`). (Optional if URL is provided).
 - `--repo`: The target GitHub repository in the format `owner/repo`.
 - `--start`: The full or short SHA of the last known **good** commit.
 - `--end`: The full or short SHA of the first known **bad** commit.
@@ -78,6 +85,10 @@ culprit-finder
 --workflow build_and_test.yml
 ```
 
+Using a URL to infer details (e.g., starting with a known bad run):
+```shell
+culprit-finder https://github.com/google-ml-infra/actions/actions/runs/123456789 --start a1b2c3d
+```
 
 ## Developer Notes
 

diff --git a/culprit_finder/src/culprit_finder/cli.py b/culprit_finder/src/culprit_finder/cli.py
@@ -10,6 +10,7 @@
 import logging
 import os
 import sys
+import re
 
 from culprit_finder import culprit_finder
 from culprit_finder import culprit_finder_state
@@ -28,26 +29,32 @@ def _validate_repo(repo: str) -> str:
   return repo
 
 
+def _get_repo_from_url(url: str) -> str:  # returns "owner/repo"; raises ValueError if absent
+  match = re.search(r"github\.com/([\w.-]+/[\w.-]+)", url)  # [\w.-] keeps ?query/#fragment out
+  if not match:
+    raise ValueError(f"Could not extract repo from URL: {url}")
+  return match.group(1)
+
+
 def main() -> None:
   """
   Entry point for the culprit finder CLI.
 
   Parses command-line arguments then initiates the bisection process using CulpritFinder.
   """
   parser = argparse.ArgumentParser(description="Culprit finder for GitHub Actions.")
+  parser.add_argument("url", nargs="?", help="GitHub Actions Run URL")
   parser.add_argument(
     "-r",
     "--repo",
-    required=True,
     help="Target GitHub repository (e.g., owner/repo)",
     type=_validate_repo,
   )
-  parser.add_argument("-s", "--start", required=True, help="Last known good commit SHA")
-  parser.add_argument("-e", "--end", required=True, help="First known bad commit SHA")
+  parser.add_argument("-s", "--start", help="Last known good commit SHA")
+  parser.add_argument("-e", "--end", help="First known bad commit SHA")
   parser.add_argument(
     "-w",
     "--workflow",
-    required=True,
     help="Workflow filename (e.g., build_and_test.yml)",
   )
   parser.add_argument(
@@ -58,7 +65,20 @@ def main() -> None:
 
   args = parser.parse_args()
 
-  gh_client = github.GithubClient(repo=args.repo)
+  repo = args.repo
+  start = args.start
+  end = args.end
+  workflow_file_name = args.workflow
+
+  if args.url:
+    repo = _get_repo_from_url(args.url)
+
+  if not repo:
+    parser.error(
+      "the following arguments are required: -r/--repo (or provided via URL)"
+    )
+
+  gh_client = github.GithubClient(repo=repo)
 
   is_authenticated_with_cli = gh_client.check_auth_status()
   has_access_token = os.environ.get("GH_TOKEN") is not None
@@ -67,8 +87,29 @@ def main() -> None:
     logging.error("Not authenticated with GitHub CLI or GH_TOKEN env var is not set.")
     sys.exit(1)
 
+  if args.url:
+    run = gh_client.get_run_from_url(args.url)
+    workflow = gh_client.get_workflow(run["workflowDatabaseId"])
+    if run["conclusion"] == "success":  # outcome is in "conclusion"; "status" is lifecycle only
+      start = run["headSha"]
+    else:
+      end = run["headSha"]
+    workflow_file_name = workflow["path"].split("/")[-1]
+
+  if not start:
+    parser.error("the following arguments are required: -s/--start")
+  if not end:
+    parser.error("the following arguments are required: -e/--end")
+  if not workflow_file_name:
+    parser.error("the following arguments are required: -w/--workflow")
+
+  logging.info("Initializing culprit finder for %s", repo)
+  logging.info("Start commit: %s", start)
+  logging.info("End commit: %s", end)
+  logging.info("Workflow: %s", workflow_file_name)
+
   state_persister = culprit_finder_state.StatePersister(
-    repo=args.repo, workflow=args.workflow
+    repo=repo, workflow=workflow_file_name
   )
 
   if args.clear_cache and state_persister.exists():
@@ -81,10 +122,10 @@ def main() -> None:
       print("Starting a new bisection. Deleting the old state...")
       state_persister.delete()
       state: culprit_finder_state.CulpritFinderState = {
-        "repo": args.repo,
-        "workflow": args.workflow,
-        "original_start": args.start,
-        "original_end": args.end,
+        "repo": repo,
+        "workflow": workflow_file_name,
+        "original_start": start,
+        "original_end": end,
         "current_good": "",
         "current_bad": "",
         "cache": {},
@@ -94,20 +135,15 @@ def main() -> None:
       print("Resuming from the saved state.")
   else:
     state: culprit_finder_state.CulpritFinderState = {
-      "repo": args.repo,
-      "workflow": args.workflow,
-      "original_start": args.start,
-      "original_end": args.end,
+      "repo": repo,
+      "workflow": workflow_file_name,
+      "original_start": start,
+      "original_end": end,
       "current_good": "",
       "current_bad": "",
       "cache": {},
     }
 
-  logging.info("Initializing culprit finder for %s", args.repo)
-  logging.info("Start commit: %s", args.start)
-  logging.info("End commit: %s", args.end)
-  logging.info("Workflow: %s", args.workflow)
-
   has_culprit_finder_workflow = any(
     wf["path"] == ".github/workflows/culprit_finder.yml"
     for wf in gh_client.get_workflows()
@@ -116,10 +152,10 @@ def main() -> None:
   logging.info("Using culprit finder workflow: %s", has_culprit_finder_workflow)
 
   finder = culprit_finder.CulpritFinder(
-    repo=args.repo,
-    start_sha=args.start,
-    end_sha=args.end,
-    workflow_file=args.workflow,
+    repo=repo,
+    start_sha=start,
+    end_sha=end,
+    workflow_file=workflow_file_name,
     has_culprit_finder_workflow=has_culprit_finder_workflow,
     github_client=gh_client,
     state=state,

diff --git a/culprit_finder/src/culprit_finder/github.py b/culprit_finder/src/culprit_finder/github.py
@@ -5,6 +5,7 @@
 import subprocess
 import json
 import logging
+import re
 import time
 from typing import Optional, TypedDict
 
@@ -29,6 +30,8 @@ class Run(TypedDict):
       conclusion: The conclusion of the workflow run if completed (e.g., "success", "failure", "cancelled"). Optional.
       databaseId: The unique identifier for the workflow run in the GitHub database.
       url: The URL to the workflow run on GitHub.
+      workflowName: The name of the workflow file (e.g. "test.yml") or the name of the workflow.
+      workflowDatabaseId: The unique identifier for the workflow in the GitHub database.
   """
 
   headSha: str
@@ -37,6 +40,8 @@ class Run(TypedDict):
   conclusion: Optional[str]
   databaseId: int
   url: str
+  workflowName: str
+  workflowDatabaseId: int
 
 
 class GithubClient:
@@ -246,3 +251,64 @@ def get_workflows(self) -> list[Workflow]:
     cmd = ["workflow", "list", "--json", "path,name", "--repo", self.repo]
     workflows = self._run_command(cmd)
     return json.loads(workflows)
+
+  def get_workflow(self, workflow_id: int | str) -> Workflow:
+    """
+    Fetches one workflow from the `actions/workflows` API endpoint of the repo.
+
+    Args:
+        workflow_id: Numeric ID or filename (e.g., 'main.yml') identifying the workflow.
+
+    Returns:
+        The decoded JSON response describing the workflow.
+    """
+    raw = self._run_command(
+      ["api", f"repos/{self.repo}/actions/workflows/{workflow_id}"]
+    )
+    return json.loads(raw)
+
+  def get_run(self, run_id: str) -> Run:
+    """
+    Fetches the metadata of a single workflow run with a `run view` command.
+
+    Args:
+        run_id: Identifier of the run, passed to `run view` unchanged.
+
+    Returns:
+        The decoded JSON object holding the requested run fields (head SHA,
+        status, conclusion, workflow name and ID, among others).
+    """
+    # Comma-separated field list handed to `--json`.
+    json_fields = (
+      "headSha,status,createdAt,conclusion,databaseId,url,"
+      "workflowName,workflowDatabaseId"
+    )
+
+    raw = self._run_command(
+      ["run", "view", run_id, "--json", json_fields, "--repo", self.repo]
+    )
+    return json.loads(raw)
+
+  def get_run_from_url(self, url: str) -> Run:
+    """
+    Looks up a workflow run from the run ID embedded in a GitHub Actions URL.
+
+    Accepted URL shapes (anything containing `actions/runs/<digits>` matches):
+    - https://github.com/owner/repo/actions/runs/:runId
+    - https://github.com/owner/repo/actions/runs/:runId/jobs/:jobId
+
+    Args:
+        url: URL of the workflow run, or of a job inside that run.
+
+    Returns:
+        The Run returned by `get_run` for the ID found in the URL.
+
+    Raises:
+        ValueError: If no `actions/runs/<digits>` segment is present in the URL.
+    """
+    found = re.search(r"actions/runs/(\d+)", url)
+    if found is None:
+      raise ValueError(f"Could not extract run ID from URL: {url}")
+
+    # Only the numeric run ID is taken from the URL; it is forwarded as a string.
+    return self.get_run(found[1])