Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion culprit_finder/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,19 @@ set -o history
After installation, you can run the tool using the `culprit-finder` command.

```shell
culprit-finder --repo <OWNER/REPO> --start <GOOD_SHA> --end <BAD_SHA> --workflow <WORKFLOW_FILE>
culprit-finder [URL] --repo <OWNER/REPO> --start <GOOD_SHA> --end <BAD_SHA> --workflow <WORKFLOW_FILE> [FLAGS]
```


### Arguments

- `URL`: (Optional) A GitHub Actions Run URL (e.g., `https://github.com/owner/repo/actions/runs/12345`).
If provided, the tool infers the repository, workflow name, and either the start or end SHA based on the
run's status (success implies start SHA, failure implies end SHA).
- `--repo`: The target GitHub repository in the format `owner/repo`. (Optional if URL is provided).
- `--start`: The full or short SHA of the last known **good** commit. (Optional if inferred from a successful URL run).
- `--end`: The full or short SHA of the first known **bad** commit. (Optional if inferred from a failed URL run).
- `--workflow`: The filename of the GitHub Actions workflow to run (e.g., `ci.yml`, `tests.yaml`). (Optional if URL is provided).
- `--repo`: The target GitHub repository in the format `owner/repo`.
- `--start`: The full or short SHA of the last known **good** commit.
- `--end`: The full or short SHA of the first known **bad** commit.
Expand All @@ -78,6 +85,10 @@ culprit-finder
--workflow build_and_test.yml
```

Using a URL to infer details (e.g., starting with a known bad run):
```shell
culprit-finder https://github.com/google-ml-infra/actions/actions/runs/123456789 --start a1b2c3d
```

## Developer Notes

Expand Down
82 changes: 59 additions & 23 deletions culprit_finder/src/culprit_finder/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import logging
import os
import sys
import re

from culprit_finder import culprit_finder
from culprit_finder import culprit_finder_state
Expand All @@ -28,26 +29,32 @@ def _validate_repo(repo: str) -> str:
return repo


def _get_repo_from_url(url: str) -> str:
match = re.search(r"github\.com/([^/]+/[^/]+)", url)
if not match:
raise ValueError(f"Could not extract repo from URL: {url}")
return match.group(1)


def main() -> None:
"""
Entry point for the culprit finder CLI.

Parses command-line arguments then initiates the bisection process using CulpritFinder.
"""
parser = argparse.ArgumentParser(description="Culprit finder for GitHub Actions.")
parser.add_argument("url", nargs="?", help="GitHub Actions Run URL")
parser.add_argument(
"-r",
"--repo",
required=True,
help="Target GitHub repository (e.g., owner/repo)",
type=_validate_repo,
)
parser.add_argument("-s", "--start", required=True, help="Last known good commit SHA")
parser.add_argument("-e", "--end", required=True, help="First known bad commit SHA")
parser.add_argument("-s", "--start", help="Last known good commit SHA")
parser.add_argument("-e", "--end", help="First known bad commit SHA")
parser.add_argument(
"-w",
"--workflow",
required=True,
help="Workflow filename (e.g., build_and_test.yml)",
)
parser.add_argument(
Expand All @@ -58,7 +65,20 @@ def main() -> None:

args = parser.parse_args()

gh_client = github.GithubClient(repo=args.repo)
repo = args.repo
start = args.start
end = args.end
workflow_file_name = args.workflow

if args.url:
repo = _get_repo_from_url(args.url)

if not repo:
parser.error(
"the following arguments are required: -r/--repo (or provided via URL)"
)

gh_client = github.GithubClient(repo=repo)

is_authenticated_with_cli = gh_client.check_auth_status()
has_access_token = os.environ.get("GH_TOKEN") is not None
Expand All @@ -67,8 +87,29 @@ def main() -> None:
logging.error("Not authenticated with GitHub CLI or GH_TOKEN env var is not set.")
sys.exit(1)

if args.url:
run = gh_client.get_run_from_url(args.url)
workflow = gh_client.get_workflow(run["workflowDatabaseId"])
if run["status"] == "success":
start = run["headSha"]
else:
end = run["headSha"]
workflow_file_name = workflow["path"].split("/")[-1]

if not start:
parser.error("the following arguments are required: -s/--start")
if not end:
parser.error("the following arguments are required: -e/--end")
if not workflow_file_name:
parser.error("the following arguments are required: -w/--workflow")

logging.info("Initializing culprit finder for %s", repo)
logging.info("Start commit: %s", start)
logging.info("End commit: %s", end)
logging.info("Workflow: %s", workflow_file_name)

state_persister = culprit_finder_state.StatePersister(
repo=args.repo, workflow=args.workflow
repo=repo, workflow=workflow_file_name
)

if args.clear_cache and state_persister.exists():
Expand All @@ -81,10 +122,10 @@ def main() -> None:
print("Starting a new bisection. Deleting the old state...")
state_persister.delete()
state: culprit_finder_state.CulpritFinderState = {
"repo": args.repo,
"workflow": args.workflow,
"original_start": args.start,
"original_end": args.end,
"repo": repo,
"workflow": workflow_file_name,
"original_start": start,
"original_end": end,
"current_good": "",
"current_bad": "",
"cache": {},
Expand All @@ -94,20 +135,15 @@ def main() -> None:
print("Resuming from the saved state.")
else:
state: culprit_finder_state.CulpritFinderState = {
"repo": args.repo,
"workflow": args.workflow,
"original_start": args.start,
"original_end": args.end,
"repo": repo,
"workflow": workflow_file_name,
"original_start": start,
"original_end": end,
"current_good": "",
"current_bad": "",
"cache": {},
}

logging.info("Initializing culprit finder for %s", args.repo)
logging.info("Start commit: %s", args.start)
logging.info("End commit: %s", args.end)
logging.info("Workflow: %s", args.workflow)

has_culprit_finder_workflow = any(
wf["path"] == ".github/workflows/culprit_finder.yml"
for wf in gh_client.get_workflows()
Expand All @@ -116,10 +152,10 @@ def main() -> None:
logging.info("Using culprit finder workflow: %s", has_culprit_finder_workflow)

finder = culprit_finder.CulpritFinder(
repo=args.repo,
start_sha=args.start,
end_sha=args.end,
workflow_file=args.workflow,
repo=repo,
start_sha=start,
end_sha=end,
workflow_file=workflow_file_name,
has_culprit_finder_workflow=has_culprit_finder_workflow,
github_client=gh_client,
state=state,
Expand Down
66 changes: 66 additions & 0 deletions culprit_finder/src/culprit_finder/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import subprocess
import json
import logging
import re
import time
from typing import Optional, TypedDict

Expand All @@ -29,6 +30,8 @@ class Run(TypedDict):
conclusion: The conclusion of the workflow run if completed (e.g., "success", "failure", "cancelled"). Optional.
databaseId: The unique identifier for the workflow run in the GitHub database.
url: The URL to the workflow run on GitHub.
workflowName: The name of the workflow file (e.g. "test.yml") or the name of the workflow.
workflowDatabaseId: The unique identifier for the workflow in the GitHub database.
"""

headSha: str
Expand All @@ -37,6 +40,8 @@ class Run(TypedDict):
conclusion: Optional[str]
databaseId: int
url: str
workflowName: str
workflowDatabaseId: int


class GithubClient:
Expand Down Expand Up @@ -246,3 +251,64 @@ def get_workflows(self) -> list[Workflow]:
cmd = ["workflow", "list", "--json", "path,name", "--repo", self.repo]
workflows = self._run_command(cmd)
return json.loads(workflows)

def get_workflow(self, workflow_id: int | str) -> Workflow:
  """
  Looks up a single workflow of this client's repository.

  Builds the `repos/<repo>/actions/workflows/<workflow_id>` endpoint, hands
  it to `_run_command` as an `api` call, and decodes the JSON that comes back.

  Args:
    workflow_id: The workflow's numeric ID or its filename (e.g., 'main.yml').

  Returns:
    The decoded JSON response describing the workflow.
  """
  api_path = f"repos/{self.repo}/actions/workflows/{workflow_id}"
  raw_json = self._run_command(["api", api_path])
  return json.loads(raw_json)

def get_run(self, run_id: str) -> Run:
  """
  Fetches the metadata of one workflow run.

  Issues a `run view` command for the given run, scoped to this client's
  repository, and decodes the JSON output.

  Args:
    run_id: The database ID or number of the workflow run, as a string.

  Returns:
    A Run mapping with the requested fields (head SHA, status, creation
    time, conclusion, database ID, URL, workflow name, and workflow ID).
  """
  # Only these fields are requested; they mirror the keys of the Run type.
  json_fields = (
    "headSha,status,createdAt,conclusion,databaseId,url,workflowName,workflowDatabaseId"
  )
  view_cmd = ["run", "view", run_id, "--json", json_fields]
  view_cmd += ["--repo", self.repo]
  raw_json = self._run_command(view_cmd)
  return json.loads(raw_json)

def get_run_from_url(self, url: str) -> Run:
  """
  Resolves a GitHub Actions URL to the workflow run it points at.

  Accepted URL shapes:
    - https://github.com/owner/repo/actions/runs/:runId
    - https://github.com/owner/repo/actions/runs/:runId/jobs/:jobId

  Args:
    url: The full GitHub URL of a workflow run or of a job within a run.

  Returns:
    The Run returned by `get_run` for the run ID found in the URL.

  Raises:
    ValueError: If the URL contains no `actions/runs/<digits>` segment.
  """
  found = re.search(r"actions/runs/(\d+)", url)
  if found is None:
    raise ValueError(f"Could not extract run ID from URL: {url}")
  # The captured ID stays a string, which is what get_run expects.
  return self.get_run(found.group(1))
Loading