diff --git a/culprit_finder/README.md b/culprit_finder/README.md index 8edea27..e130d76 100644 --- a/culprit_finder/README.md +++ b/culprit_finder/README.md @@ -47,12 +47,19 @@ set -o history After installation, you can run the tool using the `culprit-finder` command. ```shell -culprit-finder --repo --start --end --workflow +culprit-finder [URL] --repo --start --end --workflow [FLAGS] ``` ### Arguments +- `URL`: (Optional) A GitHub Actions Run URL (e.g., `https://github.com/owner/repo/actions/runs/12345`). +If provided, the tool infers the repository, workflow name, and either the start or end SHA based on the +run's status (success implies start SHA, failure implies end SHA). +- `--repo`: The target GitHub repository in the format `owner/repo`. (Optional if URL is provided). +- `--start`: The full or short SHA of the last known **good** commit. (Optional if inferred from a successful URL run). +- `--end`: The full or short SHA of the first known **bad** commit. (Optional if inferred from a failed URL run). +- `--workflow`: The filename of the GitHub Actions workflow to run (e.g., `ci.yml`, `tests.yaml`). (Optional if URL is provided). - `--repo`: The target GitHub repository in the format `owner/repo`. - `--start`: The full or short SHA of the last known **good** commit. - `--end`: The full or short SHA of the first known **bad** commit. 
@@ -78,6 +85,10 @@ culprit-finder --workflow build_and_test.yml ``` +Using a URL to infer details (e.g., starting with a known bad run): +```shell +culprit-finder https://github.com/google-ml-infra/actions/actions/runs/123456789 --start a1b2c3d +``` ## Developer Notes diff --git a/culprit_finder/src/culprit_finder/cli.py b/culprit_finder/src/culprit_finder/cli.py index 37d7668..a9140cb 100644 --- a/culprit_finder/src/culprit_finder/cli.py +++ b/culprit_finder/src/culprit_finder/cli.py @@ -10,6 +10,7 @@ import logging import os import sys +import re from culprit_finder import culprit_finder from culprit_finder import culprit_finder_state @@ -28,6 +29,13 @@ def _validate_repo(repo: str) -> str: return repo +def _get_repo_from_url(url: str) -> str: + match = re.search(r"github\.com/([^/]+/[^/]+)", url) + if not match: + raise ValueError(f"Could not extract repo from URL: {url}") + return match.group(1) + + def main() -> None: """ Entry point for the culprit finder CLI. @@ -35,19 +43,18 @@ def main() -> None: Parses command-line arguments then initiates the bisection process using CulpritFinder. 
""" parser = argparse.ArgumentParser(description="Culprit finder for GitHub Actions.") + parser.add_argument("url", nargs="?", help="GitHub Actions Run URL") parser.add_argument( "-r", "--repo", - required=True, help="Target GitHub repository (e.g., owner/repo)", type=_validate_repo, ) - parser.add_argument("-s", "--start", required=True, help="Last known good commit SHA") - parser.add_argument("-e", "--end", required=True, help="First known bad commit SHA") + parser.add_argument("-s", "--start", help="Last known good commit SHA") + parser.add_argument("-e", "--end", help="First known bad commit SHA") parser.add_argument( "-w", "--workflow", - required=True, help="Workflow filename (e.g., build_and_test.yml)", ) parser.add_argument( @@ -58,7 +65,20 @@ def main() -> None: args = parser.parse_args() - gh_client = github.GithubClient(repo=args.repo) + repo = args.repo + start = args.start + end = args.end + workflow_file_name = args.workflow + + if args.url: + repo = _get_repo_from_url(args.url) + + if not repo: + parser.error( + "the following arguments are required: -r/--repo (or provided via URL)" + ) + + gh_client = github.GithubClient(repo=repo) is_authenticated_with_cli = gh_client.check_auth_status() has_access_token = os.environ.get("GH_TOKEN") is not None @@ -67,8 +87,29 @@ def main() -> None: logging.error("Not authenticated with GitHub CLI or GH_TOKEN env var is not set.") sys.exit(1) + if args.url: + run = gh_client.get_run_from_url(args.url) + workflow = gh_client.get_workflow(run["workflowDatabaseId"]) + if (run.get("conclusion") or run["status"]) == "success": + start = run["headSha"] + else: + end = run["headSha"] + workflow_file_name = workflow["path"].split("/")[-1] + + if not start: + parser.error("the following arguments are required: -s/--start") + if not end: + parser.error("the following arguments are required: -e/--end") + if not workflow_file_name: + parser.error("the following arguments are required: -w/--workflow") + + logging.info("Initializing culprit finder for 
%s", repo) + logging.info("Start commit: %s", start) + logging.info("End commit: %s", end) + logging.info("Workflow: %s", workflow_file_name) + state_persister = culprit_finder_state.StatePersister( - repo=args.repo, workflow=args.workflow + repo=repo, workflow=workflow_file_name ) if args.clear_cache and state_persister.exists(): @@ -81,10 +122,10 @@ def main() -> None: print("Starting a new bisection. Deleting the old state...") state_persister.delete() state: culprit_finder_state.CulpritFinderState = { - "repo": args.repo, - "workflow": args.workflow, - "original_start": args.start, - "original_end": args.end, + "repo": repo, + "workflow": workflow_file_name, + "original_start": start, + "original_end": end, "current_good": "", "current_bad": "", "cache": {}, @@ -94,20 +135,15 @@ def main() -> None: print("Resuming from the saved state.") else: state: culprit_finder_state.CulpritFinderState = { - "repo": args.repo, - "workflow": args.workflow, - "original_start": args.start, - "original_end": args.end, + "repo": repo, + "workflow": workflow_file_name, + "original_start": start, + "original_end": end, "current_good": "", "current_bad": "", "cache": {}, } - logging.info("Initializing culprit finder for %s", args.repo) - logging.info("Start commit: %s", args.start) - logging.info("End commit: %s", args.end) - logging.info("Workflow: %s", args.workflow) - has_culprit_finder_workflow = any( wf["path"] == ".github/workflows/culprit_finder.yml" for wf in gh_client.get_workflows() @@ -116,10 +152,10 @@ def main() -> None: logging.info("Using culprit finder workflow: %s", has_culprit_finder_workflow) finder = culprit_finder.CulpritFinder( - repo=args.repo, - start_sha=args.start, - end_sha=args.end, - workflow_file=args.workflow, + repo=repo, + start_sha=start, + end_sha=end, + workflow_file=workflow_file_name, has_culprit_finder_workflow=has_culprit_finder_workflow, github_client=gh_client, state=state, diff --git a/culprit_finder/src/culprit_finder/github.py 
b/culprit_finder/src/culprit_finder/github.py index 02a6760..a4f4570 100644 --- a/culprit_finder/src/culprit_finder/github.py +++ b/culprit_finder/src/culprit_finder/github.py @@ -5,6 +5,7 @@ import subprocess import json import logging +import re import time from typing import Optional, TypedDict @@ -29,6 +30,8 @@ class Run(TypedDict): conclusion: The conclusion of the workflow run if completed (e.g., "success", "failure", "cancelled"). Optional. databaseId: The unique identifier for the workflow run in the GitHub database. url: The URL to the workflow run on GitHub. + workflowName: The name of the workflow file (e.g. "test.yml") or the name of the workflow. + workflowDatabaseId: The unique identifier for the workflow in the GitHub database. """ headSha: str @@ -37,6 +40,8 @@ class Run(TypedDict): conclusion: Optional[str] databaseId: int url: str + workflowName: str + workflowDatabaseId: int class GithubClient: @@ -246,3 +251,64 @@ def get_workflows(self) -> list[Workflow]: cmd = ["workflow", "list", "--json", "path,name", "--repo", self.repo] workflows = self._run_command(cmd) return json.loads(workflows) + + def get_workflow(self, workflow_id: int | str) -> Workflow: + """ + Retrieves details of a specific workflow by its ID or filename. + + Args: + workflow_id: The ID or filename (e.g., 'main.yml') of the workflow. + + Returns: + A dictionary containing workflow details (id, name, path, state, etc.). + """ + endpoint = f"repos/{self.repo}/actions/workflows/{workflow_id}" + cmd = ["api", endpoint] + output = self._run_command(cmd) + return json.loads(output) + + def get_run(self, run_id: str) -> Run: + """ + Retrieves detailed information about a specific workflow run. + + Args: + run_id: The unique database ID or number of the workflow run. + + Returns: + A Run object containing metadata such as head SHA, status, and conclusion. 
+ """ + cmd = [ + "run", + "view", + run_id, + "--json", + "headSha,status,createdAt,conclusion,databaseId,url,workflowName,workflowDatabaseId", + "--repo", + self.repo, + ] + run = self._run_command(cmd) + return json.loads(run) + + def get_run_from_url(self, url: str) -> Run: + """ + Retrieves workflow run details using a GitHub Actions URL. + + The URL must follow one of these structures: + - https://github.com/owner/repo/actions/runs/:runId + - https://github.com/owner/repo/actions/runs/:runId/jobs/:jobId + + Args: + url: The full GitHub URL to the workflow run or specific job. + + Returns: + A Run object containing metadata for the extracted run ID. + + Raises: + ValueError: If the run ID cannot be parsed from the provided URL. + """ + match = re.search(r"actions/runs/(\d+)", url) + if not match: + raise ValueError(f"Could not extract run ID from URL: {url}") + + run_id = match.group(1) + return self.get_run(run_id) diff --git a/culprit_finder/tests/test_cli.py b/culprit_finder/tests/test_cli.py index 23e5c09..c4b47ba 100644 --- a/culprit_finder/tests/test_cli.py +++ b/culprit_finder/tests/test_cli.py @@ -33,25 +33,6 @@ def _get_culprit_finder_command( @pytest.mark.parametrize( "args, expected_error_msg", [ - # Missing Arguments Scenarios - (["culprit_finder"], "error"), # No args - ( - _get_culprit_finder_command(None, "sha1", "sha2", "test.yml"), - "error", - ), # Missing repo - ( - _get_culprit_finder_command("owner/repo", None, "sha2", "test.yml"), - "error", - ), # Missing start - ( - _get_culprit_finder_command("owner/repo", "sha1", None, "test.yml"), - "error", - ), # Missing end - ( - _get_culprit_finder_command("owner/repo", "sha1", "sha2", None), - "error", - ), # Missing workflow - # Invalid Repo Format Scenarios ( _get_culprit_finder_command("invalidrepo", "sha1", "sha2", "test.yml"), "Invalid repo format: invalidrepo", @@ -67,7 +48,7 @@ def _get_culprit_finder_command( (_get_culprit_finder_command("", "sha1", "sha2", "test.yml"), "error"), ], ) 
-def test_cli_args_failures(monkeypatch, capsys, args, expected_error_msg): +def test_invalid_repo_format(monkeypatch, capsys, args, expected_error_msg): """Tests that the CLI exits with an error for invalid inputs (missing args or invalid formats).""" monkeypatch.setattr(sys, "argv", args) @@ -369,3 +350,142 @@ def test_cli_clear_cache_deletes_state(monkeypatch, mocker): # delete() should be called at start (due to clear-cache) and potentially at end (if no culprit found/successful run) assert mock_persister_inst.delete.called + + +@pytest.mark.parametrize( + "run_status, extra_args, expected_start, expected_end", + [ + ("success", ["--end", "sha2"], "sha_from_url", "sha2"), + ("failure", ["--start", "sha1"], "sha1", "sha_from_url"), + ], +) +def test_cli_with_url( + monkeypatch, mocker, run_status, extra_args, expected_start, expected_end +): + """Tests that the CLI correctly infers arguments from a URL based on run status.""" + mock_finder = mocker.patch("culprit_finder.cli.culprit_finder.CulpritFinder") + mock_gh_client_instance = _mock_gh_client( + mocker, + True, + [{"path": ".github/workflows/culprit_finder.yml", "name": "Culprit Finder"}], + ) + patches = _mock_state(mocker) + + mock_gh_client_instance.get_run_from_url.return_value = { + "headSha": "sha_from_url", + "status": run_status, + "workflowName": "Test Workflow", + "workflowDatabaseId": 123, + } + mock_gh_client_instance.get_workflow.return_value = { + "path": ".github/workflows/test.yml" + } + + url = "https://github.com/owner/repo/actions/runs/123" + args = ["culprit_finder", url] + extra_args + monkeypatch.setattr(sys, "argv", args) + + cli.main() + + expected_state = { + "repo": "owner/repo", + "workflow": "test.yml", + "original_start": expected_start, + "original_end": expected_end, + "current_good": "", + "current_bad": "", + "cache": {}, + } + mock_finder.assert_called_once_with( + repo="owner/repo", + start_sha=expected_start, + end_sha=expected_end, + workflow_file="test.yml", + 
has_culprit_finder_workflow=True, + github_client=mock_gh_client_instance, + state=expected_state, + state_persister=patches["state_persister_inst"], + ) + + +@pytest.mark.parametrize( + "args, expected_error_msg", + [ + ( + ["culprit_finder"], + "the following arguments are required: -r/--repo (or provided via URL)", + ), + ( + _get_culprit_finder_command(None, "sha1", "sha2", "test.yml"), + "the following arguments are required: -r/--repo (or provided via URL)", + ), + ], +) +def test_missing_repo_args(monkeypatch, capsys, args, expected_error_msg): + """Tests that the CLI exits with an error when repo is missing (before auth check).""" + monkeypatch.setattr(sys, "argv", args) + with pytest.raises(SystemExit): + cli.main() + captured = capsys.readouterr() + assert expected_error_msg in captured.err + + +@pytest.mark.parametrize( + "args, expected_error_msg", + [ + ( + _get_culprit_finder_command("owner/repo", None, "sha2", "test.yml"), + "the following arguments are required: -s/--start", + ), + ( + _get_culprit_finder_command("owner/repo", "sha1", None, "test.yml"), + "the following arguments are required: -e/--end", + ), + ( + _get_culprit_finder_command("owner/repo", "sha1", "sha2", None), + "the following arguments are required: -w/--workflow", + ), + ], +) +def test_missing_args_standard_authenticated( + monkeypatch, mocker, capsys, args, expected_error_msg +): + """Tests that the CLI exits with an error for missing args after repo check (requires auth).""" + _mock_gh_client(mocker, True) + monkeypatch.setattr(sys, "argv", args) + + with pytest.raises(SystemExit): + cli.main() + + captured = capsys.readouterr() + assert expected_error_msg in captured.err + + +@pytest.mark.parametrize( + "run_status, extra_args, expected_error_msg", + [ + ("success", [], "the following arguments are required: -e/--end"), + ("failure", [], "the following arguments are required: -s/--start"), + ], +) +def test_missing_args_with_url( + monkeypatch, mocker, capsys, run_status, 
extra_args, expected_error_msg +): + """Tests that the CLI fails when required arguments are missing even with URL.""" + mock_gh_client_instance = _mock_gh_client(mocker, True) + mock_gh_client_instance.get_run_from_url.return_value = { + "headSha": "sha_from_url", + "status": run_status, + "workflowName": "test.yml", + "workflowDatabaseId": 123, + } + + url = "https://github.com/owner/repo/actions/runs/123" + args = ["culprit_finder", url] + extra_args + monkeypatch.setattr(sys, "argv", args) + + with pytest.raises(SystemExit): + cli.main() + + captured = capsys.readouterr() + assert expected_error_msg in captured.err