Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions culprit_finder/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ culprit-finder --repo <OWNER/REPO> --start <GOOD_SHA> --end <BAD_SHA> --workflow
- `--end`: The full or short SHA of the first known **bad** commit.
- `--workflow`: The filename of the GitHub Actions workflow to run (e.g., `ci.yml`, `tests.yaml`).
- `--clear-cache`: (Optional) Deletes the local state file before execution to start a fresh bisection.
- `--no-cache`: (Optional) Disables cached results. This will run the workflow on all commits.

### State Persistence and Resuming

Expand Down
8 changes: 8 additions & 0 deletions culprit_finder/src/culprit_finder/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,11 @@ def main() -> None:
action="store_true",
help="Deletes the local state file before execution",
)
parser.add_argument(
"--no-cache",
action="store_true",
help="Disabled cached results. This will run the workflow on all commits.",
)

args = parser.parse_args()

Expand All @@ -67,6 +72,7 @@ def main() -> None:
logging.error("Not authenticated with GitHub CLI or GH_TOKEN env var is not set.")
sys.exit(1)

use_cache = not args.no_cache
state_persister = culprit_finder_state.StatePersister(
repo=args.repo, workflow=args.workflow
)
Expand Down Expand Up @@ -107,6 +113,7 @@ def main() -> None:
logging.info("Start commit: %s", args.start)
logging.info("End commit: %s", args.end)
logging.info("Workflow: %s", args.workflow)
logging.info("Use cache: %s", use_cache)

has_culprit_finder_workflow = any(
wf["path"] == ".github/workflows/culprit_finder.yml"
Expand All @@ -124,6 +131,7 @@ def main() -> None:
github_client=gh_client,
state=state,
state_persister=state_persister,
use_cache=use_cache,
)

try:
Expand Down
109 changes: 80 additions & 29 deletions culprit_finder/src/culprit_finder/culprit_finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ def __init__(
github_client: github.GithubClient,
state: culprit_finder_state.CulpritFinderState,
state_persister: culprit_finder_state.StatePersister,
use_cache: bool = True,
):
"""
Initializes the CulpritFinder instance.
Expand All @@ -40,6 +41,7 @@ def __init__(
github_client: The GithubClient instance used to interact with GitHub.
state: The CulpritFinderState object containing the current bisection state.
state_persister: The StatePersister object used to save the bisection state.
use_cache: Whether to use the cached results from previous runs. Defaults to True.
"""
self._repo = repo
self._start_sha = start_sha
Expand All @@ -50,6 +52,7 @@ def __init__(
self._gh_client = github_client
self._state = state
self._state_persister = state_persister
self._use_cache = use_cache

def _wait_for_workflow_completion(
self,
Expand All @@ -75,7 +78,9 @@ def _wait_for_workflow_completion(
"""
start_time = time.time()
while time.time() - start_time < timeout:
latest_run = self._gh_client.get_latest_run(workflow_file, branch_name)
latest_run = self._gh_client.get_latest_run(
workflow_file=workflow_file, branch=branch_name, event="workflow_dispatch"
)

if not latest_run:
logging.info(
Expand Down Expand Up @@ -136,7 +141,9 @@ def _test_commit(
)

# Get the ID of the previous run (if any) to distinguish it from the new one we are about to trigger
previous_run = self._gh_client.get_latest_run(workflow_to_trigger, branch_name)
previous_run = self._gh_client.get_latest_run(
workflow_file=workflow_to_trigger, branch=branch_name, event="workflow_dispatch"
)
previous_run_id = previous_run["databaseId"] if previous_run else None

self._gh_client.trigger_workflow(
Expand All @@ -157,6 +164,68 @@ def _test_commit(

return run["conclusion"] == "success"

def _check_existing_run(self, commit_sha: str) -> bool | None:
"""
Checks for an existing workflow run for the commit.

Args:
commit_sha: The SHA of the commit to check for existing runs.

Returns:
True if a successful run is found, False if a failed run is found,
or None if no completed run exists.
"""
previous_run = self._gh_client.get_latest_run(
workflow_file=self._workflow_file, commit=commit_sha, status="completed"
)
if previous_run:
logging.info(
"Found result from previous run for commit %s, skipping test", commit_sha
)
return previous_run["conclusion"] == "success"
return None

def _execute_test_with_branch(self, commit_sha: str) -> bool:
    """
    Creates a branch, runs the test, and cleans up.

    The temporary branch is removed even when waiting for its creation or
    running the test raises, so failed attempts do not leave stray
    "culprit-finder/test-*" branches behind in the repository.

    Args:
      commit_sha: The SHA of the commit to be tested.

    Returns:
      True if the test passed, False otherwise.
    """
    branch_name = f"culprit-finder/test-{commit_sha}_{uuid.uuid4()}"

    # Ensure the branch does not exist from a previous run
    needs_creation = not self._gh_client.check_branch_exists(branch_name)
    if needs_creation:
        self._gh_client.create_branch(branch_name, commit_sha)
        logging.info("Created branch %s", branch_name)

    try:
        # Waiting happens inside the protected region: if it fails, the branch
        # that was just created is still cleaned up below.
        if needs_creation:
            self._gh_client.wait_for_branch_creation(branch_name, timeout=180)
        return self._test_commit(commit_sha, branch_name)
    finally:
        if self._gh_client.check_branch_exists(branch_name):
            logging.info("Deleting branch %s", branch_name)
            self._gh_client.delete_branch(branch_name)

def _update_state(self, commit_sha: str, is_good: bool) -> None:
    """
    Records the outcome for a tested commit and saves the state.

    A good commit becomes the new "current_good" boundary and is cached as
    "PASS"; a bad commit becomes the new "current_bad" boundary and is cached
    as "FAIL". The updated state is then handed to the state persister.

    Args:
      commit_sha: The SHA of the commit that was tested.
      is_good: Whether the commit was identified as good (True) or bad (False).
    """
    boundary_key, verdict = (
        ("current_good", "PASS") if is_good else ("current_bad", "FAIL")
    )
    self._state[boundary_key] = commit_sha
    self._state["cache"][commit_sha] = verdict
    self._state_persister.save(self._state)

def run_bisection(self) -> github.Commit | None:
"""
Runs bisection logic (binary search) to find the culprit commit for a GitHub workflow.
Expand Down Expand Up @@ -184,47 +253,29 @@ def run_bisection(self) -> github.Commit | None:
while bad_idx - good_idx > 1:
mid_idx = (good_idx + bad_idx) // 2
commit_sha = commits[mid_idx]["sha"]
is_good = None
is_cached = False

if commit_sha in self._state["cache"]:
logging.info("Using cached result for commit %s", commit_sha)
is_good = self._state["cache"][commit_sha] == "PASS"
is_cached = True

if is_good:
good_idx = mid_idx
logging.info("Commit %s is good", commit_sha)
else:
bad_idx = mid_idx
logging.info("Commit %s is bad", commit_sha)

continue

branch_name = f"culprit-finder/test-{commit_sha}_{uuid.uuid4()}"

# Ensure the branch does not exist from a previous run
if not self._gh_client.check_branch_exists(branch_name):
self._gh_client.create_branch(branch_name, commit_sha)
logging.info("Created branch %s", branch_name)
self._gh_client.wait_for_branch_creation(branch_name, timeout=180)
if is_good is None and self._use_cache:
is_good = self._check_existing_run(commit_sha)

try:
is_good = self._test_commit(commit_sha, branch_name)
finally:
if self._gh_client.check_branch_exists(branch_name):
logging.info("Deleting branch %s", branch_name)
self._gh_client.delete_branch(branch_name)
if is_good is None:
is_good = self._execute_test_with_branch(commit_sha)

if is_good:
good_idx = mid_idx
self._state["current_good"] = commit_sha
self._state["cache"][commit_sha] = "PASS"
logging.info("Commit %s is good", commit_sha)
else:
bad_idx = mid_idx
self._state["current_bad"] = commit_sha
self._state["cache"][commit_sha] = "FAIL"
logging.info("Commit %s is bad", commit_sha)

self._state_persister.save(self._state)
if not is_cached:
self._update_state(commit_sha, is_good)

if bad_idx == len(commits):
return None
Expand Down
29 changes: 22 additions & 7 deletions culprit_finder/src/culprit_finder/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,13 +136,24 @@ def trigger_workflow(

self._run_command(cmd)

def get_latest_run(self, workflow_file: str, branch: str) -> Run | None:
def get_latest_run(
self,
workflow_file: str,
branch: Optional[str] = None,
commit: Optional[str] = None,
event: Optional[str] = None,
status: Optional[str] = None,
) -> Run | None:
"""
Gets the latest workflow run for a specific branch and workflow.

Args:
workflow_file: The filename or ID of the workflow to query.
branch: The git branch reference to filter runs by.
workflow_file: The filename or ID of the workflow to query (e.g., "build.yml" or "12345").
branch: Optional. The git branch reference to filter runs by (e.g., "main", "feature-branch").
commit: Optional. The commit SHA to filter runs by.
event: Optional. The workflow event type to filter runs by (e.g., "push", "pull_request", "workflow_dispatch").
status: Optional. The run status to filter runs by (e.g., "completed", "in_progress", "queued", "failure").


Returns:
A dictionary representing the latest workflow run object (containing fields like
Expand All @@ -154,17 +165,21 @@ def get_latest_run(self, workflow_file: str, branch: str) -> Run | None:
"list",
"--workflow",
workflow_file,
"--branch",
branch,
"--event",
"workflow_dispatch",
"--limit",
"1",
"--json",
fields,
"--repo",
self.repo,
]
if branch:
cmd.extend(["--branch", branch])
if commit:
cmd.extend(["--commit", commit])
if event:
cmd.extend(["--event", event])
if status:
cmd.extend(["--status", status])

output = self._run_command(cmd)
runs = json.loads(output)
Expand Down
2 changes: 2 additions & 0 deletions culprit_finder/tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,7 @@ def test_cli_success(
workflow_file="test.yml",
has_culprit_finder_workflow=has_culprit_workflow,
github_client=mock_gh_client_instance,
use_cache=True,
state=expected_state,
state_persister=patches["state_persister_inst"],
)
Expand Down Expand Up @@ -310,6 +311,7 @@ def test_cli_state_management(
state=existing_state,
github_client=mock_gh_client_instance,
state_persister=patches["state_persister_inst"],
use_cache=True,
)
else:
# If not exists or discarded, new state created
Expand Down
Loading