Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
162 changes: 160 additions & 2 deletions mod_ci/controllers.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
"""maintains all functionality related running virtual machines, starting and tracking tests."""

import base64
import datetime
import fnmatch
import hashlib
import json
import os
Expand All @@ -15,6 +17,7 @@

import googleapiclient.discovery
import requests
import yaml
from flask import (Blueprint, abort, flash, g, jsonify, redirect, request,
url_for)
from github import (Auth, Commit, Github, GithubException, GithubObject,
Expand Down Expand Up @@ -682,6 +685,126 @@ def update_github_status():
return db_success and github_success


# Mapping of workflow names to their YAML files in the repository
WORKFLOW_FILES = {
"Build CCExtractor on Linux": ".github/workflows/build_linux.yml",
"Build CCExtractor on Windows": ".github/workflows/build_windows.yml",
}

# Cache for workflow path filters (refreshed periodically)
_workflow_filters_cache: Dict[str, tuple] = {} # {workflow_name: (filters, timestamp)}
WORKFLOW_CACHE_TTL = 3600 # 1 hour cache TTL


def _get_workflow_path_filters(repository, workflow_name: str, log) -> Optional[list]:
"""
Fetch and parse the workflow YAML to extract path filters.

Returns a list of path patterns that trigger the workflow, or None if
the workflow has no path filters (runs on all changes).

:param repository: GitHub repository object
:param workflow_name: Name of the workflow (e.g., "Build CCExtractor on Linux")
:param log: Logger instance
:return: List of path patterns, or None if no filters
"""
import time as time_module

# Check cache first
if workflow_name in _workflow_filters_cache:
filters, cached_at = _workflow_filters_cache[workflow_name]
if time_module.time() - cached_at < WORKFLOW_CACHE_TTL:
return filters

workflow_file = WORKFLOW_FILES.get(workflow_name)
if not workflow_file:
log.warning(f"Unknown workflow: {workflow_name}")
return None

try:
# Fetch the workflow file content from the repository
contents = repository.get_contents(workflow_file)
workflow_yaml = base64.b64decode(contents.content).decode('utf-8')

# Parse the YAML
workflow_config = yaml.safe_load(workflow_yaml)

# Extract path filters from 'on.push.paths' and 'on.pull_request.paths'
on_config = workflow_config.get('on', {})
path_filters = set()

# Check push paths
push_config = on_config.get('push', {})
if isinstance(push_config, dict) and 'paths' in push_config:
path_filters.update(push_config['paths'])

# Check pull_request paths
pr_config = on_config.get('pull_request', {})
if isinstance(pr_config, dict) and 'paths' in pr_config:
path_filters.update(pr_config['paths'])

filters = list(path_filters) if path_filters else None

# Cache the result
_workflow_filters_cache[workflow_name] = (filters, time_module.time())

log.debug(f"Workflow '{workflow_name}' path filters: {filters}")
return filters

except Exception as e:
log.warning(f"Failed to fetch/parse workflow file {workflow_file}: {e}")
return None # Assume no filters on error (will retry)


def _will_workflow_run_for_commit(repository, commit_sha: str, workflow_name: str, log) -> Optional[bool]:
"""
Check if a workflow will run for a given commit based on path filters.

:param repository: GitHub repository object
:param commit_sha: The commit SHA to check
:param workflow_name: Name of the workflow
:param log: Logger instance
:return: True if workflow will run (files match), False if it won't (no match),
None if cannot determine (no path filters or error)
"""
# Get the workflow's path filters
path_filters = _get_workflow_path_filters(repository, workflow_name, log)

if path_filters is None:
# No path filters means workflow runs on all changes
return None

try:
# Get the list of files changed in this commit
commit = repository.get_commit(commit_sha)
changed_files = [f.filename for f in commit.files]

log.debug(f"Commit {commit_sha[:7]} changed {len(changed_files)} files")

# Check if any changed file matches any path filter
for changed_file in changed_files:
for pattern in path_filters:
# Handle ** patterns (match any path depth)
if '**' in pattern:
# Convert ** glob to regex-compatible pattern
regex_pattern = pattern.replace('**/', '(.*/)?').replace('**', '.*')
regex_pattern = regex_pattern.replace('*', '[^/]*')
if re.match(regex_pattern, changed_file):
log.debug(f"File '{changed_file}' matches pattern '{pattern}'")
return True
elif fnmatch.fnmatch(changed_file, pattern):
log.debug(f"File '{changed_file}' matches pattern '{pattern}'")
return True

# No files matched any pattern
log.info(f"Commit {commit_sha[:7]}: No changed files match workflow '{workflow_name}' path filters")
return False

except Exception as e:
log.warning(f"Failed to check commit files against workflow filters: {e}")
return None # Cannot determine


def _diagnose_missing_artifact(repository, commit_sha: str, platform, log) -> tuple:
"""
Diagnose why an artifact was not found for a commit.
Expand All @@ -703,6 +826,16 @@ def _diagnose_missing_artifact(repository, commit_sha: str, platform, log) -> tu
else:
expected_workflow = Workflow_builds.WINDOWS

# First, check if the workflow will even run based on path filters
will_run = _will_workflow_run_for_commit(repository, commit_sha, expected_workflow, log)
if will_run is False:
# Workflow will definitely NOT run - no matching files in commit
message = (f"No build will be created: commit {commit_sha[:7]} does not modify any files "
f"that trigger the '{expected_workflow}' workflow. "
f"Only documentation or non-code files were changed.")
log.info(f"Commit {commit_sha[:7]}: workflow '{expected_workflow}' will not run due to path filters")
return (message, False) # NOT retryable - workflow will never run

try:
# Build workflow name lookup
workflow: Dict[int, Optional[str]] = defaultdict(lambda: None)
Expand Down Expand Up @@ -762,8 +895,33 @@ def _diagnose_missing_artifact(repository, commit_sha: str, platform, log) -> tu
return (message, False) # Not retryable

if not workflow_found:
# No workflow run found - could be queued or not triggered
# This is RETRYABLE (workflow might be queued or path filters excluded it)
# No workflow run found - could be queued, not triggered, or too old
# Check commit age to determine if we should keep waiting
MAX_WAIT_HOURS = 3
try:
from datetime import datetime, timedelta, timezone
commit_obj = repository.get_commit(commit_sha)
commit_date = commit_obj.commit.author.date
if commit_date.tzinfo is None:
commit_date = commit_date.replace(tzinfo=timezone.utc)
now = datetime.now(timezone.utc)
commit_age = now - commit_date

if commit_age > timedelta(hours=MAX_WAIT_HOURS):
# Commit is too old - workflow was never triggered or history expired
hours_old = commit_age.total_seconds() / 3600
message = (f"No build available: '{expected_workflow}' never ran for commit "
f"{commit_sha[:7]} (commit is {hours_old:.1f} hours old). "
f"The workflow was likely not triggered due to path filters, "
f"or the commit predates the workflow configuration.")
log.info(f"Commit {commit_sha[:7]} is {hours_old:.1f} hours old with no workflow run - "
f"marking as permanent failure")
return (message, False) # NOT retryable - too old
except Exception as e:
log.warning(f"Could not check commit age: {e}")
# Fall through to retryable on error

# Commit is recent enough - workflow might still be queued
message = (f"No workflow run found: '{expected_workflow}' has not run for commit "
f"{commit_sha[:7]}. The workflow may be queued, or was not triggered "
f"due to path filters. Will retry.")
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,4 @@ cffi==2.0.0
PyGithub==2.8.1
blinker==1.9.0
click==8.3.1
PyYAML==6.0.2
155 changes: 153 additions & 2 deletions tests/test_ci/test_controllers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3490,8 +3490,10 @@ def test_old_completed_build_is_not_retryable(self):
self.assertFalse(is_retryable)
self.assertIn("Artifact not found", message)

def test_no_workflow_run_is_retryable(self):
"""Test that missing workflow run returns retryable=True."""
def test_no_workflow_run_recent_commit_is_retryable(self):
"""Test that missing workflow run for a recent commit returns retryable=True."""
from datetime import datetime, timezone

from mod_ci.controllers import _diagnose_missing_artifact

repository = MagicMock()
Expand All @@ -3506,6 +3508,11 @@ def test_no_workflow_run_is_retryable(self):
# No workflow runs for this commit
repository.get_workflow_runs.return_value = []

# Mock a recent commit (1 hour ago)
commit_obj = MagicMock()
commit_obj.commit.author.date = datetime.now(timezone.utc)
repository.get_commit.return_value = commit_obj

message, is_retryable = _diagnose_missing_artifact(
repository, "abc123", TestPlatform.linux, log
)
Expand All @@ -3514,6 +3521,38 @@ def test_no_workflow_run_is_retryable(self):
self.assertIn("No workflow run found", message)
self.assertIn("Will retry", message)

def test_no_workflow_run_old_commit_not_retryable(self):
"""Test that missing workflow run for an old commit returns retryable=False."""
from datetime import datetime, timedelta, timezone

from mod_ci.controllers import _diagnose_missing_artifact

repository = MagicMock()
log = MagicMock()

# Mock workflow
workflow = MagicMock()
workflow.id = 1
workflow.name = "Build CCExtractor on Linux"
repository.get_workflows.return_value = [workflow]

# No workflow runs for this commit
repository.get_workflow_runs.return_value = []

# Mock an old commit (5 hours ago - beyond the 3 hour limit)
commit_obj = MagicMock()
commit_obj.commit.author.date = datetime.now(timezone.utc) - timedelta(hours=5)
repository.get_commit.return_value = commit_obj

message, is_retryable = _diagnose_missing_artifact(
repository, "abc123", TestPlatform.linux, log
)

self.assertFalse(is_retryable)
self.assertIn("No build available", message)
self.assertIn("never ran", message)
self.assertIn("5.0 hours old", message)

def test_diagnostic_exception_is_retryable(self):
"""Test that diagnostic failures default to retryable=True."""
from mod_ci.controllers import _diagnose_missing_artifact
Expand All @@ -3531,6 +3570,118 @@ def test_diagnostic_exception_is_retryable(self):
self.assertTrue(is_retryable)
self.assertIn("diagnostic check failed", message)

@mock.patch('mod_ci.controllers._will_workflow_run_for_commit')
def test_workflow_will_not_run_not_retryable(self, mock_will_run):
"""Test that when workflow won't run due to path filters, it's not retryable."""
from mod_ci.controllers import _diagnose_missing_artifact

repository = MagicMock()
log = MagicMock()

# Workflow will NOT run due to path filters
mock_will_run.return_value = False

message, is_retryable = _diagnose_missing_artifact(
repository, "abc123", TestPlatform.linux, log
)

self.assertFalse(is_retryable)
self.assertIn("No build will be created", message)
self.assertIn("does not modify any files", message)


class TestWorkflowPathFilters(BaseTestCase):
"""Test the workflow path filter checking functions."""

@mock.patch('mod_ci.controllers._get_workflow_path_filters')
def test_will_workflow_run_matches_c_file(self, mock_get_filters):
"""Test that changing a .c file triggers the workflow."""
from mod_ci.controllers import _will_workflow_run_for_commit

repository = MagicMock()
log = MagicMock()

# Mock path filters
mock_get_filters.return_value = ['**.c', '**.h', 'linux/**']

# Mock commit with a .c file changed
mock_file = MagicMock()
mock_file.filename = 'src/ccextractor.c'
mock_commit = MagicMock()
mock_commit.files = [mock_file]
repository.get_commit.return_value = mock_commit

result = _will_workflow_run_for_commit(
repository, "abc123", "Build CCExtractor on Linux", log
)

self.assertTrue(result)

@mock.patch('mod_ci.controllers._get_workflow_path_filters')
def test_will_workflow_run_no_match_docs_only(self, mock_get_filters):
"""Test that changing only docs doesn't trigger the workflow."""
from mod_ci.controllers import _will_workflow_run_for_commit

repository = MagicMock()
log = MagicMock()

# Mock path filters
mock_get_filters.return_value = ['**.c', '**.h', 'linux/**']

# Mock commit with only README changed
mock_file = MagicMock()
mock_file.filename = 'README.md'
mock_commit = MagicMock()
mock_commit.files = [mock_file]
repository.get_commit.return_value = mock_commit

result = _will_workflow_run_for_commit(
repository, "abc123", "Build CCExtractor on Linux", log
)

self.assertFalse(result)

@mock.patch('mod_ci.controllers._get_workflow_path_filters')
def test_will_workflow_run_no_filters_returns_none(self, mock_get_filters):
"""Test that workflows without path filters return None."""
from mod_ci.controllers import _will_workflow_run_for_commit

repository = MagicMock()
log = MagicMock()

# No path filters (workflow runs on everything)
mock_get_filters.return_value = None

result = _will_workflow_run_for_commit(
repository, "abc123", "Build CCExtractor on Linux", log
)

self.assertIsNone(result)

@mock.patch('mod_ci.controllers._get_workflow_path_filters')
def test_will_workflow_run_matches_subdirectory(self, mock_get_filters):
"""Test that ** patterns match subdirectories."""
from mod_ci.controllers import _will_workflow_run_for_commit

repository = MagicMock()
log = MagicMock()

# Mock path filters with ** pattern
mock_get_filters.return_value = ['src/rust/**']

# Mock commit with a file in src/rust subdirectory
mock_file = MagicMock()
mock_file.filename = 'src/rust/lib_ccxr/src/main.rs'
mock_commit = MagicMock()
mock_commit.files = [mock_file]
repository.get_commit.return_value = mock_commit

result = _will_workflow_run_for_commit(
repository, "abc123", "Build CCExtractor on Linux", log
)

self.assertTrue(result)


class TestStartTestRetryBehavior(BaseTestCase):
"""Test that start_test correctly handles retryable vs permanent failures."""
Expand Down