diff --git a/mod_ci/controllers.py b/mod_ci/controllers.py
index 36510f16..1414e1b0 100755
--- a/mod_ci/controllers.py
+++ b/mod_ci/controllers.py
@@ -1,6 +1,8 @@
 """maintains all functionality related running virtual machines, starting and tracking tests."""
 
+import base64
 import datetime
+import fnmatch
 import hashlib
 import json
 import os
@@ -15,6 +17,7 @@
 
 import googleapiclient.discovery
 import requests
+import yaml
 from flask import (Blueprint, abort, flash, g, jsonify, redirect, request,
                    url_for)
 from github import (Auth, Commit, Github, GithubException, GithubObject,
@@ -682,6 +685,126 @@ def update_github_status():
     return db_success and github_success
 
 
+# Mapping of workflow names to their YAML files in the repository
+WORKFLOW_FILES = {
+    "Build CCExtractor on Linux": ".github/workflows/build_linux.yml",
+    "Build CCExtractor on Windows": ".github/workflows/build_windows.yml",
+}
+
+# Cache for workflow path filters (refreshed periodically)
+_workflow_filters_cache: Dict[str, tuple] = {}  # {workflow_name: (filters, timestamp)}
+WORKFLOW_CACHE_TTL = 3600  # 1 hour cache TTL
+
+
+def _get_workflow_path_filters(repository, workflow_name: str, log) -> Optional[list]:
+    """
+    Fetch and parse the workflow YAML to extract path filters.
+
+    Returns a list of path patterns that trigger the workflow, or None if
+    the workflow has no path filters (runs on all changes).
+
+    :param repository: GitHub repository object
+    :param workflow_name: Name of the workflow (e.g., "Build CCExtractor on Linux")
+    :param log: Logger instance
+    :return: List of path patterns, or None if no filters
+    """
+    import time as time_module
+
+    # Check cache first
+    if workflow_name in _workflow_filters_cache:
+        filters, cached_at = _workflow_filters_cache[workflow_name]
+        if time_module.time() - cached_at < WORKFLOW_CACHE_TTL:
+            return filters
+
+    workflow_file = WORKFLOW_FILES.get(workflow_name)
+    if not workflow_file:
+        log.warning(f"Unknown workflow: {workflow_name}")
+        return None
+
+    try:
+        # Fetch the workflow file content from the repository
+        contents = repository.get_contents(workflow_file)
+        workflow_yaml = base64.b64decode(contents.content).decode('utf-8')
+
+        # Parse the YAML
+        workflow_config = yaml.safe_load(workflow_yaml)
+
+        # Extract path filters from 'on.push.paths' and 'on.pull_request.paths'
+        on_config = workflow_config.get('on', {})
+        path_filters = set()
+
+        # Check push paths
+        push_config = on_config.get('push', {})
+        if isinstance(push_config, dict) and 'paths' in push_config:
+            path_filters.update(push_config['paths'])
+
+        # Check pull_request paths
+        pr_config = on_config.get('pull_request', {})
+        if isinstance(pr_config, dict) and 'paths' in pr_config:
+            path_filters.update(pr_config['paths'])
+
+        filters = list(path_filters) if path_filters else None
+
+        # Cache the result
+        _workflow_filters_cache[workflow_name] = (filters, time_module.time())
+
+        log.debug(f"Workflow '{workflow_name}' path filters: {filters}")
+        return filters
+
+    except Exception as e:
+        log.warning(f"Failed to fetch/parse workflow file {workflow_file}: {e}")
+        return None  # Assume no filters on error (will retry)
+
+
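For reference, the extraction performed by _get_workflow_path_filters reduces to the standalone sketch below. The workflow snippet is a made-up example rather than the real build_linux.yml, and the fallback lookup under the key True is an extra safeguard: PyYAML's safe_load follows YAML 1.1, where an unquoted `on` key loads as the boolean True rather than the string 'on', so checking both keys keeps the extraction robust regardless of how the trigger key is written.

import yaml

# Made-up workflow snippet; the real filters live in .github/workflows/*.yml.
sample_workflow = """
on:
  push:
    paths: ['**.c', '**.h', 'linux/**']
  pull_request:
    paths: ['**.c', '**.h', 'linux/**']
"""

config = yaml.safe_load(sample_workflow)
# Unquoted `on` surfaces under the key True in YAML 1.1, so fall back to it.
on_config = config.get('on', config.get(True, {}))

path_filters = set()
for trigger in ('push', 'pull_request'):
    section = on_config.get(trigger, {})
    if isinstance(section, dict) and 'paths' in section:
        path_filters.update(section['paths'])

print(sorted(path_filters))  # ['**.c', '**.h', 'linux/**']
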
+def _will_workflow_run_for_commit(repository, commit_sha: str, workflow_name: str, log) -> Optional[bool]:
+    """
+    Check if a workflow will run for a given commit based on path filters.
+
+    :param repository: GitHub repository object
+    :param commit_sha: The commit SHA to check
+    :param workflow_name: Name of the workflow
+    :param log: Logger instance
+    :return: True if workflow will run (files match), False if it won't (no match),
+             None if cannot determine (no path filters or error)
+    """
+    # Get the workflow's path filters
+    path_filters = _get_workflow_path_filters(repository, workflow_name, log)
+
+    if path_filters is None:
+        # No path filters means workflow runs on all changes
+        return None
+
+    try:
+        # Get the list of files changed in this commit
+        commit = repository.get_commit(commit_sha)
+        changed_files = [f.filename for f in commit.files]
+
+        log.debug(f"Commit {commit_sha[:7]} changed {len(changed_files)} files")
+
+        # Check if any changed file matches any path filter
+        for changed_file in changed_files:
+            for pattern in path_filters:
+                # Handle ** patterns (match any path depth)
+                if '**' in pattern:
+                    # Convert ** glob to regex-compatible pattern
+                    regex_pattern = pattern.replace('**/', '(.*/)?').replace('**', '.*')
+                    regex_pattern = regex_pattern.replace('*', '[^/]*')
+                    if re.match(regex_pattern, changed_file):
+                        log.debug(f"File '{changed_file}' matches pattern '{pattern}'")
+                        return True
+                elif fnmatch.fnmatch(changed_file, pattern):
+                    log.debug(f"File '{changed_file}' matches pattern '{pattern}'")
+                    return True
+
+        # No files matched any pattern
+        log.info(f"Commit {commit_sha[:7]}: No changed files match workflow '{workflow_name}' path filters")
+        return False
+
+    except Exception as e:
+        log.warning(f"Failed to check commit files against workflow filters: {e}")
+        return None  # Cannot determine
+
+
 def _diagnose_missing_artifact(repository, commit_sha: str, platform, log) -> tuple:
     """
     Diagnose why an artifact was not found for a commit.
@@ -703,6 +826,16 @@ def _diagnose_missing_artifact(repository, commit_sha: str, platform, log) -> tu
     else:
         expected_workflow = Workflow_builds.WINDOWS
 
+    # First, check if the workflow will even run based on path filters
+    will_run = _will_workflow_run_for_commit(repository, commit_sha, expected_workflow, log)
+    if will_run is False:
+        # Workflow will definitely NOT run - no matching files in commit
+        message = (f"No build will be created: commit {commit_sha[:7]} does not modify any files "
+                   f"that trigger the '{expected_workflow}' workflow. "
+                   f"Only documentation or non-code files were changed.")
+        log.info(f"Commit {commit_sha[:7]}: workflow '{expected_workflow}' will not run due to path filters")
+        return (message, False)  # NOT retryable - workflow will never run
+
     try:
         # Build workflow name lookup
         workflow: Dict[int, Optional[str]] = defaultdict(lambda: None)
@@ -762,8 +895,33 @@
         return (message, False)  # Not retryable
 
     if not workflow_found:
-        # No workflow run found - could be queued or not triggered
-        # This is RETRYABLE (workflow might be queued or path filters excluded it)
+        # No workflow run found - could be queued, not triggered, or too old
+        # Check commit age to determine if we should keep waiting
+        MAX_WAIT_HOURS = 3
+        try:
+            from datetime import datetime, timedelta, timezone
+            commit_obj = repository.get_commit(commit_sha)
+            commit_date = commit_obj.commit.author.date
+            if commit_date.tzinfo is None:
+                commit_date = commit_date.replace(tzinfo=timezone.utc)
+            now = datetime.now(timezone.utc)
+            commit_age = now - commit_date
+
+            if commit_age > timedelta(hours=MAX_WAIT_HOURS):
+                # Commit is too old - workflow was never triggered or history expired
+                hours_old = commit_age.total_seconds() / 3600
+                message = (f"No build available: '{expected_workflow}' never ran for commit "
+                           f"{commit_sha[:7]} (commit is {hours_old:.1f} hours old). "
+                           f"The workflow was likely not triggered due to path filters, "
+                           f"or the commit predates the workflow configuration.")
+                log.info(f"Commit {commit_sha[:7]} is {hours_old:.1f} hours old with no workflow run - "
+                         f"marking as permanent failure")
+                return (message, False)  # NOT retryable - too old
+        except Exception as e:
+            log.warning(f"Could not check commit age: {e}")
+            # Fall through to retryable on error
+
+        # Commit is recent enough - workflow might still be queued
         message = (f"No workflow run found: '{expected_workflow}' has not run for commit "
                    f"{commit_sha[:7]}. The workflow may be queued, or was not triggered "
                    f"due to path filters. Will retry.")
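The '**' handling is the subtle part of _will_workflow_run_for_commit, so the sketch below applies the same glob-to-regex translation to a few hypothetical pattern/file pairs (illustrative only, not part of the patch). Because re.match anchors only at the start of the path and the translation is loose, the check errs toward reporting a match; a false positive merely skips the early "will never run" exit, while a false negative would wrongly mark a commit as permanently unbuildable.

import re


def glob_to_regex(pattern: str) -> str:
    """Same translation as in _will_workflow_run_for_commit (loose and unanchored)."""
    regex_pattern = pattern.replace('**/', '(.*/)?').replace('**', '.*')
    return regex_pattern.replace('*', '[^/]*')


# Hypothetical pattern/file pairs in the spirit of the workflow filters:
print(bool(re.match(glob_to_regex('**.c'), 'src/ccextractor.c')))                     # True
print(bool(re.match(glob_to_regex('src/rust/**'), 'src/rust/lib_ccxr/src/main.rs')))  # True
print(bool(re.match(glob_to_regex('linux/**'), 'docs/README.md')))                    # False
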
diff --git a/requirements.txt b/requirements.txt
index ef3bfccb..558018ee 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -26,3 +26,4 @@ cffi==2.0.0
 PyGithub==2.8.1
 blinker==1.9.0
 click==8.3.1
+PyYAML==6.0.2
diff --git a/tests/test_ci/test_controllers.py b/tests/test_ci/test_controllers.py
index 24643f00..cf62edd2 100644
--- a/tests/test_ci/test_controllers.py
+++ b/tests/test_ci/test_controllers.py
@@ -3490,8 +3490,10 @@ def test_old_completed_build_is_not_retryable(self):
         self.assertFalse(is_retryable)
         self.assertIn("Artifact not found", message)
 
-    def test_no_workflow_run_is_retryable(self):
-        """Test that missing workflow run returns retryable=True."""
+    def test_no_workflow_run_recent_commit_is_retryable(self):
+        """Test that missing workflow run for a recent commit returns retryable=True."""
+        from datetime import datetime, timezone
+
         from mod_ci.controllers import _diagnose_missing_artifact
 
         repository = MagicMock()
@@ -3506,6 +3508,11 @@ def test_no_workflow_run_is_retryable(self):
         # No workflow runs for this commit
         repository.get_workflow_runs.return_value = []
 
+        # Mock a recent commit (1 hour ago)
+        commit_obj = MagicMock()
+        commit_obj.commit.author.date = datetime.now(timezone.utc)
+        repository.get_commit.return_value = commit_obj
+
         message, is_retryable = _diagnose_missing_artifact(
             repository, "abc123", TestPlatform.linux, log
         )
@@ -3514,6 +3521,38 @@ def test_no_workflow_run_is_retryable(self):
         self.assertIn("No workflow run found", message)
         self.assertIn("Will retry", message)
 
+    def test_no_workflow_run_old_commit_not_retryable(self):
+        """Test that missing workflow run for an old commit returns retryable=False."""
+        from datetime import datetime, timedelta, timezone
+
+        from mod_ci.controllers import _diagnose_missing_artifact
+
+        repository = MagicMock()
+        log = MagicMock()
+
+        # Mock workflow
+        workflow = MagicMock()
+        workflow.id = 1
+        workflow.name = "Build CCExtractor on Linux"
+        repository.get_workflows.return_value = [workflow]
+
+        # No workflow runs for this commit
+        repository.get_workflow_runs.return_value = []
+
+        # Mock an old commit (5 hours ago - beyond the 3 hour limit)
+        commit_obj = MagicMock()
+        commit_obj.commit.author.date = datetime.now(timezone.utc) - timedelta(hours=5)
+        repository.get_commit.return_value = commit_obj
+
+        message, is_retryable = _diagnose_missing_artifact(
+            repository, "abc123", TestPlatform.linux, log
+        )
+
+        self.assertFalse(is_retryable)
+        self.assertIn("No build available", message)
+        self.assertIn("never ran", message)
+        self.assertIn("5.0 hours old", message)
+
     def test_diagnostic_exception_is_retryable(self):
         """Test that diagnostic failures default to retryable=True."""
         from mod_ci.controllers import _diagnose_missing_artifact
@@ -3531,6 +3570,118 @@ def test_diagnostic_exception_is_retryable(self):
         self.assertTrue(is_retryable)
         self.assertIn("diagnostic check failed", message)
 
+    @mock.patch('mod_ci.controllers._will_workflow_run_for_commit')
+    def test_workflow_will_not_run_not_retryable(self, mock_will_run):
+        """Test that when workflow won't run due to path filters, it's not retryable."""
+        from mod_ci.controllers import _diagnose_missing_artifact
+
+        repository = MagicMock()
+        log = MagicMock()
+
+        # Workflow will NOT run due to path filters
+        mock_will_run.return_value = False
+
+        message, is_retryable = _diagnose_missing_artifact(
+            repository, "abc123", TestPlatform.linux, log
+        )
+
+        self.assertFalse(is_retryable)
+        self.assertIn("No build will be created", message)
+        self.assertIn("does not modify any files", message)
+
+
+class TestWorkflowPathFilters(BaseTestCase):
+    """Test the workflow path filter checking functions."""
+
+    @mock.patch('mod_ci.controllers._get_workflow_path_filters')
+    def test_will_workflow_run_matches_c_file(self, mock_get_filters):
+        """Test that changing a .c file triggers the workflow."""
+        from mod_ci.controllers import _will_workflow_run_for_commit
+
+        repository = MagicMock()
+        log = MagicMock()
+
+        # Mock path filters
+        mock_get_filters.return_value = ['**.c', '**.h', 'linux/**']
+
+        # Mock commit with a .c file changed
+        mock_file = MagicMock()
+        mock_file.filename = 'src/ccextractor.c'
+        mock_commit = MagicMock()
+        mock_commit.files = [mock_file]
+        repository.get_commit.return_value = mock_commit
+
+        result = _will_workflow_run_for_commit(
+            repository, "abc123", "Build CCExtractor on Linux", log
+        )
+
+        self.assertTrue(result)
+
+    @mock.patch('mod_ci.controllers._get_workflow_path_filters')
+    def test_will_workflow_run_no_match_docs_only(self, mock_get_filters):
+        """Test that changing only docs doesn't trigger the workflow."""
+        from mod_ci.controllers import _will_workflow_run_for_commit
+
+        repository = MagicMock()
+        log = MagicMock()
+
+        # Mock path filters
+        mock_get_filters.return_value = ['**.c', '**.h', 'linux/**']
+
+        # Mock commit with only README changed
+        mock_file = MagicMock()
+        mock_file.filename = 'README.md'
+        mock_commit = MagicMock()
+        mock_commit.files = [mock_file]
+        repository.get_commit.return_value = mock_commit
+
+        result = _will_workflow_run_for_commit(
+            repository, "abc123", "Build CCExtractor on Linux", log
+        )
+
+        self.assertFalse(result)
+
+    @mock.patch('mod_ci.controllers._get_workflow_path_filters')
+    def test_will_workflow_run_no_filters_returns_none(self, mock_get_filters):
+        """Test that workflows without path filters return None."""
+        from mod_ci.controllers import _will_workflow_run_for_commit
+
+        repository = MagicMock()
+        log = MagicMock()
+
+        # No path filters (workflow runs on everything)
+        mock_get_filters.return_value = None
+
+        result = _will_workflow_run_for_commit(
+            repository, "abc123", "Build CCExtractor on Linux", log
+        )
+
+        self.assertIsNone(result)
+
+    @mock.patch('mod_ci.controllers._get_workflow_path_filters')
+    def test_will_workflow_run_matches_subdirectory(self, mock_get_filters):
+        """Test that ** patterns match subdirectories."""
+        from mod_ci.controllers import _will_workflow_run_for_commit
+
+        repository = MagicMock()
+        log = MagicMock()
+
+        # Mock path filters with ** pattern
+        mock_get_filters.return_value = ['src/rust/**']
+
+        # Mock commit with a file in src/rust subdirectory
+        mock_file = MagicMock()
+        mock_file.filename = 'src/rust/lib_ccxr/src/main.rs'
+        mock_commit = MagicMock()
+        mock_commit.files = [mock_file]
+        repository.get_commit.return_value = mock_commit
+
+        result = _will_workflow_run_for_commit(
+            repository, "abc123", "Build CCExtractor on Linux", log
+        )
+
+        self.assertTrue(result)
+
 
 class TestStartTestRetryBehavior(BaseTestCase):
     """Test that start_test correctly handles retryable vs permanent failures."""
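The tests above patch _get_workflow_path_filters and _will_workflow_run_for_commit directly, so they never touch the module-level filter cache added in controllers.py. If a later test exercises the real fetch path, a sketch along these lines (a hypothetical helper, not part of the patch) keeps filters cached by one test from leaking into the next:

import unittest

from mod_ci import controllers


class WorkflowFilterTestBase(unittest.TestCase):
    """Hypothetical base class: resets the module-level filter cache per test."""

    def setUp(self):
        super().setUp()
        # _workflow_filters_cache is shared module state with a one-hour TTL,
        # so clear it so each test starts from an empty cache.
        controllers._workflow_filters_cache.clear()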