fix: detect when workflow won't run due to path filters #1015

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged

canihavesomecoffee merged 3 commits into master from fix/artifact-path-filter-check

Feb 1, 2026

+314 −4

mod_ci/controllers.py

-Original file line number
+Diff line change
@@ -1,6 +1,8 @@
     """maintains all functionality related running virtual machines, starting and tracking tests."""
+    import base64
     import datetime
+    import fnmatch
     import hashlib
     import json
     import os
@@ Expand All / @@ -15,6 +17,7 @@ @@
     import googleapiclient.discovery
     import requests
+    import yaml
     from flask import (Blueprint, abort, flash, g, jsonify, redirect, request,
                        url_for)
     from github import (Auth, Commit, Github, GithubException, GithubObject,
@@ Expand Down Expand Up / @@ -682,6 +685,126 @@ def update_github_status(): @@
         return db_success and github_success
+    # Mapping of workflow names to their YAML files in the repository
+    WORKFLOW_FILES = {
+        "Build CCExtractor on Linux": ".github/workflows/build_linux.yml",
+        "Build CCExtractor on Windows": ".github/workflows/build_windows.yml",
+    }
+    # Cache for workflow path filters (refreshed periodically)
+    _workflow_filters_cache: Dict[str, tuple] = {}  # {workflow_name: (filters, timestamp)}
+    WORKFLOW_CACHE_TTL = 3600  # 1 hour cache TTL
+    def _get_workflow_path_filters(repository, workflow_name: str, log) -> Optional[list]:
+        """
+        Fetch and parse the workflow YAML to extract path filters.
+        Returns a list of path patterns that trigger the workflow, or None if
+        the workflow has no path filters (runs on all changes).
+        :param repository: GitHub repository object
+        :param workflow_name: Name of the workflow (e.g., "Build CCExtractor on Linux")
+        :param log: Logger instance
+        :return: List of path patterns, or None if no filters
+        """
+        import time as time_module
+        # Check cache first
+        if workflow_name in _workflow_filters_cache:
+            filters, cached_at = _workflow_filters_cache[workflow_name]
+            if time_module.time() - cached_at < WORKFLOW_CACHE_TTL:
+                return filters
+        workflow_file = WORKFLOW_FILES.get(workflow_name)
+        if not workflow_file:
+            log.warning(f"Unknown workflow: {workflow_name}")
+            return None
+        try:
+            # Fetch the workflow file content from the repository
+            contents = repository.get_contents(workflow_file)
+            workflow_yaml = base64.b64decode(contents.content).decode('utf-8')
+            # Parse the YAML
+            workflow_config = yaml.safe_load(workflow_yaml)
+            # Extract path filters from 'on.push.paths' and 'on.pull_request.paths'
+            on_config = workflow_config.get('on', {})
+            path_filters = set()
+            # Check push paths
+            push_config = on_config.get('push', {})
+            if isinstance(push_config, dict) and 'paths' in push_config:
+                path_filters.update(push_config['paths'])
+            # Check pull_request paths
+            pr_config = on_config.get('pull_request', {})
+            if isinstance(pr_config, dict) and 'paths' in pr_config:
+                path_filters.update(pr_config['paths'])
+            filters = list(path_filters) if path_filters else None
+            # Cache the result
+            _workflow_filters_cache[workflow_name] = (filters, time_module.time())
+            log.debug(f"Workflow '{workflow_name}' path filters: {filters}")
+            return filters
+        except Exception as e:
+            log.warning(f"Failed to fetch/parse workflow file {workflow_file}: {e}")
+            return None  # Assume no filters on error (will retry)
+    def _will_workflow_run_for_commit(repository, commit_sha: str, workflow_name: str, log) -> Optional[bool]:
+        """
+        Check if a workflow will run for a given commit based on path filters.
+        :param repository: GitHub repository object
+        :param commit_sha: The commit SHA to check
+        :param workflow_name: Name of the workflow
+        :param log: Logger instance
+        :return: True if workflow will run (files match), False if it won't (no match),
+                 None if cannot determine (no path filters or error)
+        """
+        # Get the workflow's path filters
+        path_filters = _get_workflow_path_filters(repository, workflow_name, log)
+        if path_filters is None:
+            # No path filters means workflow runs on all changes
+            return None
+        try:
+            # Get the list of files changed in this commit
+            commit = repository.get_commit(commit_sha)
+            changed_files = [f.filename for f in commit.files]
+            log.debug(f"Commit {commit_sha[:7]} changed {len(changed_files)} files")
+            # Check if any changed file matches any path filter
+            for changed_file in changed_files:
+                for pattern in path_filters:
+                    # Handle ** patterns (match any path depth)
+                    if '**' in pattern:
+                        # Convert ** glob to regex-compatible pattern
+                        regex_pattern = pattern.replace('**/', '(.*/)?').replace('**', '.*')
+                        regex_pattern = regex_pattern.replace('*', '[^/]*')
+                        if re.match(regex_pattern, changed_file):
+                            log.debug(f"File '{changed_file}' matches pattern '{pattern}'")
+                            return True
+                    elif fnmatch.fnmatch(changed_file, pattern):
+                        log.debug(f"File '{changed_file}' matches pattern '{pattern}'")
+                        return True
+            # No files matched any pattern
+            log.info(f"Commit {commit_sha[:7]}: No changed files match workflow '{workflow_name}' path filters")
+            return False
+        except Exception as e:
+            log.warning(f"Failed to check commit files against workflow filters: {e}")
+            return None  # Cannot determine
     def _diagnose_missing_artifact(repository, commit_sha: str, platform, log) -> tuple:
         """
         Diagnose why an artifact was not found for a commit.
@@ Expand All @@
         else:
             expected_workflow = Workflow_builds.WINDOWS
+        # First, check if the workflow will even run based on path filters
+        will_run = _will_workflow_run_for_commit(repository, commit_sha, expected_workflow, log)
+        if will_run is False:
+            # Workflow will definitely NOT run - no matching files in commit
+            message = (f"No build will be created: commit {commit_sha[:7]} does not modify any files "
+                       f"that trigger the '{expected_workflow}' workflow. "
+                       f"Only documentation or non-code files were changed.")
+            log.info(f"Commit {commit_sha[:7]}: workflow '{expected_workflow}' will not run due to path filters")
+            return (message, False)  # NOT retryable - workflow will never run
         try:
             # Build workflow name lookup
             workflow: Dict[int, Optional[str]] = defaultdict(lambda: None)
@@ Expand Down Expand Up @@
                     return (message, False)  # Not retryable
             if not workflow_found:
-                # No workflow run found - could be queued or not triggered
-                # This is RETRYABLE (workflow might be queued or path filters excluded it)
+                # No workflow run found - could be queued, not triggered, or too old
+                # Check commit age to determine if we should keep waiting
+                MAX_WAIT_HOURS = 3
+                try:
+                    from datetime import datetime, timedelta, timezone
+                    commit_obj = repository.get_commit(commit_sha)
+                    commit_date = commit_obj.commit.author.date
+                    if commit_date.tzinfo is None:
+                        commit_date = commit_date.replace(tzinfo=timezone.utc)
+                    now = datetime.now(timezone.utc)
+                    commit_age = now - commit_date
+                    if commit_age > timedelta(hours=MAX_WAIT_HOURS):
+                        # Commit is too old - workflow was never triggered or history expired
+                        hours_old = commit_age.total_seconds() / 3600
+                        message = (f"No build available: '{expected_workflow}' never ran for commit "
+                                   f"{commit_sha[:7]} (commit is {hours_old:.1f} hours old). "
+                                   f"The workflow was likely not triggered due to path filters, "
+                                   f"or the commit predates the workflow configuration.")
+                        log.info(f"Commit {commit_sha[:7]} is {hours_old:.1f} hours old with no workflow run - "
+                                 f"marking as permanent failure")
+                        return (message, False)  # NOT retryable - too old
+                except Exception as e:
+                    log.warning(f"Could not check commit age: {e}")
+                    # Fall through to retryable on error
+                # Commit is recent enough - workflow might still be queued
                 message = (f"No workflow run found: '{expected_workflow}' has not run for commit "
                            f"{commit_sha[:7]}. The workflow may be queued, or was not triggered "
                            f"due to path filters. Will retry.")
@@ Expand Down @@

requirements.txt

-Original file line number
+Diff line change
@@ Expand Up / @@ -26,3 +26,4 @@ cffi==2.0.0 @@
     PyGithub==2.8.1
     blinker==1.9.0
     click==8.3.1
+    PyYAML==6.0.2

tests/test_ci/test_controllers.py

-Original file line number
+Diff line change
@@ Expand Up / @@ -3490,8 +3490,10 @@ def test_old_completed_build_is_not_retryable(self): @@
             self.assertFalse(is_retryable)
             self.assertIn("Artifact not found", message)
-        def test_no_workflow_run_is_retryable(self):
-            """Test that missing workflow run returns retryable=True."""
+        def test_no_workflow_run_recent_commit_is_retryable(self):
+            """Test that missing workflow run for a recent commit returns retryable=True."""
+            from datetime import datetime, timezone
             from mod_ci.controllers import _diagnose_missing_artifact
             repository = MagicMock()
@@ Expand All / @@ -3506,6 +3508,11 @@ def test_no_workflow_run_is_retryable(self): @@
             # No workflow runs for this commit
             repository.get_workflow_runs.return_value = []
+            # Mock a recent commit (1 hour ago)
+            commit_obj = MagicMock()
+            commit_obj.commit.author.date = datetime.now(timezone.utc)
+            repository.get_commit.return_value = commit_obj
             message, is_retryable = _diagnose_missing_artifact(
                 repository, "abc123", TestPlatform.linux, log
             )
@@ Expand All / @@ -3514,6 +3521,38 @@ def test_no_workflow_run_is_retryable(self): @@
             self.assertIn("No workflow run found", message)
             self.assertIn("Will retry", message)
+        def test_no_workflow_run_old_commit_not_retryable(self):
+            """Test that missing workflow run for an old commit returns retryable=False."""
+            from datetime import datetime, timedelta, timezone
+            from mod_ci.controllers import _diagnose_missing_artifact
+            repository = MagicMock()
+            log = MagicMock()
+            # Mock workflow
+            workflow = MagicMock()
+            workflow.id = 1
+            workflow.name = "Build CCExtractor on Linux"
+            repository.get_workflows.return_value = [workflow]
+            # No workflow runs for this commit
+            repository.get_workflow_runs.return_value = []
+            # Mock an old commit (5 hours ago - beyond the 3 hour limit)
+            commit_obj = MagicMock()
+            commit_obj.commit.author.date = datetime.now(timezone.utc) - timedelta(hours=5)
+            repository.get_commit.return_value = commit_obj
+            message, is_retryable = _diagnose_missing_artifact(
+                repository, "abc123", TestPlatform.linux, log
+            )
+            self.assertFalse(is_retryable)
+            self.assertIn("No build available", message)
+            self.assertIn("never ran", message)
+            self.assertIn("5.0 hours old", message)
         def test_diagnostic_exception_is_retryable(self):
             """Test that diagnostic failures default to retryable=True."""
             from mod_ci.controllers import _diagnose_missing_artifact
@@ Expand All / @@ -3531,6 +3570,118 @@ def test_diagnostic_exception_is_retryable(self): @@
             self.assertTrue(is_retryable)
             self.assertIn("diagnostic check failed", message)
+        @mock.patch('mod_ci.controllers._will_workflow_run_for_commit')
+        def test_workflow_will_not_run_not_retryable(self, mock_will_run):
+            """Test that when workflow won't run due to path filters, it's not retryable."""
+            from mod_ci.controllers import _diagnose_missing_artifact
+            repository = MagicMock()
+            log = MagicMock()
+            # Workflow will NOT run due to path filters
+            mock_will_run.return_value = False
+            message, is_retryable = _diagnose_missing_artifact(
+                repository, "abc123", TestPlatform.linux, log
+            )
+            self.assertFalse(is_retryable)
+            self.assertIn("No build will be created", message)
+            self.assertIn("does not modify any files", message)
+    class TestWorkflowPathFilters(BaseTestCase):
+        """Test the workflow path filter checking functions."""
+        @mock.patch('mod_ci.controllers._get_workflow_path_filters')
+        def test_will_workflow_run_matches_c_file(self, mock_get_filters):
+            """Test that changing a .c file triggers the workflow."""
+            from mod_ci.controllers import _will_workflow_run_for_commit
+            repository = MagicMock()
+            log = MagicMock()
+            # Mock path filters
+            mock_get_filters.return_value = ['**.c', '**.h', 'linux/**']
+            # Mock commit with a .c file changed
+            mock_file = MagicMock()
+            mock_file.filename = 'src/ccextractor.c'
+            mock_commit = MagicMock()
+            mock_commit.files = [mock_file]
+            repository.get_commit.return_value = mock_commit
+            result = _will_workflow_run_for_commit(
+                repository, "abc123", "Build CCExtractor on Linux", log
+            )
+            self.assertTrue(result)
+        @mock.patch('mod_ci.controllers._get_workflow_path_filters')
+        def test_will_workflow_run_no_match_docs_only(self, mock_get_filters):
+            """Test that changing only docs doesn't trigger the workflow."""
+            from mod_ci.controllers import _will_workflow_run_for_commit
+            repository = MagicMock()
+            log = MagicMock()
+            # Mock path filters
+            mock_get_filters.return_value = ['**.c', '**.h', 'linux/**']
+            # Mock commit with only README changed
+            mock_file = MagicMock()
+            mock_file.filename = 'README.md'
+            mock_commit = MagicMock()
+            mock_commit.files = [mock_file]
+            repository.get_commit.return_value = mock_commit
+            result = _will_workflow_run_for_commit(
+                repository, "abc123", "Build CCExtractor on Linux", log
+            )
+            self.assertFalse(result)
+        @mock.patch('mod_ci.controllers._get_workflow_path_filters')
+        def test_will_workflow_run_no_filters_returns_none(self, mock_get_filters):
+            """Test that workflows without path filters return None."""
+            from mod_ci.controllers import _will_workflow_run_for_commit
+            repository = MagicMock()
+            log = MagicMock()
+            # No path filters (workflow runs on everything)
+            mock_get_filters.return_value = None
+            result = _will_workflow_run_for_commit(
+                repository, "abc123", "Build CCExtractor on Linux", log
+            )
+            self.assertIsNone(result)
+        @mock.patch('mod_ci.controllers._get_workflow_path_filters')
+        def test_will_workflow_run_matches_subdirectory(self, mock_get_filters):
+            """Test that ** patterns match subdirectories."""
+            from mod_ci.controllers import _will_workflow_run_for_commit
+            repository = MagicMock()
+            log = MagicMock()
+            # Mock path filters with ** pattern
+            mock_get_filters.return_value = ['src/rust/**']
+            # Mock commit with a file in src/rust subdirectory
+            mock_file = MagicMock()
+            mock_file.filename = 'src/rust/lib_ccxr/src/main.rs'
+            mock_commit = MagicMock()
+            mock_commit.files = [mock_file]
+            repository.get_commit.return_value = mock_commit
+            result = _will_workflow_run_for_commit(
+                repository, "abc123", "Build CCExtractor on Linux", log
+            )
+            self.assertTrue(result)
     class TestStartTestRetryBehavior(BaseTestCase):
         """Test that start_test correctly handles retryable vs permanent failures."""
@@ Expand Down @@

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

fix: detect when workflow won't run due to path filters #1015

Diff view

Diff view

There are no files selected for viewing

fix: detect when workflow won't run due to path filters #1015

fix: detect when workflow won't run due to path filters #1015

Uh oh!

Uh oh!

Diff view

Diff view

There are no files selected for viewing