From 251f807b6a9f6ea1a718099711f0a565ccaee4a3 Mon Sep 17 00:00:00 2001
From: Carlos <carlos@sanz.dev>
Date: Sat, 31 Jan 2026 17:30:41 -0800
Subject: [PATCH 1/3] fix: detect when workflow won't run due to path filters

Instead of waiting up to 3 hours for artifacts that will never be created,
we now check upfront if the workflow will run by:

1. Fetching the workflow YAML from the repository
2. Parsing the path filters (on.push.paths, on.pull_request.paths)
3. Comparing commit's changed files against the filters

If no files match the path filters, the test is immediately marked as
failed (not retryable) since the build workflow will never run.

This prevents the queue from being blocked by commits that only change
documentation or other non-code files.

Also includes:
- 3-hour timeout for commits with no workflow run (fallback)
- Workflow filter cache (1 hour TTL) to reduce API calls
- Comprehensive tests for path filter matching

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 mod_ci/controllers.py             | 166 +++++++++++++++++++++++++++++-
 requirements.txt                  |   1 +
 tests/test_ci/test_controllers.py | 155 +++++++++++++++++++++++++++-
 3 files changed, 318 insertions(+), 4 deletions(-)

diff --git a/mod_ci/controllers.py b/mod_ci/controllers.py
index 36510f16..1d1a97c6 100755
--- a/mod_ci/controllers.py
+++ b/mod_ci/controllers.py
@@ -1,6 +1,8 @@
 """maintains all functionality related running virtual machines, starting and tracking tests."""
 
+import base64
 import datetime
+import fnmatch
 import hashlib
 import json
 import os
@@ -22,6 +24,7 @@
 from google.oauth2 import service_account
 from lxml import etree
 from markdown2 import markdown
+import yaml
 from pymysql.err import IntegrityError
 from sqlalchemy import and_, func
 from sqlalchemy.sql import label
@@ -682,6 +685,130 @@ def update_github_status():
     return db_success and github_success
 
 
+# Mapping of workflow names to their YAML files in the repository
+WORKFLOW_FILES = {
+    "Build CCExtractor on Linux": ".github/workflows/build_linux.yml",
+    "Build CCExtractor on Windows": ".github/workflows/build_windows.yml",
+}
+
+# Cache for workflow path filters (refreshed periodically)
+_workflow_filters_cache: Dict[str, tuple] = {}  # {workflow_name: (filters, timestamp)}
+WORKFLOW_CACHE_TTL = 3600  # 1 hour cache TTL
+
+
+def _get_workflow_path_filters(repository, workflow_name: str, log) -> Optional[list]:
+    """
+    Fetch and parse the workflow YAML to extract path filters.
+
+    Returns a list of path patterns that trigger the workflow, or None if
+    the workflow has no path filters (runs on all changes).
+
+    :param repository: GitHub repository object
+    :param workflow_name: Name of the workflow (e.g., "Build CCExtractor on Linux")
+    :param log: Logger instance
+    :return: List of path patterns, or None if no filters
+    """
+    import time as time_module
+
+    # Check cache first
+    if workflow_name in _workflow_filters_cache:
+        filters, cached_at = _workflow_filters_cache[workflow_name]
+        if time_module.time() - cached_at < WORKFLOW_CACHE_TTL:
+            return filters
+
+    workflow_file = WORKFLOW_FILES.get(workflow_name)
+    if not workflow_file:
+        log.warning(f"Unknown workflow: {workflow_name}")
+        return None
+
+    try:
+        # Fetch the workflow file content from the repository
+        contents = repository.get_contents(workflow_file)
+        workflow_yaml = base64.b64decode(contents.content).decode('utf-8')
+
+        # Parse the YAML
+        workflow_config = yaml.safe_load(workflow_yaml)
+
+        # Extract path filters from 'on.push.paths' and 'on.pull_request.paths'
+        on_config = workflow_config.get('on', {})
+        path_filters = set()
+
+        # Check push paths
+        push_config = on_config.get('push', {})
+        if isinstance(push_config, dict) and 'paths' in push_config:
+            path_filters.update(push_config['paths'])
+
+        # Check pull_request paths
+        pr_config = on_config.get('pull_request', {})
+        if isinstance(pr_config, dict) and 'paths' in pr_config:
+            path_filters.update(pr_config['paths'])
+
+        filters = list(path_filters) if path_filters else None
+
+        # Cache the result
+        _workflow_filters_cache[workflow_name] = (filters, time_module.time())
+
+        log.debug(f"Workflow '{workflow_name}' path filters: {filters}")
+        return filters
+
+    except Exception as e:
+        log.warning(f"Failed to fetch/parse workflow file {workflow_file}: {e}")
+        return None  # Assume no filters on error (will retry)
+
+
+def _will_workflow_run_for_commit(repository, commit_sha: str, workflow_name: str, log) -> Optional[bool]:
+    """
+    Check if a workflow will run for a given commit based on path filters.
+
+    Returns:
+        True: Workflow will definitely run (files match path filters)
+        False: Workflow will definitely NOT run (no files match)
+        None: Cannot determine (no path filters, or error occurred)
+
+    :param repository: GitHub repository object
+    :param commit_sha: The commit SHA to check
+    :param workflow_name: Name of the workflow
+    :param log: Logger instance
+    :return: True/False/None as described above
+    """
+    # Get the workflow's path filters
+    path_filters = _get_workflow_path_filters(repository, workflow_name, log)
+
+    if path_filters is None:
+        # No path filters means workflow runs on all changes
+        return None
+
+    try:
+        # Get the list of files changed in this commit
+        commit = repository.get_commit(commit_sha)
+        changed_files = [f.filename for f in commit.files]
+
+        log.debug(f"Commit {commit_sha[:7]} changed {len(changed_files)} files")
+
+        # Check if any changed file matches any path filter
+        for changed_file in changed_files:
+            for pattern in path_filters:
+                # Handle ** patterns (match any path depth)
+                if '**' in pattern:
+                    # Convert ** glob to regex-compatible pattern
+                    regex_pattern = pattern.replace('**/', '(.*/)?').replace('**', '.*')
+                    regex_pattern = regex_pattern.replace('*', '[^/]*')
+                    if re.match(regex_pattern, changed_file):
+                        log.debug(f"File '{changed_file}' matches pattern '{pattern}'")
+                        return True
+                elif fnmatch.fnmatch(changed_file, pattern):
+                    log.debug(f"File '{changed_file}' matches pattern '{pattern}'")
+                    return True
+
+        # No files matched any pattern
+        log.info(f"Commit {commit_sha[:7]}: No changed files match workflow '{workflow_name}' path filters")
+        return False
+
+    except Exception as e:
+        log.warning(f"Failed to check commit files against workflow filters: {e}")
+        return None  # Cannot determine
+
+
 def _diagnose_missing_artifact(repository, commit_sha: str, platform, log) -> tuple:
     """
     Diagnose why an artifact was not found for a commit.
@@ -703,6 +830,16 @@ def _diagnose_missing_artifact(repository, commit_sha: str, platform, log) -> tu
     else:
         expected_workflow = Workflow_builds.WINDOWS
 
+    # First, check if the workflow will even run based on path filters
+    will_run = _will_workflow_run_for_commit(repository, commit_sha, expected_workflow, log)
+    if will_run is False:
+        # Workflow will definitely NOT run - no matching files in commit
+        message = (f"No build will be created: commit {commit_sha[:7]} does not modify any files "
+                   f"that trigger the '{expected_workflow}' workflow. "
+                   f"Only documentation or non-code files were changed.")
+        log.info(f"Commit {commit_sha[:7]}: workflow '{expected_workflow}' will not run due to path filters")
+        return (message, False)  # NOT retryable - workflow will never run
+
     try:
         # Build workflow name lookup
         workflow: Dict[int, Optional[str]] = defaultdict(lambda: None)
@@ -762,8 +899,33 @@ def _diagnose_missing_artifact(repository, commit_sha: str, platform, log) -> tu
                 return (message, False)  # Not retryable
 
         if not workflow_found:
-            # No workflow run found - could be queued or not triggered
-            # This is RETRYABLE (workflow might be queued or path filters excluded it)
+            # No workflow run found - could be queued, not triggered, or too old
+            # Check commit age to determine if we should keep waiting
+            MAX_WAIT_HOURS = 3
+            try:
+                from datetime import datetime, timezone, timedelta
+                commit_obj = repository.get_commit(commit_sha)
+                commit_date = commit_obj.commit.author.date
+                if commit_date.tzinfo is None:
+                    commit_date = commit_date.replace(tzinfo=timezone.utc)
+                now = datetime.now(timezone.utc)
+                commit_age = now - commit_date
+
+                if commit_age > timedelta(hours=MAX_WAIT_HOURS):
+                    # Commit is too old - workflow was never triggered or history expired
+                    hours_old = commit_age.total_seconds() / 3600
+                    message = (f"No build available: '{expected_workflow}' never ran for commit "
+                               f"{commit_sha[:7]} (commit is {hours_old:.1f} hours old). "
+                               f"The workflow was likely not triggered due to path filters, "
+                               f"or the commit predates the workflow configuration.")
+                    log.info(f"Commit {commit_sha[:7]} is {hours_old:.1f} hours old with no workflow run - "
+                             f"marking as permanent failure")
+                    return (message, False)  # NOT retryable - too old
+            except Exception as e:
+                log.warning(f"Could not check commit age: {e}")
+                # Fall through to retryable on error
+
+            # Commit is recent enough - workflow might still be queued
             message = (f"No workflow run found: '{expected_workflow}' has not run for commit "
                        f"{commit_sha[:7]}. The workflow may be queued, or was not triggered "
                        f"due to path filters. Will retry.")
diff --git a/requirements.txt b/requirements.txt
index ef3bfccb..558018ee 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -26,3 +26,4 @@ cffi==2.0.0
 PyGithub==2.8.1
 blinker==1.9.0
 click==8.3.1
+PyYAML==6.0.2
diff --git a/tests/test_ci/test_controllers.py b/tests/test_ci/test_controllers.py
index 24643f00..cf62edd2 100644
--- a/tests/test_ci/test_controllers.py
+++ b/tests/test_ci/test_controllers.py
@@ -3490,8 +3490,10 @@ def test_old_completed_build_is_not_retryable(self):
         self.assertFalse(is_retryable)
         self.assertIn("Artifact not found", message)
 
-    def test_no_workflow_run_is_retryable(self):
-        """Test that missing workflow run returns retryable=True."""
+    def test_no_workflow_run_recent_commit_is_retryable(self):
+        """Test that missing workflow run for a recent commit returns retryable=True."""
+        from datetime import datetime, timezone
+
         from mod_ci.controllers import _diagnose_missing_artifact
 
         repository = MagicMock()
@@ -3506,6 +3508,11 @@ def test_no_workflow_run_is_retryable(self):
         # No workflow runs for this commit
         repository.get_workflow_runs.return_value = []
 
+        # Mock a recent commit (1 hour ago)
+        commit_obj = MagicMock()
+        commit_obj.commit.author.date = datetime.now(timezone.utc)
+        repository.get_commit.return_value = commit_obj
+
         message, is_retryable = _diagnose_missing_artifact(
             repository, "abc123", TestPlatform.linux, log
         )
@@ -3514,6 +3521,38 @@ def test_no_workflow_run_is_retryable(self):
         self.assertIn("No workflow run found", message)
         self.assertIn("Will retry", message)
 
+    def test_no_workflow_run_old_commit_not_retryable(self):
+        """Test that missing workflow run for an old commit returns retryable=False."""
+        from datetime import datetime, timedelta, timezone
+
+        from mod_ci.controllers import _diagnose_missing_artifact
+
+        repository = MagicMock()
+        log = MagicMock()
+
+        # Mock workflow
+        workflow = MagicMock()
+        workflow.id = 1
+        workflow.name = "Build CCExtractor on Linux"
+        repository.get_workflows.return_value = [workflow]
+
+        # No workflow runs for this commit
+        repository.get_workflow_runs.return_value = []
+
+        # Mock an old commit (5 hours ago - beyond the 3 hour limit)
+        commit_obj = MagicMock()
+        commit_obj.commit.author.date = datetime.now(timezone.utc) - timedelta(hours=5)
+        repository.get_commit.return_value = commit_obj
+
+        message, is_retryable = _diagnose_missing_artifact(
+            repository, "abc123", TestPlatform.linux, log
+        )
+
+        self.assertFalse(is_retryable)
+        self.assertIn("No build available", message)
+        self.assertIn("never ran", message)
+        self.assertIn("5.0 hours old", message)
+
     def test_diagnostic_exception_is_retryable(self):
         """Test that diagnostic failures default to retryable=True."""
         from mod_ci.controllers import _diagnose_missing_artifact
@@ -3531,6 +3570,118 @@ def test_diagnostic_exception_is_retryable(self):
         self.assertTrue(is_retryable)
         self.assertIn("diagnostic check failed", message)
 
+    @mock.patch('mod_ci.controllers._will_workflow_run_for_commit')
+    def test_workflow_will_not_run_not_retryable(self, mock_will_run):
+        """Test that when workflow won't run due to path filters, it's not retryable."""
+        from mod_ci.controllers import _diagnose_missing_artifact
+
+        repository = MagicMock()
+        log = MagicMock()
+
+        # Workflow will NOT run due to path filters
+        mock_will_run.return_value = False
+
+        message, is_retryable = _diagnose_missing_artifact(
+            repository, "abc123", TestPlatform.linux, log
+        )
+
+        self.assertFalse(is_retryable)
+        self.assertIn("No build will be created", message)
+        self.assertIn("does not modify any files", message)
+
+
+class TestWorkflowPathFilters(BaseTestCase):
+    """Test the workflow path filter checking functions."""
+
+    @mock.patch('mod_ci.controllers._get_workflow_path_filters')
+    def test_will_workflow_run_matches_c_file(self, mock_get_filters):
+        """Test that changing a .c file triggers the workflow."""
+        from mod_ci.controllers import _will_workflow_run_for_commit
+
+        repository = MagicMock()
+        log = MagicMock()
+
+        # Mock path filters
+        mock_get_filters.return_value = ['**.c', '**.h', 'linux/**']
+
+        # Mock commit with a .c file changed
+        mock_file = MagicMock()
+        mock_file.filename = 'src/ccextractor.c'
+        mock_commit = MagicMock()
+        mock_commit.files = [mock_file]
+        repository.get_commit.return_value = mock_commit
+
+        result = _will_workflow_run_for_commit(
+            repository, "abc123", "Build CCExtractor on Linux", log
+        )
+
+        self.assertTrue(result)
+
+    @mock.patch('mod_ci.controllers._get_workflow_path_filters')
+    def test_will_workflow_run_no_match_docs_only(self, mock_get_filters):
+        """Test that changing only docs doesn't trigger the workflow."""
+        from mod_ci.controllers import _will_workflow_run_for_commit
+
+        repository = MagicMock()
+        log = MagicMock()
+
+        # Mock path filters
+        mock_get_filters.return_value = ['**.c', '**.h', 'linux/**']
+
+        # Mock commit with only README changed
+        mock_file = MagicMock()
+        mock_file.filename = 'README.md'
+        mock_commit = MagicMock()
+        mock_commit.files = [mock_file]
+        repository.get_commit.return_value = mock_commit
+
+        result = _will_workflow_run_for_commit(
+            repository, "abc123", "Build CCExtractor on Linux", log
+        )
+
+        self.assertFalse(result)
+
+    @mock.patch('mod_ci.controllers._get_workflow_path_filters')
+    def test_will_workflow_run_no_filters_returns_none(self, mock_get_filters):
+        """Test that workflows without path filters return None."""
+        from mod_ci.controllers import _will_workflow_run_for_commit
+
+        repository = MagicMock()
+        log = MagicMock()
+
+        # No path filters (workflow runs on everything)
+        mock_get_filters.return_value = None
+
+        result = _will_workflow_run_for_commit(
+            repository, "abc123", "Build CCExtractor on Linux", log
+        )
+
+        self.assertIsNone(result)
+
+    @mock.patch('mod_ci.controllers._get_workflow_path_filters')
+    def test_will_workflow_run_matches_subdirectory(self, mock_get_filters):
+        """Test that ** patterns match subdirectories."""
+        from mod_ci.controllers import _will_workflow_run_for_commit
+
+        repository = MagicMock()
+        log = MagicMock()
+
+        # Mock path filters with ** pattern
+        mock_get_filters.return_value = ['src/rust/**']
+
+        # Mock commit with a file in src/rust subdirectory
+        mock_file = MagicMock()
+        mock_file.filename = 'src/rust/lib_ccxr/src/main.rs'
+        mock_commit = MagicMock()
+        mock_commit.files = [mock_file]
+        repository.get_commit.return_value = mock_commit
+
+        result = _will_workflow_run_for_commit(
+            repository, "abc123", "Build CCExtractor on Linux", log
+        )
+
+        self.assertTrue(result)
+
 
 class TestStartTestRetryBehavior(BaseTestCase):
     """Test that start_test correctly handles retryable vs permanent failures."""

From 1113c06ed06366586eb2ee2ad44382ce39c45254 Mon Sep 17 00:00:00 2001
From: Carlos <carlos@sanz.dev>
Date: Sat, 31 Jan 2026 18:08:21 -0800
Subject: [PATCH 2/3] style: fix import sorting order

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 mod_ci/controllers.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mod_ci/controllers.py b/mod_ci/controllers.py
index 1d1a97c6..062195a2 100755
--- a/mod_ci/controllers.py
+++ b/mod_ci/controllers.py
@@ -17,6 +17,7 @@
 
 import googleapiclient.discovery
 import requests
+import yaml
 from flask import (Blueprint, abort, flash, g, jsonify, redirect, request,
                    url_for)
 from github import (Auth, Commit, Github, GithubException, GithubObject,
@@ -24,7 +25,6 @@
 from google.oauth2 import service_account
 from lxml import etree
 from markdown2 import markdown
-import yaml
 from pymysql.err import IntegrityError
 from sqlalchemy import and_, func
 from sqlalchemy.sql import label
@@ -903,7 +903,7 @@ def _diagnose_missing_artifact(repository, commit_sha: str, platform, log) -> tu
             # Check commit age to determine if we should keep waiting
             MAX_WAIT_HOURS = 3
             try:
-                from datetime import datetime, timezone, timedelta
+                from datetime import datetime, timedelta, timezone
                 commit_obj = repository.get_commit(commit_sha)
                 commit_date = commit_obj.commit.author.date
                 if commit_date.tzinfo is None:

From ab8fbd2749f2e99ddb310cd997a14746460a9876 Mon Sep 17 00:00:00 2001
From: Carlos <carlos@sanz.dev>
Date: Sat, 31 Jan 2026 18:13:30 -0800
Subject: [PATCH 3/3] style: fix docstring format to use Sphinx style

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 mod_ci/controllers.py | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/mod_ci/controllers.py b/mod_ci/controllers.py
index 062195a2..1414e1b0 100755
--- a/mod_ci/controllers.py
+++ b/mod_ci/controllers.py
@@ -760,16 +760,12 @@ def _will_workflow_run_for_commit(repository, commit_sha: str, workflow_name: st
     """
     Check if a workflow will run for a given commit based on path filters.
 
-    Returns:
-        True: Workflow will definitely run (files match path filters)
-        False: Workflow will definitely NOT run (no files match)
-        None: Cannot determine (no path filters, or error occurred)
-
     :param repository: GitHub repository object
     :param commit_sha: The commit SHA to check
     :param workflow_name: Name of the workflow
     :param log: Logger instance
-    :return: True/False/None as described above
+    :return: True if workflow will run (files match), False if it won't (no match),
+             None if cannot determine (no path filters or error)
     """
     # Get the workflow's path filters
     path_filters = _get_workflow_path_filters(repository, workflow_name, log)