Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 22 additions & 7 deletions gittensor/classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

import bittensor as bt

from gittensor.constants import MIN_TOKEN_SCORE_FOR_BASE_SCORE
from gittensor.utils.utils import parse_repo_name
from gittensor.validator.configurations.tier_config import Tier, TierConfig, TierStats

Expand Down Expand Up @@ -156,7 +157,8 @@ class PullRequest:
base_score: float = 0.0
issue_multiplier: float = 1.0
open_pr_spam_multiplier: float = 1.0
repository_uniqueness_multiplier: float = 1.0
pioneer_dividend: float = 0.0 # Additive bonus for pioneering a repo
pioneer_rank: int = 0 # 0 = not eligible, 1 = pioneer, 2+ = follower position
time_decay_multiplier: float = 1.0
credibility_multiplier: float = 1.0
raw_credibility: float = 1.0 # Before applying ^k scalar
Expand Down Expand Up @@ -188,24 +190,37 @@ def set_file_changes(self, file_changes: List[FileChange]) -> None:
"""Set the file changes for this pull request"""
self.file_changes = file_changes

def is_pioneer_eligible(self) -> bool:
    """Return True when this PR can be considered for the pioneer dividend.

    Eligibility requires all of:
      * a tier configuration is attached to the repository,
      * the PR was actually merged,
      * the token score clears the minimum quality gate
        (MIN_TOKEN_SCORE_FOR_BASE_SCORE).
    """
    # Guard clauses mirror the original short-circuit order.
    if self.repository_tier_configuration is None:
        return False
    if self.merged_at is None:
        return False
    return self.token_score >= MIN_TOKEN_SCORE_FOR_BASE_SCORE

def calculate_final_earned_score(self) -> float:
"""Combine base score with all multipliers."""
"""Combine base score with all multipliers. Pioneer dividend is added separately after."""
multipliers = {
'repo': self.repo_weight_multiplier,
'issue': self.issue_multiplier,
'spam': self.open_pr_spam_multiplier,
'unique': self.repository_uniqueness_multiplier,
'decay': self.time_decay_multiplier,
'cred': self.credibility_multiplier,
}

self.earned_score = self.base_score * prod(multipliers.values())

# Log all multipliers (credibility shows ^k format)
mult_str = ' × '.join(
f'cred={self.raw_credibility:.2f}^{self.credibility_scalar}' if k == 'cred' else f'{k}={v:.2f}'
for k, v in multipliers.items()
)
def _format_multiplier(k: str, v: float) -> str:
if k == 'cred':
return f'cred={self.raw_credibility:.2f}^{self.credibility_scalar}'
return f'{k}={v:.2f}'

mult_str = ' × '.join(_format_multiplier(k, v) for k, v in multipliers.items())
bt.logging.info(
f'├─ {self.pr_state.value} PR #{self.number} ({self.repository_full_name}) → {self.earned_score:.2f}'
)
Expand Down
11 changes: 9 additions & 2 deletions gittensor/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,16 @@
DEFAULT_MAX_CONTRIBUTION_SCORE_FOR_FULL_BONUS = 2000

# Boosts
UNIQUE_PR_BOOST = 0.74
MAX_CODE_DENSITY_MULTIPLIER = 3.0

# Pioneer dividend — rewards the first quality contributor to each repository
# Rates applied per follower position (1st follower pays most, diminishing after)
# Dividend capped at PIONEER_DIVIDEND_MAX_RATIO × pioneer's own earned_score
PIONEER_DIVIDEND_RATE_1ST = 0.30 # 1st follower: 30% of their earned_score
PIONEER_DIVIDEND_RATE_2ND = 0.20 # 2nd follower: 20% of their earned_score
PIONEER_DIVIDEND_RATE_REST = 0.10 # 3rd+ followers: 10% of their earned_score
PIONEER_DIVIDEND_MAX_RATIO = 1.0 # Cap dividend at 1× pioneer's own earned_score (max 2× total)

# Issue boosts
MAX_ISSUE_CLOSE_WINDOW_DAYS = 1
MAX_ISSUE_AGE_FOR_MAX_SCORE = 40 # days
Expand Down Expand Up @@ -112,7 +119,7 @@
# =============================================================================
# Spam & Gaming Mitigation
# =============================================================================
MAINTAINER_ASSOCIATIONS = ['OWNER', 'MEMBER', 'COLLABORATOR']
MAINTAINER_ASSOCIATIONS = ['OWNER', 'COLLABORATOR']

# Issue multiplier bonuses
MAX_ISSUE_AGE_BONUS = 0.75 # Max bonus for issue age (scales with sqrt of days open)
Expand Down
177 changes: 113 additions & 64 deletions gittensor/utils/github_api_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -980,6 +980,28 @@ def check_github_issue_closed(repo: str, issue_number: int, token: str) -> Optio
return None


def _escape_graphql_expression(expression: str) -> str:
"""Escape special characters in a GraphQL string literal.

File paths containing backslashes or double quotes break GraphQL query
syntax when interpolated directly. This escapes them so the query remains
valid.

Args:
expression: Raw string to embed inside a GraphQL double-quoted literal.

Returns:
Escaped string safe for embedding in GraphQL queries.
"""
return expression.replace('\\', '\\\\').replace('"', '\\"')


# Maximum files per GraphQL batch request. GitHub's GraphQL API has query
# complexity limits; batching too many object lookups in a single request can
# cause a 502/complexity error and lose all results.
# NOTE(review): 50 looks conservative — confirm against GitHub's current
# published node/complexity limits before raising it.
_MAX_FILES_PER_GRAPHQL_BATCH = 50


def fetch_file_contents_batch(
repo_owner: str,
repo_name: str,
Expand All @@ -988,9 +1010,10 @@ def fetch_file_contents_batch(
token: str,
) -> Dict[str, Optional[str]]:
"""
Fetch multiple file contents from a repository in a single GraphQL request.
Fetch multiple file contents from a repository in batched GraphQL requests.

Uses retry logic with exponential backoff for reliability.
Uses retry logic with exponential backoff for reliability. Batches files
to avoid exceeding GitHub's GraphQL complexity limits.

Args:
repo_owner: Repository owner
Expand All @@ -1005,47 +1028,53 @@ def fetch_file_contents_batch(
if not file_paths:
return {}

# Build GraphQL query with aliased file fields
file_fields = []
for i, path in enumerate(file_paths):
expression = f'{head_sha}:{path}'
file_fields.append(
f'file{i}: object(expression: "{expression}") {{ ... on Blob {{ text byteSize isBinary }} }}'
)
results: Dict[str, Optional[str]] = {}

query = f"""
query($owner: String!, $name: String!) {{
repository(owner: $owner, name: $name) {{
{' '.join(file_fields)}
# Process files in batches to avoid exceeding GraphQL complexity limits
for batch_start in range(0, len(file_paths), _MAX_FILES_PER_GRAPHQL_BATCH):
batch_paths = file_paths[batch_start : batch_start + _MAX_FILES_PER_GRAPHQL_BATCH]

# Build GraphQL query with aliased file fields
file_fields = []
for i, path in enumerate(batch_paths):
expression = _escape_graphql_expression(f'{head_sha}:{path}')
file_fields.append(
f'file{i}: object(expression: "{expression}") {{ ... on Blob {{ text byteSize isBinary }} }}'
)

query = f"""
query($owner: String!, $name: String!) {{
repository(owner: $owner, name: $name) {{
{' '.join(file_fields)}
}}
}}
}}
"""
"""

variables = {'owner': repo_owner, 'name': repo_name}
variables = {'owner': repo_owner, 'name': repo_name}

# Execute with retry logic
data = execute_graphql_query(query, variables, token)
if data is None:
bt.logging.warning(f'Failed to fetch file contents for {repo_owner}/{repo_name}')
return {path: None for path in file_paths}
data = execute_graphql_query(query, variables, token)
if data is None:
bt.logging.warning(f'Failed to fetch file contents for {repo_owner}/{repo_name}')
for path in batch_paths:
results[path] = None
continue

if 'errors' in data:
bt.logging.warning(f'GraphQL errors fetching files: {data["errors"]}')
if 'errors' in data:
bt.logging.warning(f'GraphQL errors fetching files: {data["errors"]}')

repo_data = data.get('data', {}).get('repository', {})
results = {}
repo_data = data.get('data', {}).get('repository', {})

for i, path in enumerate(file_paths):
file_data = repo_data.get(f'file{i}')
for i, path in enumerate(batch_paths):
file_data = repo_data.get(f'file{i}')

if file_data is None:
results[path] = None
elif file_data.get('isBinary'):
results[path] = None
elif file_data.get('byteSize', 0) > MAX_FILE_SIZE_BYTES:
results[path] = None
else:
results[path] = file_data.get('text')
if file_data is None:
results[path] = None
elif file_data.get('isBinary'):
results[path] = None
elif file_data.get('byteSize', 0) > MAX_FILE_SIZE_BYTES:
results[path] = None
else:
results[path] = file_data.get('text')

return results

Expand All @@ -1058,52 +1087,31 @@ class FileContentPair:
new_content: Optional[str] # None for deleted files


def fetch_file_contents_with_base(
def _fetch_file_contents_with_base_batch(
repo_owner: str,
repo_name: str,
base_sha: str,
head_sha: str,
file_changes: List['FileChangeType'],
token: str,
) -> Dict[str, FileContentPair]:
"""
Fetch both base and head (old and new) versions of files in a single GraphQL request.

Args:
repo_owner: Repository owner
repo_name: Repository name
base_sha: The base branch SHA (before PR changes)
head_sha: The head/merge commit SHA (after PR changes)
file_changes: List of FileChange objects (needed for status and previous_filename)
token: GitHub PAT for authentication
"""Fetch base and head file contents for a single batch of file changes.

Returns:
Dict mapping file paths to FileContentPair (old_content, new_content)
- For new files: old_content is None
- For deleted files: new_content is None
- For renamed files: old_content fetched from previous_filename
Internal helper called by fetch_file_contents_with_base for each batch.
"""
if not file_changes:
return {}

# Build GraphQL query with both base and head versions
file_fields = []
for i, fc in enumerate(file_changes):
# Determine the path to fetch for base version
# For renames, use previous_filename; otherwise use current filename
base_path = fc.previous_filename if fc.previous_filename else fc.filename
head_path = fc.filename

# Only fetch base version if file wasn't newly added
if fc.status != 'added':
base_expr = f'{base_sha}:{base_path}'
base_expr = _escape_graphql_expression(f'{base_sha}:{base_path}')
file_fields.append(
f'base{i}: object(expression: "{base_expr}") {{ ... on Blob {{ text byteSize isBinary }} }}'
)

# Only fetch head version if file wasn't deleted
if fc.status != 'removed':
head_expr = f'{head_sha}:{head_path}'
head_expr = _escape_graphql_expression(f'{head_sha}:{head_path}')
file_fields.append(
f'head{i}: object(expression: "{head_expr}") {{ ... on Blob {{ text byteSize isBinary }} }}'
)
Expand All @@ -1121,7 +1129,6 @@ def fetch_file_contents_with_base(

variables = {'owner': repo_owner, 'name': repo_name}

# Execute with retry logic
data = execute_graphql_query(query, variables, token)
if data is None:
bt.logging.warning(f'Failed to fetch file contents for {repo_owner}/{repo_name}')
Expand All @@ -1137,13 +1144,11 @@ def fetch_file_contents_with_base(
old_content = None
new_content = None

# Extract base (old) content if applicable
if fc.status != 'added':
base_data = repo_data.get(f'base{i}')
if base_data and not base_data.get('isBinary') and base_data.get('byteSize', 0) <= MAX_FILE_SIZE_BYTES:
old_content = base_data.get('text')

# Extract head (new) content if applicable
if fc.status != 'removed':
head_data = repo_data.get(f'head{i}')
if head_data and not head_data.get('isBinary') and head_data.get('byteSize', 0) <= MAX_FILE_SIZE_BYTES:
Expand All @@ -1152,3 +1157,47 @@ def fetch_file_contents_with_base(
results[fc.filename] = FileContentPair(old_content=old_content, new_content=new_content)

return results


def fetch_file_contents_with_base(
    repo_owner: str,
    repo_name: str,
    base_sha: str,
    head_sha: str,
    file_changes: List['FileChangeType'],
    token: str,
) -> Dict[str, FileContentPair]:
    """
    Fetch both base and head (old and new) versions of files via batched GraphQL requests.

    Large PRs are split into batches to avoid exceeding GitHub's GraphQL query
    complexity limits. File paths are escaped to prevent query syntax errors
    from special characters.

    Args:
        repo_owner: Repository owner
        repo_name: Repository name
        base_sha: The base branch SHA (before PR changes)
        head_sha: The head/merge commit SHA (after PR changes)
        file_changes: List of FileChange objects (needed for status and previous_filename)
        token: GitHub PAT for authentication

    Returns:
        Dict mapping file paths to FileContentPair (old_content, new_content)
        - For new files: old_content is None
        - For deleted files: new_content is None
        - For renamed files: old_content fetched from previous_filename
    """
    if not file_changes:
        return {}

    merged: Dict[str, FileContentPair] = {}
    step = _MAX_FILES_PER_GRAPHQL_BATCH

    # Walk the change list one batch at a time, folding each batch's results
    # into the combined mapping.
    for offset in range(0, len(file_changes), step):
        chunk = file_changes[offset : offset + step]
        merged.update(
            _fetch_file_contents_with_base_batch(
                repo_owner, repo_name, base_sha, head_sha, chunk, token
            )
        )

    return merged
2 changes: 1 addition & 1 deletion gittensor/validator/evaluation/reward.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ async def get_rewards(
# Adjust scores for duplicate accounts
detect_and_penalize_miners_sharing_github(miner_evaluations)

# Finalize scores: apply unique contribution multiplier, credibility, sum totals, deduct collateral
# Finalize scores: apply pioneer dividends, credibility, sum totals, deduct collateral
finalize_miner_scores(miner_evaluations)

# Allocate emissions by tier: replace total_score with tier-weighted allocations
Expand Down
Loading