diff --git a/gittensor/classes.py b/gittensor/classes.py index 1b0530d3..b27645ab 100644 --- a/gittensor/classes.py +++ b/gittensor/classes.py @@ -8,6 +8,7 @@ import bittensor as bt +from gittensor.constants import MIN_TOKEN_SCORE_FOR_BASE_SCORE from gittensor.utils.utils import parse_repo_name from gittensor.validator.configurations.tier_config import Tier, TierConfig, TierStats @@ -156,7 +157,8 @@ class PullRequest: base_score: float = 0.0 issue_multiplier: float = 1.0 open_pr_spam_multiplier: float = 1.0 - repository_uniqueness_multiplier: float = 1.0 + pioneer_dividend: float = 0.0 # Additive bonus for pioneering a repo + pioneer_rank: int = 0 # 0 = not eligible, 1 = pioneer, 2+ = follower position time_decay_multiplier: float = 1.0 credibility_multiplier: float = 1.0 raw_credibility: float = 1.0 # Before applying ^k scalar @@ -188,13 +190,24 @@ def set_file_changes(self, file_changes: List[FileChange]) -> None: """Set the file changes for this pull request""" self.file_changes = file_changes + def is_pioneer_eligible(self) -> bool: + """Check if this PR qualifies for pioneer consideration. + + A PR is eligible if it is merged, has a tier configuration, + and meets the minimum token score quality gate. + """ + return ( + self.repository_tier_configuration is not None + and self.merged_at is not None + and self.token_score >= MIN_TOKEN_SCORE_FOR_BASE_SCORE + ) + def calculate_final_earned_score(self) -> float: - """Combine base score with all multipliers.""" + """Combine base score with all multipliers. 
Pioneer dividend is added separately after.""" multipliers = { 'repo': self.repo_weight_multiplier, 'issue': self.issue_multiplier, 'spam': self.open_pr_spam_multiplier, - 'unique': self.repository_uniqueness_multiplier, 'decay': self.time_decay_multiplier, 'cred': self.credibility_multiplier, } @@ -202,10 +215,12 @@ def calculate_final_earned_score(self) -> float: self.earned_score = self.base_score * prod(multipliers.values()) # Log all multipliers (credibility shows ^k format) - mult_str = ' × '.join( - f'cred={self.raw_credibility:.2f}^{self.credibility_scalar}' if k == 'cred' else f'{k}={v:.2f}' - for k, v in multipliers.items() - ) + def _format_multiplier(k: str, v: float) -> str: + if k == 'cred': + return f'cred={self.raw_credibility:.2f}^{self.credibility_scalar}' + return f'{k}={v:.2f}' + + mult_str = ' × '.join(_format_multiplier(k, v) for k, v in multipliers.items()) bt.logging.info( f'├─ {self.pr_state.value} PR #{self.number} ({self.repository_full_name}) → {self.earned_score:.2f}' ) diff --git a/gittensor/constants.py b/gittensor/constants.py index 30cd7a17..58e9a521 100644 --- a/gittensor/constants.py +++ b/gittensor/constants.py @@ -59,9 +59,16 @@ DEFAULT_MAX_CONTRIBUTION_SCORE_FOR_FULL_BONUS = 2000 # Boosts -UNIQUE_PR_BOOST = 0.74 MAX_CODE_DENSITY_MULTIPLIER = 3.0 +# Pioneer dividend — rewards the first quality contributor to each repository +# Rates applied per follower position (1st follower pays most, diminishing after) +# Dividend capped at PIONEER_DIVIDEND_MAX_RATIO × pioneer's own earned_score +PIONEER_DIVIDEND_RATE_1ST = 0.30 # 1st follower: 30% of their earned_score +PIONEER_DIVIDEND_RATE_2ND = 0.20 # 2nd follower: 20% of their earned_score +PIONEER_DIVIDEND_RATE_REST = 0.10 # 3rd+ followers: 10% of their earned_score +PIONEER_DIVIDEND_MAX_RATIO = 1.0 # Cap dividend at 1× pioneer's own earned_score (max 2× total) + # Issue boosts MAX_ISSUE_CLOSE_WINDOW_DAYS = 1 MAX_ISSUE_AGE_FOR_MAX_SCORE = 40 # days @@ -112,7 +119,7 @@ # 
============================================================================= # Spam & Gaming Mitigation # ============================================================================= -MAINTAINER_ASSOCIATIONS = ['OWNER', 'MEMBER', 'COLLABORATOR'] +MAINTAINER_ASSOCIATIONS = ['OWNER', 'COLLABORATOR'] # Issue multiplier bonuses MAX_ISSUE_AGE_BONUS = 0.75 # Max bonus for issue age (scales with sqrt of days open) diff --git a/gittensor/utils/github_api_tools.py b/gittensor/utils/github_api_tools.py index fe676d07..06ae7dcf 100644 --- a/gittensor/utils/github_api_tools.py +++ b/gittensor/utils/github_api_tools.py @@ -980,6 +980,28 @@ def check_github_issue_closed(repo: str, issue_number: int, token: str) -> Optio return None +def _escape_graphql_expression(expression: str) -> str: + """Escape special characters in a GraphQL string literal. + + File paths containing backslashes or double quotes break GraphQL query + syntax when interpolated directly. This escapes them so the query remains + valid. + + Args: + expression: Raw string to embed inside a GraphQL double-quoted literal. + + Returns: + Escaped string safe for embedding in GraphQL queries. + """ + return expression.replace('\\', '\\\\').replace('"', '\\"') + + +# Maximum files per GraphQL batch request. GitHub's GraphQL API has query +# complexity limits; batching too many object lookups in a single request can +# cause a 502/complexity error and lose all results. +_MAX_FILES_PER_GRAPHQL_BATCH = 50 + + def fetch_file_contents_batch( repo_owner: str, repo_name: str, @@ -988,9 +1010,10 @@ def fetch_file_contents_batch( token: str, ) -> Dict[str, Optional[str]]: """ - Fetch multiple file contents from a repository in a single GraphQL request. + Fetch multiple file contents from a repository in batched GraphQL requests. - Uses retry logic with exponential backoff for reliability. + Uses retry logic with exponential backoff for reliability. Batches files + to avoid exceeding GitHub's GraphQL complexity limits. 
Args: repo_owner: Repository owner @@ -1005,47 +1028,53 @@ def fetch_file_contents_batch( if not file_paths: return {} - # Build GraphQL query with aliased file fields - file_fields = [] - for i, path in enumerate(file_paths): - expression = f'{head_sha}:{path}' - file_fields.append( - f'file{i}: object(expression: "{expression}") {{ ... on Blob {{ text byteSize isBinary }} }}' - ) + results: Dict[str, Optional[str]] = {} - query = f""" - query($owner: String!, $name: String!) {{ - repository(owner: $owner, name: $name) {{ - {' '.join(file_fields)} + # Process files in batches to avoid exceeding GraphQL complexity limits + for batch_start in range(0, len(file_paths), _MAX_FILES_PER_GRAPHQL_BATCH): + batch_paths = file_paths[batch_start : batch_start + _MAX_FILES_PER_GRAPHQL_BATCH] + + # Build GraphQL query with aliased file fields + file_fields = [] + for i, path in enumerate(batch_paths): + expression = _escape_graphql_expression(f'{head_sha}:{path}') + file_fields.append( + f'file{i}: object(expression: "{expression}") {{ ... on Blob {{ text byteSize isBinary }} }}' + ) + + query = f""" + query($owner: String!, $name: String!) 
{{ + repository(owner: $owner, name: $name) {{ + {' '.join(file_fields)} + }} }} - }} - """ + """ - variables = {'owner': repo_owner, 'name': repo_name} + variables = {'owner': repo_owner, 'name': repo_name} - # Execute with retry logic - data = execute_graphql_query(query, variables, token) - if data is None: - bt.logging.warning(f'Failed to fetch file contents for {repo_owner}/{repo_name}') - return {path: None for path in file_paths} + data = execute_graphql_query(query, variables, token) + if data is None: + bt.logging.warning(f'Failed to fetch file contents for {repo_owner}/{repo_name}') + for path in batch_paths: + results[path] = None + continue - if 'errors' in data: - bt.logging.warning(f'GraphQL errors fetching files: {data["errors"]}') + if 'errors' in data: + bt.logging.warning(f'GraphQL errors fetching files: {data["errors"]}') - repo_data = data.get('data', {}).get('repository', {}) - results = {} + repo_data = data.get('data', {}).get('repository', {}) - for i, path in enumerate(file_paths): - file_data = repo_data.get(f'file{i}') + for i, path in enumerate(batch_paths): + file_data = repo_data.get(f'file{i}') - if file_data is None: - results[path] = None - elif file_data.get('isBinary'): - results[path] = None - elif file_data.get('byteSize', 0) > MAX_FILE_SIZE_BYTES: - results[path] = None - else: - results[path] = file_data.get('text') + if file_data is None: + results[path] = None + elif file_data.get('isBinary'): + results[path] = None + elif file_data.get('byteSize', 0) > MAX_FILE_SIZE_BYTES: + results[path] = None + else: + results[path] = file_data.get('text') return results @@ -1058,7 +1087,7 @@ class FileContentPair: new_content: Optional[str] # None for deleted files -def fetch_file_contents_with_base( +def _fetch_file_contents_with_base_batch( repo_owner: str, repo_name: str, base_sha: str, @@ -1066,44 +1095,23 @@ def fetch_file_contents_with_base( file_changes: List['FileChangeType'], token: str, ) -> Dict[str, FileContentPair]: - """ - 
Fetch both base and head (old and new) versions of files in a single GraphQL request. - - Args: - repo_owner: Repository owner - repo_name: Repository name - base_sha: The base branch SHA (before PR changes) - head_sha: The head/merge commit SHA (after PR changes) - file_changes: List of FileChange objects (needed for status and previous_filename) - token: GitHub PAT for authentication + """Fetch base and head file contents for a single batch of file changes. - Returns: - Dict mapping file paths to FileContentPair (old_content, new_content) - - For new files: old_content is None - - For deleted files: new_content is None - - For renamed files: old_content fetched from previous_filename + Internal helper called by fetch_file_contents_with_base for each batch. """ - if not file_changes: - return {} - - # Build GraphQL query with both base and head versions file_fields = [] for i, fc in enumerate(file_changes): - # Determine the path to fetch for base version - # For renames, use previous_filename; otherwise use current filename base_path = fc.previous_filename if fc.previous_filename else fc.filename head_path = fc.filename - # Only fetch base version if file wasn't newly added if fc.status != 'added': - base_expr = f'{base_sha}:{base_path}' + base_expr = _escape_graphql_expression(f'{base_sha}:{base_path}') file_fields.append( f'base{i}: object(expression: "{base_expr}") {{ ... on Blob {{ text byteSize isBinary }} }}' ) - # Only fetch head version if file wasn't deleted if fc.status != 'removed': - head_expr = f'{head_sha}:{head_path}' + head_expr = _escape_graphql_expression(f'{head_sha}:{head_path}') file_fields.append( f'head{i}: object(expression: "{head_expr}") {{ ... 
on Blob {{ text byteSize isBinary }} }}' ) @@ -1121,7 +1129,6 @@ def fetch_file_contents_with_base( variables = {'owner': repo_owner, 'name': repo_name} - # Execute with retry logic data = execute_graphql_query(query, variables, token) if data is None: bt.logging.warning(f'Failed to fetch file contents for {repo_owner}/{repo_name}') @@ -1137,13 +1144,11 @@ def fetch_file_contents_with_base( old_content = None new_content = None - # Extract base (old) content if applicable if fc.status != 'added': base_data = repo_data.get(f'base{i}') if base_data and not base_data.get('isBinary') and base_data.get('byteSize', 0) <= MAX_FILE_SIZE_BYTES: old_content = base_data.get('text') - # Extract head (new) content if applicable if fc.status != 'removed': head_data = repo_data.get(f'head{i}') if head_data and not head_data.get('isBinary') and head_data.get('byteSize', 0) <= MAX_FILE_SIZE_BYTES: @@ -1152,3 +1157,47 @@ def fetch_file_contents_with_base( results[fc.filename] = FileContentPair(old_content=old_content, new_content=new_content) return results + + +def fetch_file_contents_with_base( + repo_owner: str, + repo_name: str, + base_sha: str, + head_sha: str, + file_changes: List['FileChangeType'], + token: str, +) -> Dict[str, FileContentPair]: + """ + Fetch both base and head (old and new) versions of files via batched GraphQL requests. + + Large PRs are split into batches to avoid exceeding GitHub's GraphQL query + complexity limits. File paths are escaped to prevent query syntax errors + from special characters. 
+ + Args: + repo_owner: Repository owner + repo_name: Repository name + base_sha: The base branch SHA (before PR changes) + head_sha: The head/merge commit SHA (after PR changes) + file_changes: List of FileChange objects (needed for status and previous_filename) + token: GitHub PAT for authentication + + Returns: + Dict mapping file paths to FileContentPair (old_content, new_content) + - For new files: old_content is None + - For deleted files: new_content is None + - For renamed files: old_content fetched from previous_filename + """ + if not file_changes: + return {} + + results: Dict[str, FileContentPair] = {} + + for batch_start in range(0, len(file_changes), _MAX_FILES_PER_GRAPHQL_BATCH): + batch = file_changes[batch_start : batch_start + _MAX_FILES_PER_GRAPHQL_BATCH] + batch_results = _fetch_file_contents_with_base_batch( + repo_owner, repo_name, base_sha, head_sha, batch, token + ) + results.update(batch_results) + + return results diff --git a/gittensor/validator/evaluation/reward.py b/gittensor/validator/evaluation/reward.py index 2d21431e..62ad3455 100644 --- a/gittensor/validator/evaluation/reward.py +++ b/gittensor/validator/evaluation/reward.py @@ -136,7 +136,7 @@ async def get_rewards( # Adjust scores for duplicate accounts detect_and_penalize_miners_sharing_github(miner_evaluations) - # Finalize scores: apply unique contribution multiplier, credibility, sum totals, deduct collateral + # Finalize scores: apply pioneer dividends, credibility, sum totals, deduct collateral finalize_miner_scores(miner_evaluations) # Allocate emissions by tier: replace total_score with tier-weighted allocations diff --git a/gittensor/validator/evaluation/scoring.py b/gittensor/validator/evaluation/scoring.py index af03ab49..fb313ffe 100644 --- a/gittensor/validator/evaluation/scoring.py +++ b/gittensor/validator/evaluation/scoring.py @@ -3,7 +3,7 @@ import math from datetime import datetime, timezone -from typing import Dict, Optional +from typing import Dict, Optional, 
Tuple import bittensor as bt @@ -20,13 +20,16 @@ MAX_OPEN_PR_THRESHOLD, MIN_TOKEN_SCORE_FOR_BASE_SCORE, OPEN_PR_THRESHOLD_TOKEN_SCORE, + PIONEER_DIVIDEND_MAX_RATIO, + PIONEER_DIVIDEND_RATE_1ST, + PIONEER_DIVIDEND_RATE_2ND, + PIONEER_DIVIDEND_RATE_REST, SECONDS_PER_DAY, SECONDS_PER_HOUR, TIME_DECAY_GRACE_PERIOD_HOURS, TIME_DECAY_MIN_MULTIPLIER, TIME_DECAY_SIGMOID_MIDPOINT, TIME_DECAY_SIGMOID_STEEPNESS_SCALAR, - UNIQUE_PR_BOOST, ) from gittensor.utils.github_api_tools import ( FileContentPair, @@ -226,27 +229,6 @@ def calculate_pr_multipliers( pr.credibility_multiplier = 1.0 -def count_repository_contributors(miner_evaluations: Dict[int, MinerEvaluation]) -> Dict[str, int]: - """ - Count how many miners contribute to each repository and log statistics. - - Returns: - Dict[str, int]: Dictionary mapping repository names to contributor counts - """ - repo_counts: Dict[str, int] = {} - - for evaluation in miner_evaluations.values(): - for repo in evaluation.unique_repos_contributed_to: - repo_counts[repo] = repo_counts.get(repo, 0) + 1 - - if repo_counts: - bt.logging.info(f'Repository contribution counts: {len(repo_counts)} total repositories') - for repo, count in sorted(repo_counts.items(), key=lambda x: -x[1]): - bt.logging.info(f'{repo}: {count}') - - return repo_counts - - def calculate_open_pr_threshold( tier_stats: Dict[Tier, TierStats] = None, ) -> int: @@ -304,13 +286,85 @@ def calculate_time_decay_multiplier(pr: PullRequest) -> float: return max(sigmoid, TIME_DECAY_MIN_MULTIPLIER) +def calculate_pioneer_dividends( + miner_evaluations: Dict[int, MinerEvaluation], +) -> None: + """Determine pioneers and set pioneer_rank + pioneer_dividend on each PR. + + For each repo, the pioneer is the miner with the earliest merged PR that + passes the quality gate (is_pioneer_eligible). The pioneer's earliest PR + on that repo earns a dividend based on ALL followers' earned_scores (post- + multiplier), using per-position rates (30%/20%/10%). 
The dividend uses the + follower's multipliers, not the pioneer's — so it reflects follower quality. + + Must be called AFTER all earned_scores have been computed. + """ + # Build index: (repo, uid) -> eligible PRs, and per-repo aggregates for ordering + pr_index: Dict[str, Dict[int, list]] = {} # repo -> {uid: [eligible PRs]} + repo_contributions: Dict[str, Dict[int, Tuple[datetime, int, float]]] = {} + + for evaluation in miner_evaluations.values(): + for pr in evaluation.merged_pull_requests: + if not pr.is_pioneer_eligible(): + continue + repo = pr.repository_full_name + pr_index.setdefault(repo, {}).setdefault(pr.uid, []).append(pr) + + current = repo_contributions.setdefault(repo, {}).get(pr.uid) + if current is None: + repo_contributions[repo][pr.uid] = (pr.merged_at, pr.number, pr.earned_score) + else: + earliest_at, earliest_num, total_score = current + new_total = total_score + pr.earned_score + if pr.merged_at < earliest_at or (pr.merged_at == earliest_at and pr.number < earliest_num): + repo_contributions[repo][pr.uid] = (pr.merged_at, pr.number, new_total) + else: + repo_contributions[repo][pr.uid] = (earliest_at, earliest_num, new_total) + + # For each repo: rank contributors, calculate dividend, apply to pioneer PR + for repo, uid_entries in repo_contributions.items(): + sorted_uids = sorted(uid_entries.items(), key=lambda x: (x[1][0], x[1][1])) + + # Set pioneer_rank via index lookup (no full evaluation scan) + for rank_pos, (uid, _) in enumerate(sorted_uids): + for pr in pr_index[repo][uid]: + pr.pioneer_rank = rank_pos + 1 + + # Calculate dividend from followers' earned_scores + dividend = 0.0 + for pos, (_, entry) in enumerate(sorted_uids[1:]): + follower_earned = entry[2] + if pos == 0: + dividend += follower_earned * PIONEER_DIVIDEND_RATE_1ST + elif pos == 1: + dividend += follower_earned * PIONEER_DIVIDEND_RATE_2ND + else: + dividend += follower_earned * PIONEER_DIVIDEND_RATE_REST + + if dividend <= 0: + continue + + # Find pioneer's earliest 
PR via index and apply capped dividend + pioneer_uid = sorted_uids[0][0] + pioneer_pr_number = sorted_uids[0][1][1] + pioneer_pr = next(pr for pr in pr_index[repo][pioneer_uid] if pr.number == pioneer_pr_number) + max_dividend = pioneer_pr.earned_score * PIONEER_DIVIDEND_MAX_RATIO + capped = min(dividend, max_dividend) + pioneer_pr.pioneer_dividend = round(capped, 2) + pioneer_pr.earned_score += pioneer_pr.pioneer_dividend + + cap_note = f' (capped from {dividend:.2f})' if capped < dividend else '' + bt.logging.info( + f'Pioneer dividend | repo={repo} pioneer=uid {pioneer_uid} ' + f'followers={len(sorted_uids) - 1} dividend={capped:.2f}{cap_note}' + ) + + def finalize_miner_scores(miner_evaluations: Dict[int, MinerEvaluation]) -> None: - """Finalize all miner scores: apply uniqueness multipliers, calculate totals, and deduct collateral.""" + """Finalize all miner scores: compute earned_scores, then apply pioneer dividends, then collateral.""" bt.logging.info('**Finalizing miner scores**') - repo_counts = count_repository_contributors(miner_evaluations) - total_contributing_miners = sum(1 for ev in miner_evaluations.values() if ev.unique_repos_contributed_to) - + # Phase 1: Compute all earned_scores (base × multipliers) for every miner for uid, evaluation in miner_evaluations.items(): if not evaluation: continue @@ -348,10 +402,6 @@ def finalize_miner_scores(miner_evaluations: Dict[int, MinerEvaluation]) -> None # Process merged PRs for pr in evaluation.merged_pull_requests: - pr.repository_uniqueness_multiplier = calculate_uniqueness_multiplier( - pr.repository_full_name, repo_counts, total_contributing_miners - ) - # Apply spam multiplier (calculated once per miner based on unlocked tiers) pr.open_pr_spam_multiplier = spam_multiplier @@ -364,9 +414,6 @@ def finalize_miner_scores(miner_evaluations: Dict[int, MinerEvaluation]) -> None pr.credibility_multiplier = round(credibility**tier_config.credibility_scalar, 2) pr.calculate_final_earned_score() - 
evaluation.base_total_score += pr.base_score - evaluation.total_score += pr.earned_score - evaluation.total_nodes_scored += pr.total_nodes_scored # Aggregate token scoring breakdown evaluation.total_token_score += pr.token_score @@ -375,6 +422,25 @@ def finalize_miner_scores(miner_evaluations: Dict[int, MinerEvaluation]) -> None evaluation.total_leaf_count += pr.leaf_count evaluation.total_leaf_score += pr.leaf_score + # Phase 2: Calculate pioneer dividends from follower earned_scores + # Must happen after Phase 1 so all earned_scores are available + calculate_pioneer_dividends(miner_evaluations) + + # Phase 3: Aggregate totals (including dividends), collateral, tier stats, logging + for uid, evaluation in miner_evaluations.items(): + if not evaluation: + continue + + has_contributions = len(evaluation.merged_pull_requests) > 0 or len(evaluation.closed_pull_requests) > 0 + if not has_contributions: + continue + + # Aggregate scores (earned_score now includes pioneer_dividend from Phase 2) + for pr in evaluation.merged_pull_requests: + evaluation.base_total_score += pr.base_score + evaluation.total_score += pr.earned_score + evaluation.total_nodes_scored += pr.total_nodes_scored + # Apply collateral deduction (0 - 0 = 0 for empty miners) earned_score = evaluation.total_score evaluation.total_score = max(0.0, earned_score - evaluation.total_collateral_score) @@ -431,17 +497,6 @@ def finalize_miner_scores(miner_evaluations: Dict[int, MinerEvaluation]) -> None bt.logging.info('Finalization complete.') -def calculate_uniqueness_multiplier( - repo_full_name: str, repo_counts: Dict[str, int], total_contributing_miners: int -) -> float: - """Calculate repository uniqueness multiplier based on how many miners contribute to a repo.""" - if total_contributing_miners == 0: - return 1.0 - repo_count = repo_counts.get(repo_full_name, 0) - uniqueness_score = (total_contributing_miners - repo_count + 1) / total_contributing_miners - return 1.0 + (uniqueness_score * 
UNIQUE_PR_BOOST) - - def calculate_issue_multiplier(pr: PullRequest) -> float: """ Calculate PR score multiplier based on the first valid linked issue's age. diff --git a/gittensor/validator/storage/queries.py b/gittensor/validator/storage/queries.py index db40b5e5..777e087a 100644 --- a/gittensor/validator/storage/queries.py +++ b/gittensor/validator/storage/queries.py @@ -37,7 +37,7 @@ number, repository_full_name, uid, hotkey, github_id, title, author_login, merged_at, pr_created_at, pr_state, repo_weight_multiplier, base_score, issue_multiplier, - open_pr_spam_multiplier, repository_uniqueness_multiplier, time_decay_multiplier, + open_pr_spam_multiplier, pioneer_dividend, pioneer_rank, time_decay_multiplier, credibility_multiplier, raw_credibility, credibility_scalar, earned_score, collateral_score, additions, deletions, commits, total_nodes_scored, @@ -56,7 +56,8 @@ base_score = EXCLUDED.base_score, issue_multiplier = EXCLUDED.issue_multiplier, open_pr_spam_multiplier = EXCLUDED.open_pr_spam_multiplier, - repository_uniqueness_multiplier = EXCLUDED.repository_uniqueness_multiplier, + pioneer_dividend = EXCLUDED.pioneer_dividend, + pioneer_rank = EXCLUDED.pioneer_rank, time_decay_multiplier = EXCLUDED.time_decay_multiplier, credibility_multiplier = EXCLUDED.credibility_multiplier, raw_credibility = EXCLUDED.raw_credibility, diff --git a/gittensor/validator/storage/repository.py b/gittensor/validator/storage/repository.py index 799b9f72..da358047 100644 --- a/gittensor/validator/storage/repository.py +++ b/gittensor/validator/storage/repository.py @@ -166,7 +166,8 @@ def store_pull_requests_bulk(self, pull_requests: List[PullRequest]) -> int: pr.base_score, pr.issue_multiplier, pr.open_pr_spam_multiplier, - pr.repository_uniqueness_multiplier, + pr.pioneer_dividend, + pr.pioneer_rank, pr.time_decay_multiplier, pr.credibility_multiplier, pr.raw_credibility, diff --git a/gittensor/validator/weights/master_repositories.json 
b/gittensor/validator/weights/master_repositories.json index 6887af61..4b3f5333 100644 --- a/gittensor/validator/weights/master_repositories.json +++ b/gittensor/validator/weights/master_repositories.json @@ -51,11 +51,7 @@ "tier": "Gold", "weight": 20.88 }, - "AffineIO/affine-cortex": { - "tier": "Gold", - "weight": 22.21 - }, - "AffineIO/affinetes": { + "AffineFoundation/liveweb-arena": { "tier": "Gold", "weight": 20.88 }, @@ -494,16 +490,17 @@ "weight": 0.29 }, "autoppia/autoppia_iwa": { + "additional_acceptable_branches": ["contribution/*"], "tier": "Silver", "weight": 4.25 }, "autoppia/autoppia_web_agents_subnet": { - "additional_acceptable_branches": ["dev"], + "additional_acceptable_branches": ["dev", "dev-gittensor"], "tier": "Silver", "weight": 6.46 }, "autoppia/autoppia_webs_demo": { - "additional_acceptable_branches": ["feature/*"], + "additional_acceptable_branches": ["feature/*", "fix/*"], "tier": "Silver", "weight": 4.25 }, @@ -1124,6 +1121,10 @@ "tier": "Bronze", "weight": 0.24 }, + "D4Vinci/Scrapling": { + "weight": 0.26, + "tier": "Bronze" + }, "danielmiessler/SecLists": { "tier": "Bronze", "weight": 0.35 @@ -1527,7 +1528,7 @@ }, "entrius/gittensor-ui": { "tier": "Silver", - "weight": 1.01 + "weight": 10 }, "entrius/venth": { "tier": "Silver", @@ -3785,6 +3786,10 @@ "tier": "Bronze", "weight": 0.29 }, + "openclaw/openclaw": { + "weight": 38.92, + "tier": "Gold" + }, "opencv/opencv": { "tier": "Silver", "weight": 3.79 @@ -4262,6 +4267,7 @@ "weight": 0.19 }, "pulumi/pulumi": { + "inactive_at": "2026-03-04T17:03:48.522Z", "tier": "Silver", "weight": 3.73 }, @@ -4471,6 +4477,7 @@ "weight": 0.22 }, "RedTeamSubnet/RedTeam": { + "additional_acceptable_branches": ["dev"], "tier": "Silver", "weight": 4.4 }, @@ -5034,6 +5041,7 @@ "weight": 0.17 }, "taofu-labs/tpn-subnet": { + "additional_acceptable_branches": ["development"], "tier": "Silver", "weight": 4.15 }, diff --git a/tests/utils/test_github_api_tools.py b/tests/utils/test_github_api_tools.py index 
185202e8..5ebd7c57 100644 --- a/tests/utils/test_github_api_tools.py +++ b/tests/utils/test_github_api_tools.py @@ -914,6 +914,232 @@ def _make_graphql_response(pr_nodes): return mock_response +_escape_graphql_expression = github_api_tools._escape_graphql_expression +_MAX_FILES_PER_GRAPHQL_BATCH = github_api_tools._MAX_FILES_PER_GRAPHQL_BATCH +fetch_file_contents_batch = github_api_tools.fetch_file_contents_batch +fetch_file_contents_with_base = github_api_tools.fetch_file_contents_with_base +FileContentPair = github_api_tools.FileContentPair + + +# ============================================================================ +# GraphQL Expression Escaping Tests +# ============================================================================ + + +class TestEscapeGraphQLExpression: + """Tests for _escape_graphql_expression helper.""" + + def test_plain_path_unchanged(self): + """Normal file paths pass through unmodified.""" + assert _escape_graphql_expression('abc123:src/main.py') == 'abc123:src/main.py' + + def test_double_quotes_escaped(self): + """Double quotes in paths are escaped to prevent query breakage.""" + assert _escape_graphql_expression('abc123:path/with"quote.py') == 'abc123:path/with\\"quote.py' + + def test_backslash_escaped(self): + """Backslashes in paths are escaped.""" + assert _escape_graphql_expression('abc123:path\\file.py') == 'abc123:path\\\\file.py' + + def test_both_quote_and_backslash(self): + """Paths with both special characters are fully escaped.""" + result = _escape_graphql_expression('abc123:dir\\"file.py') + assert result == 'abc123:dir\\\\\\"file.py' + + def test_empty_string(self): + """Empty string returns empty string.""" + assert _escape_graphql_expression('') == '' + + +# ============================================================================ +# File Contents Batch Tests +# ============================================================================ + + +class TestFetchFileContentsBatch: + """Tests for 
fetch_file_contents_batch batching and escaping.""" + + @patch('gittensor.utils.github_api_tools.execute_graphql_query') + def test_empty_paths_returns_empty(self, mock_graphql): + """Empty file list returns empty dict without any API call.""" + result = fetch_file_contents_batch('owner', 'repo', 'abc123', [], 'token') + assert result == {} + mock_graphql.assert_not_called() + + @patch('gittensor.utils.github_api_tools.execute_graphql_query') + def test_small_batch_single_request(self, mock_graphql): + """Few files are fetched in a single GraphQL request.""" + mock_graphql.return_value = { + 'data': { + 'repository': { + 'file0': {'text': 'content_a', 'byteSize': 9, 'isBinary': False}, + 'file1': {'text': 'content_b', 'byteSize': 9, 'isBinary': False}, + } + } + } + + result = fetch_file_contents_batch('owner', 'repo', 'abc123', ['a.py', 'b.py'], 'token') + + assert mock_graphql.call_count == 1 + assert result == {'a.py': 'content_a', 'b.py': 'content_b'} + + @patch('gittensor.utils.github_api_tools.execute_graphql_query') + def test_large_batch_split_into_multiple_requests(self, mock_graphql): + """More files than _MAX_FILES_PER_GRAPHQL_BATCH triggers multiple requests.""" + total_files = _MAX_FILES_PER_GRAPHQL_BATCH + 10 + paths = [f'file_{i}.py' for i in range(total_files)] + + def side_effect(query, variables, token): + # Count how many file aliases are in the query + count = query.count('... 
on Blob') + repo_data = {} + for i in range(count): + repo_data[f'file{i}'] = {'text': f'content', 'byteSize': 7, 'isBinary': False} + return {'data': {'repository': repo_data}} + + mock_graphql.side_effect = side_effect + + result = fetch_file_contents_batch('owner', 'repo', 'abc123', paths, 'token') + + assert mock_graphql.call_count == 2, 'Should split into 2 batches' + assert len(result) == total_files + + @patch('gittensor.utils.github_api_tools.execute_graphql_query') + def test_special_characters_in_path_escaped(self, mock_graphql): + """File paths with special characters are properly escaped in the query.""" + mock_graphql.return_value = { + 'data': { + 'repository': { + 'file0': {'text': 'ok', 'byteSize': 2, 'isBinary': False}, + } + } + } + + fetch_file_contents_batch('owner', 'repo', 'abc123', ['path/with"quote.py'], 'token') + + query_arg = mock_graphql.call_args[0][0] + assert '\\"' in query_arg, 'Double quotes in path should be escaped in GraphQL query' + assert 'with"quote' not in query_arg, 'Unescaped double quote should not appear' + + @patch('gittensor.utils.github_api_tools.execute_graphql_query') + def test_failed_batch_returns_none_for_affected_files(self, mock_graphql): + """Failed GraphQL request returns None for all files in that batch.""" + mock_graphql.return_value = None + + result = fetch_file_contents_batch('owner', 'repo', 'abc123', ['a.py', 'b.py'], 'token') + + assert result == {'a.py': None, 'b.py': None} + + +# ============================================================================ +# File Contents With Base Batch Tests +# ============================================================================ + + +class TestFetchFileContentsWithBase: + """Tests for fetch_file_contents_with_base batching and escaping.""" + + @staticmethod + def _make_file_change(filename, status='modified', previous_filename=None): + """Create a mock FileChange object.""" + fc = Mock() + fc.filename = filename + fc.status = status + fc.previous_filename = 
previous_filename + return fc + + @patch('gittensor.utils.github_api_tools.execute_graphql_query') + def test_empty_file_changes_returns_empty(self, mock_graphql): + """Empty file changes returns empty dict.""" + result = fetch_file_contents_with_base('owner', 'repo', 'base', 'head', [], 'token') + assert result == {} + mock_graphql.assert_not_called() + + @patch('gittensor.utils.github_api_tools.execute_graphql_query') + def test_escapes_special_characters_in_paths(self, mock_graphql): + """File paths with special characters are escaped in both base and head expressions.""" + fc = self._make_file_change('path/with"quote.py') + mock_graphql.return_value = { + 'data': { + 'repository': { + 'base0': {'text': 'old', 'byteSize': 3, 'isBinary': False}, + 'head0': {'text': 'new', 'byteSize': 3, 'isBinary': False}, + } + } + } + + fetch_file_contents_with_base('owner', 'repo', 'base_sha', 'head_sha', [fc], 'token') + + query_arg = mock_graphql.call_args[0][0] + assert 'with\\"quote' in query_arg, 'Double quotes should be escaped' + + @patch('gittensor.utils.github_api_tools.execute_graphql_query') + def test_large_pr_batched(self, mock_graphql): + """PRs with many files are split into batches.""" + total_files = _MAX_FILES_PER_GRAPHQL_BATCH + 5 + file_changes = [self._make_file_change(f'file_{i}.py') for i in range(total_files)] + + def side_effect(query, variables, token): + repo_data = {} + # Count base/head aliases in the query + for prefix in ('base', 'head'): + i = 0 + while f'{prefix}{i}:' in query: + repo_data[f'{prefix}{i}'] = {'text': 'content', 'byteSize': 7, 'isBinary': False} + i += 1 + return {'data': {'repository': repo_data}} + + mock_graphql.side_effect = side_effect + + result = fetch_file_contents_with_base( + 'owner', 'repo', 'base_sha', 'head_sha', file_changes, 'token' + ) + + assert mock_graphql.call_count == 2, 'Should split into 2 batches' + assert len(result) == total_files + for fc in file_changes: + assert fc.filename in result + assert 
isinstance(result[fc.filename], FileContentPair) + + @patch('gittensor.utils.github_api_tools.execute_graphql_query') + def test_added_file_no_base_fetch(self, mock_graphql): + """Added files should not fetch base content.""" + fc = self._make_file_change('new_file.py', status='added') + mock_graphql.return_value = { + 'data': { + 'repository': { + 'head0': {'text': 'new content', 'byteSize': 11, 'isBinary': False}, + } + } + } + + result = fetch_file_contents_with_base('owner', 'repo', 'base_sha', 'head_sha', [fc], 'token') + + assert result['new_file.py'].old_content is None + assert result['new_file.py'].new_content == 'new content' + query_arg = mock_graphql.call_args[0][0] + assert 'base0' not in query_arg, 'Should not fetch base for added file' + + @patch('gittensor.utils.github_api_tools.execute_graphql_query') + def test_removed_file_no_head_fetch(self, mock_graphql): + """Removed files should not fetch head content.""" + fc = self._make_file_change('deleted.py', status='removed') + mock_graphql.return_value = { + 'data': { + 'repository': { + 'base0': {'text': 'old content', 'byteSize': 11, 'isBinary': False}, + } + } + } + + result = fetch_file_contents_with_base('owner', 'repo', 'base_sha', 'head_sha', [fc], 'token') + + assert result['deleted.py'].old_content == 'old content' + assert result['deleted.py'].new_content is None + query_arg = mock_graphql.call_args[0][0] + assert 'head0' not in query_arg, 'Should not fetch head for removed file' + + class TestLoadMinersPrsErrorResilience: """Test that a single bad PR doesn't abort fetching for the entire miner.""" diff --git a/tests/validator/conftest.py b/tests/validator/conftest.py index c6b70b54..5201a886 100644 --- a/tests/validator/conftest.py +++ b/tests/validator/conftest.py @@ -88,6 +88,8 @@ def create( repo: Optional[str] = None, unique_repo: bool = False, token_score: Optional[float] = None, # Auto-calculated from tier if None + uid: int = 0, + merged_at: Optional[datetime] = None, ) -> 
PullRequest: """Create a mock PullRequest with the given parameters. @@ -110,15 +112,18 @@ def create( if repo is None: repo = self._next_repo() if unique_repo else 'test/repo' + if merged_at is None: + merged_at = datetime.now(timezone.utc) if state == PRState.MERGED else None + return PullRequest( number=number, repository_full_name=repo, - uid=0, - hotkey='test_hotkey', - github_id='12345', + uid=uid, + hotkey=f'hotkey_{uid}', + github_id=str(uid), title=f'Test PR #{number}', - author_login='testuser', - merged_at=datetime.now(timezone.utc) if state == PRState.MERGED else None, + author_login=f'user_{uid}', + merged_at=merged_at, created_at=datetime.now(timezone.utc), pr_state=state, repository_tier_configuration=tier, diff --git a/tests/validator/test_pioneer_dividend.py b/tests/validator/test_pioneer_dividend.py new file mode 100644 index 00000000..01517258 --- /dev/null +++ b/tests/validator/test_pioneer_dividend.py @@ -0,0 +1,567 @@ +# The MIT License (MIT) +# Copyright © 2025 Entrius + +"""Tests for pioneer dividend mechanism.""" + +from datetime import datetime, timedelta, timezone + +import pytest + +from gittensor.classes import MinerEvaluation, PRState +from gittensor.constants import ( + MIN_TOKEN_SCORE_FOR_BASE_SCORE, + PIONEER_DIVIDEND_MAX_RATIO, + PIONEER_DIVIDEND_RATE_1ST, + PIONEER_DIVIDEND_RATE_2ND, + PIONEER_DIVIDEND_RATE_REST, +) +from gittensor.validator.configurations.tier_config import TIERS, Tier +from gittensor.validator.evaluation.scoring import ( + calculate_pioneer_dividends, + finalize_miner_scores, +) +from tests.validator.conftest import PRBuilder + +# ========================================================================== +# Fixtures +# ========================================================================== + +@pytest.fixture +def builder(): + return PRBuilder() + + +@pytest.fixture +def bronze(): + return TIERS[Tier.BRONZE] + + +# ========================================================================== +# 
TestPioneerEligibility +# ========================================================================== + +class TestPioneerEligibility: + """Tests for PullRequest.is_pioneer_eligible instance method.""" + + def test_eligible_when_merged_with_tier_and_token_score(self, builder, bronze): + pr = builder.create(state=PRState.MERGED, tier=bronze, uid=1) + assert pr.is_pioneer_eligible() + + def test_ineligible_without_tier(self, builder, bronze): + pr = builder.create(state=PRState.MERGED, tier=bronze, uid=1) + pr.repository_tier_configuration = None + assert not pr.is_pioneer_eligible() + + def test_ineligible_without_merge_timestamp(self, builder, bronze): + pr = builder.create(state=PRState.MERGED, tier=bronze, uid=1) + pr.merged_at = None + assert not pr.is_pioneer_eligible() + + def test_ineligible_below_token_score_threshold(self, builder, bronze): + pr = builder.create( + state=PRState.MERGED, tier=bronze, uid=1, + token_score=MIN_TOKEN_SCORE_FOR_BASE_SCORE - 1, + ) + assert not pr.is_pioneer_eligible() + + def test_eligible_at_exact_token_score_threshold(self, builder, bronze): + pr = builder.create( + state=PRState.MERGED, tier=bronze, uid=1, + token_score=MIN_TOKEN_SCORE_FOR_BASE_SCORE, + ) + assert pr.is_pioneer_eligible() + + +# ========================================================================== +# TestCalculatePioneerDividends +# ========================================================================== + +class TestCalculatePioneerDividends: + """Tests for calculate_pioneer_dividends function.""" + + def test_single_miner_gets_no_dividend(self, builder, bronze): + """A lone pioneer with no followers earns zero dividend.""" + now = datetime.now(timezone.utc) + pr = builder.create( + state=PRState.MERGED, tier=bronze, repo='org/repo-a', uid=1, + merged_at=now, earned_score=0.0, collateral_score=0.0, + ) + pr.base_score = 30.0 + evals = {1: MinerEvaluation(uid=1, hotkey='h1', merged_pull_requests=[pr])} + calculate_pioneer_dividends(evals) + assert 
pr.pioneer_rank == 1 + assert pr.pioneer_dividend == 0.0 + + def test_pioneer_earns_dividend_from_follower(self, builder, bronze): + """Pioneer earns 30% of first follower's earned_score.""" + now = datetime.now(timezone.utc) + pioneer_pr = builder.create( + state=PRState.MERGED, tier=bronze, repo='org/repo-a', uid=1, + merged_at=now - timedelta(days=5), earned_score=0.0, collateral_score=0.0, + ) + follower_pr = builder.create( + state=PRState.MERGED, tier=bronze, repo='org/repo-a', uid=2, + merged_at=now, earned_score=0.0, collateral_score=0.0, + ) + pioneer_pr.base_score = 30.0 + follower_pr.base_score = 20.0 + # Simulate earned_scores (all multipliers = 1.0) + pioneer_pr.earned_score = 30.0 + follower_pr.earned_score = 20.0 + evals = { + 1: MinerEvaluation(uid=1, hotkey='h1', merged_pull_requests=[pioneer_pr]), + 2: MinerEvaluation(uid=2, hotkey='h2', merged_pull_requests=[follower_pr]), + } + calculate_pioneer_dividends(evals) + + expected_dividend = round(20.0 * PIONEER_DIVIDEND_RATE_1ST, 2) # 20 * 0.30 = 6.0 + assert pioneer_pr.pioneer_rank == 1 + assert pioneer_pr.pioneer_dividend == expected_dividend + assert follower_pr.pioneer_rank == 2 + assert follower_pr.pioneer_dividend == 0.0 + + def test_dividend_from_multiple_followers(self, builder, bronze): + """Pioneer dividend uses per-position rates: 30%, 20%, 10%, 10%.""" + now = datetime.now(timezone.utc) + pioneer_pr = builder.create( + state=PRState.MERGED, tier=bronze, repo='org/repo-a', uid=1, + merged_at=now - timedelta(days=10), earned_score=0.0, collateral_score=0.0, + ) + pioneer_pr.base_score = 30.0 + pioneer_pr.earned_score = 30.0 + follower_prs = [] + for uid in range(2, 6): # 4 followers + pr = builder.create( + state=PRState.MERGED, tier=bronze, repo='org/repo-a', uid=uid, + merged_at=now - timedelta(days=10 - uid), earned_score=0.0, collateral_score=0.0, + ) + pr.base_score = 10.0 + pr.earned_score = 10.0 + follower_prs.append(pr) + evals = {1: MinerEvaluation(uid=1, hotkey='h1', 
merged_pull_requests=[pioneer_pr])} + for pr in follower_prs: + evals[pr.uid] = MinerEvaluation(uid=pr.uid, hotkey=f'h{pr.uid}', merged_pull_requests=[pr]) + calculate_pioneer_dividends(evals) + + # 1st: 10*0.30=3.0, 2nd: 10*0.20=2.0, 3rd: 10*0.10=1.0, 4th: 10*0.10=1.0 + expected_dividend = round( + 10.0 * PIONEER_DIVIDEND_RATE_1ST + + 10.0 * PIONEER_DIVIDEND_RATE_2ND + + 10.0 * PIONEER_DIVIDEND_RATE_REST + + 10.0 * PIONEER_DIVIDEND_RATE_REST, 2 + ) + assert pioneer_pr.pioneer_dividend == expected_dividend + + def test_dividend_grows_with_many_followers(self, builder, bronze): + """Dividend scales with followers but is capped at PIONEER_DIVIDEND_MAX_RATIO × own earned.""" + now = datetime.now(timezone.utc) + pioneer_pr = builder.create( + state=PRState.MERGED, tier=bronze, repo='org/repo-a', uid=1, + merged_at=now - timedelta(days=30), earned_score=0.0, collateral_score=0.0, + ) + pioneer_pr.base_score = 30.0 + pioneer_pr.earned_score = 30.0 + + follower_prs = [] + for uid in range(2, 12): # 10 followers + pr = builder.create( + state=PRState.MERGED, tier=bronze, repo='org/repo-a', uid=uid, + merged_at=now - timedelta(days=30 - uid), earned_score=0.0, collateral_score=0.0, + ) + pr.base_score = 30.0 + pr.earned_score = 30.0 + follower_prs.append(pr) + evals = {1: MinerEvaluation(uid=1, hotkey='h1', merged_pull_requests=[pioneer_pr])} + for pr in follower_prs: + evals[pr.uid] = MinerEvaluation(uid=pr.uid, hotkey=f'h{pr.uid}', merged_pull_requests=[pr]) + calculate_pioneer_dividends(evals) + + # Raw: 30*0.30=9 + 30*0.20=6 + 8*30*0.10=24 → 39.0 + # Cap: min(39.0, 30.0 * 1.0) = 30.0 + max_dividend = round(30.0 * PIONEER_DIVIDEND_MAX_RATIO, 2) + assert pioneer_pr.pioneer_dividend == max_dividend + assert pioneer_pr.earned_score == 30.0 + max_dividend + + def test_dividend_cap_at_max_ratio(self, builder, bronze): + """Dividend is capped at PIONEER_DIVIDEND_MAX_RATIO × pioneer's own earned_score.""" + now = datetime.now(timezone.utc) + pioneer_pr = builder.create( + 
state=PRState.MERGED, tier=bronze, repo='org/repo-a', uid=1, + merged_at=now - timedelta(days=10), earned_score=0.0, collateral_score=0.0, + ) + pioneer_pr.base_score = 10.0 + pioneer_pr.earned_score = 10.0 + # 1 follower with much higher earned_score + follower_pr = builder.create( + state=PRState.MERGED, tier=bronze, repo='org/repo-a', uid=2, + merged_at=now, earned_score=0.0, collateral_score=0.0, + ) + follower_pr.base_score = 100.0 + follower_pr.earned_score = 100.0 + evals = { + 1: MinerEvaluation(uid=1, hotkey='h1', merged_pull_requests=[pioneer_pr]), + 2: MinerEvaluation(uid=2, hotkey='h2', merged_pull_requests=[follower_pr]), + } + calculate_pioneer_dividends(evals) + + # Raw: 100*0.30 = 30.0, Cap: min(30.0, 10.0*1.0) = 10.0 + assert pioneer_pr.pioneer_dividend == round(10.0 * PIONEER_DIVIDEND_MAX_RATIO, 2) + assert pioneer_pr.earned_score == 10.0 + pioneer_pr.pioneer_dividend + + def test_multiple_follower_prs_summed(self, builder, bronze): + """A follower with multiple PRs on the same repo contributes all earned_scores to dividend.""" + now = datetime.now(timezone.utc) + pioneer_pr = builder.create( + state=PRState.MERGED, tier=bronze, repo='org/repo-a', uid=1, + merged_at=now - timedelta(days=10), earned_score=0.0, collateral_score=0.0, + ) + pioneer_pr.base_score = 30.0 + pioneer_pr.earned_score = 30.0 + # Follower has 3 PRs on the same repo + f_pr1 = builder.create( + state=PRState.MERGED, tier=bronze, repo='org/repo-a', uid=2, + merged_at=now - timedelta(days=5), earned_score=0.0, collateral_score=0.0, + ) + f_pr2 = builder.create( + state=PRState.MERGED, tier=bronze, repo='org/repo-a', uid=2, + merged_at=now - timedelta(days=3), earned_score=0.0, collateral_score=0.0, + ) + f_pr3 = builder.create( + state=PRState.MERGED, tier=bronze, repo='org/repo-a', uid=2, + merged_at=now - timedelta(days=1), earned_score=0.0, collateral_score=0.0, + ) + f_pr1.base_score = 5.0 + f_pr1.earned_score = 5.0 + f_pr2.base_score = 5.0 + f_pr2.earned_score = 5.0 + 
f_pr3.base_score = 5.0 + f_pr3.earned_score = 5.0 + evals = { + 1: MinerEvaluation(uid=1, hotkey='h1', merged_pull_requests=[pioneer_pr]), + 2: MinerEvaluation(uid=2, hotkey='h2', merged_pull_requests=[f_pr1, f_pr2, f_pr3]), + } + calculate_pioneer_dividends(evals) + + # Single follower (position 0 → 30% rate), sum of ALL their earned_scores: (5+5+5) * 0.30 + expected = round((5.0 + 5.0 + 5.0) * PIONEER_DIVIDEND_RATE_1ST, 2) + assert pioneer_pr.pioneer_dividend == expected + + def test_repos_are_independent(self, builder, bronze): + """Pioneer status and dividends are calculated per repo independently.""" + now = datetime.now(timezone.utc) + # UID 1 pioneers repo-a, UID 2 pioneers repo-b + pr1a = builder.create( + state=PRState.MERGED, tier=bronze, repo='org/repo-a', uid=1, + merged_at=now - timedelta(days=10), earned_score=0.0, collateral_score=0.0, + ) + pr2a = builder.create( + state=PRState.MERGED, tier=bronze, repo='org/repo-a', uid=2, + merged_at=now - timedelta(days=5), earned_score=0.0, collateral_score=0.0, + ) + pr2b = builder.create( + state=PRState.MERGED, tier=bronze, repo='org/repo-b', uid=2, + merged_at=now - timedelta(days=10), earned_score=0.0, collateral_score=0.0, + ) + pr1b = builder.create( + state=PRState.MERGED, tier=bronze, repo='org/repo-b', uid=1, + merged_at=now - timedelta(days=5), earned_score=0.0, collateral_score=0.0, + ) + for pr in [pr1a, pr2a, pr2b, pr1b]: + pr.base_score = 30.0 + pr.earned_score = 30.0 + evals = { + 1: MinerEvaluation(uid=1, hotkey='h1', merged_pull_requests=[pr1a, pr1b]), + 2: MinerEvaluation(uid=2, hotkey='h2', merged_pull_requests=[pr2a, pr2b]), + } + calculate_pioneer_dividends(evals) + + # UID 1 is pioneer on repo-a + assert pr1a.pioneer_rank == 1 + assert pr1a.pioneer_dividend == round(30.0 * PIONEER_DIVIDEND_RATE_1ST, 2) + # UID 2 is pioneer on repo-b + assert pr2b.pioneer_rank == 1 + assert pr2b.pioneer_dividend == round(30.0 * PIONEER_DIVIDEND_RATE_1ST, 2) + + def 
test_low_quality_pr_excluded_from_pioneer(self, builder, bronze): + """Low token_score PR cannot be pioneer; quality follower becomes pioneer.""" + now = datetime.now(timezone.utc) + snipe_pr = builder.create( + state=PRState.MERGED, tier=bronze, repo='org/repo-a', uid=1, + merged_at=now - timedelta(days=10), + token_score=MIN_TOKEN_SCORE_FOR_BASE_SCORE - 1, + earned_score=0.0, collateral_score=0.0, + ) + snipe_pr.base_score = 5.0 + good_pr = builder.create( + state=PRState.MERGED, tier=bronze, repo='org/repo-a', uid=2, + merged_at=now - timedelta(days=5), earned_score=0.0, collateral_score=0.0, + ) + good_pr.base_score = 30.0 + evals = { + 1: MinerEvaluation(uid=1, hotkey='h1', merged_pull_requests=[snipe_pr]), + 2: MinerEvaluation(uid=2, hotkey='h2', merged_pull_requests=[good_pr]), + } + calculate_pioneer_dividends(evals) + + # Snipe PR is not eligible, so it keeps default pioneer_rank=0 + assert snipe_pr.pioneer_rank == 0 + assert snipe_pr.pioneer_dividend == 0.0 + # Good PR becomes the solo pioneer (no followers -> no dividend) + assert good_pr.pioneer_rank == 1 + assert good_pr.pioneer_dividend == 0.0 + + def test_ineligible_pr_does_not_receive_rank(self, builder, bronze): + """Ineligible PR from same miner on same repo must not get pioneer_rank.""" + now = datetime.now(timezone.utc) + eligible_pr = builder.create( + state=PRState.MERGED, tier=bronze, repo='org/repo-a', uid=1, + merged_at=now - timedelta(days=10), earned_score=0.0, collateral_score=0.0, + ) + eligible_pr.base_score = 30.0 + eligible_pr.earned_score = 30.0 + ineligible_pr = builder.create( + state=PRState.MERGED, tier=bronze, repo='org/repo-a', uid=1, + merged_at=now - timedelta(days=5), + token_score=MIN_TOKEN_SCORE_FOR_BASE_SCORE - 1, + earned_score=0.0, collateral_score=0.0, + ) + ineligible_pr.base_score = 2.0 + ineligible_pr.earned_score = 2.0 + evals = { + 1: MinerEvaluation(uid=1, hotkey='h1', merged_pull_requests=[eligible_pr, ineligible_pr]), + } + calculate_pioneer_dividends(evals) + 
+ assert eligible_pr.pioneer_rank == 1 + assert ineligible_pr.pioneer_rank == 0 # must stay default + + def test_deterministic_tiebreak_by_pr_number(self, builder, bronze): + """Same merged_at timestamp: lower PR number wins pioneer status.""" + now = datetime.now(timezone.utc) + pr1 = builder.create( + state=PRState.MERGED, tier=bronze, repo='org/repo-a', uid=1, + merged_at=now, number=10, earned_score=0.0, collateral_score=0.0, + ) + pr2 = builder.create( + state=PRState.MERGED, tier=bronze, repo='org/repo-a', uid=2, + merged_at=now, number=20, earned_score=0.0, collateral_score=0.0, + ) + pr1.base_score = 30.0 + pr2.base_score = 30.0 + evals = { + 1: MinerEvaluation(uid=1, hotkey='h1', merged_pull_requests=[pr1]), + 2: MinerEvaluation(uid=2, hotkey='h2', merged_pull_requests=[pr2]), + } + calculate_pioneer_dividends(evals) + + assert pr1.pioneer_rank == 1 + assert pr2.pioneer_rank == 2 + + def test_only_pioneering_pr_gets_dividend(self, builder, bronze): + """Follow-up PRs by the pioneer on same repo don't get dividend.""" + now = datetime.now(timezone.utc) + pioneer_pr = builder.create( + state=PRState.MERGED, tier=bronze, repo='org/repo-a', uid=1, + merged_at=now - timedelta(days=10), earned_score=0.0, collateral_score=0.0, + ) + followup_pr = builder.create( + state=PRState.MERGED, tier=bronze, repo='org/repo-a', uid=1, + merged_at=now - timedelta(days=2), earned_score=0.0, collateral_score=0.0, + ) + follower_pr = builder.create( + state=PRState.MERGED, tier=bronze, repo='org/repo-a', uid=2, + merged_at=now, earned_score=0.0, collateral_score=0.0, + ) + pioneer_pr.base_score = 30.0 + pioneer_pr.earned_score = 30.0 + followup_pr.base_score = 25.0 + followup_pr.earned_score = 25.0 + follower_pr.base_score = 10.0 + follower_pr.earned_score = 10.0 + evals = { + 1: MinerEvaluation(uid=1, hotkey='h1', merged_pull_requests=[pioneer_pr, followup_pr]), + 2: MinerEvaluation(uid=2, hotkey='h2', merged_pull_requests=[follower_pr]), + } + 
calculate_pioneer_dividends(evals) + + # Only the pioneering PR gets the dividend + assert pioneer_pr.pioneer_dividend == round(10.0 * PIONEER_DIVIDEND_RATE_1ST, 2) + assert followup_pr.pioneer_dividend == 0.0 + + def test_empty_evaluations(self, builder, bronze): + """No crash on empty evaluations.""" + evals = {} + calculate_pioneer_dividends(evals) # Should not raise + + def test_no_eligible_prs(self, builder, bronze): + """No crash when all PRs are ineligible.""" + now = datetime.now(timezone.utc) + pr = builder.create( + state=PRState.MERGED, tier=bronze, repo='org/repo-a', uid=1, + merged_at=now, token_score=0.0, earned_score=0.0, collateral_score=0.0, + ) + evals = {1: MinerEvaluation(uid=1, hotkey='h1', merged_pull_requests=[pr])} + calculate_pioneer_dividends(evals) + assert pr.pioneer_rank == 0 + assert pr.pioneer_dividend == 0.0 + + +# ========================================================================== +# TestFinalizeWithDividend +# ========================================================================== + +class TestFinalizeWithDividend: + """Integration tests: pioneer dividend flows through finalize_miner_scores.""" + + def test_pioneer_dividend_additive_to_earned_score(self, builder, bronze): + """Pioneer dividend is added on top of earned_score: base × multipliers + dividend.""" + now = datetime.now(timezone.utc) + pioneer_pr = builder.create( + state=PRState.MERGED, tier=bronze, repo='org/repo-a', uid=1, + merged_at=now - timedelta(days=5), earned_score=0.0, collateral_score=0.0, + ) + follower_pr = builder.create( + state=PRState.MERGED, tier=bronze, repo='org/repo-a', uid=2, + merged_at=now, earned_score=0.0, collateral_score=0.0, + ) + pioneer_pr.base_score = 30.0 + follower_pr.base_score = 30.0 + # Compute earned_scores first (base × multipliers) + pioneer_pr.calculate_final_earned_score() + follower_pr.calculate_final_earned_score() + assert pioneer_pr.earned_score == 30.0 # base × 1.0 + assert follower_pr.earned_score == 30.0 + + # 
Now apply dividend (uses follower earned_score) + evals = { + 1: MinerEvaluation(uid=1, hotkey='h1', merged_pull_requests=[pioneer_pr]), + 2: MinerEvaluation(uid=2, hotkey='h2', merged_pull_requests=[follower_pr]), + } + calculate_pioneer_dividends(evals) + + # Dividend = 30% of follower's earned_score + expected_dividend = round(30.0 * PIONEER_DIVIDEND_RATE_1ST, 2) + assert pioneer_pr.pioneer_dividend == expected_dividend + # Pioneer earned_score = base_earned + dividend = 30 + 9 = 39 + assert pioneer_pr.earned_score == 30.0 + expected_dividend + assert pioneer_pr.earned_score > follower_pr.earned_score + + def test_follower_keeps_full_score(self, builder, bronze): + """Follower's score is not reduced — dividend is additive, not zero-sum.""" + now = datetime.now(timezone.utc) + # Create a solo miner scenario for baseline + solo_pr = builder.create( + state=PRState.MERGED, tier=bronze, repo='org/solo-repo', uid=3, + merged_at=now, earned_score=0.0, collateral_score=0.0, + ) + solo_pr.base_score = 30.0 + solo_eval = MinerEvaluation(uid=3, hotkey='h3', merged_pull_requests=[solo_pr]) + solo_eval.unique_repos_contributed_to.add('org/solo-repo') + + # Create a follower scenario + pioneer_pr = builder.create( + state=PRState.MERGED, tier=bronze, repo='org/repo-a', uid=1, + merged_at=now - timedelta(days=5), earned_score=0.0, collateral_score=0.0, + ) + follower_pr = builder.create( + state=PRState.MERGED, tier=bronze, repo='org/repo-a', uid=2, + merged_at=now, earned_score=0.0, collateral_score=0.0, + ) + pioneer_pr.base_score = 30.0 + follower_pr.base_score = 30.0 + eval1 = MinerEvaluation(uid=1, hotkey='h1', merged_pull_requests=[pioneer_pr]) + eval1.unique_repos_contributed_to.add('org/repo-a') + eval2 = MinerEvaluation(uid=2, hotkey='h2', merged_pull_requests=[follower_pr]) + eval2.unique_repos_contributed_to.add('org/repo-a') + + finalize_miner_scores({1: eval1, 2: eval2, 3: solo_eval}) + + # Follower's earned_score should equal solo miner's (no penalty) + assert 
follower_pr.earned_score == solo_pr.earned_score and follower_pr.pioneer_dividend == 0.0
+
+
+# ==========================================================================
+# TestPioneerIncentiveEvidence
+# ==========================================================================
+
+class TestPioneerIncentiveEvidence:
+    """Evidence tests proving the mechanism rewards exploration over pile-on."""
+
+    def test_exploration_beats_pile_on(self, builder, bronze):
+        """5 miners piling on 1 repo: only pioneer gets dividend. Exploring avoids the crowd."""
+        now = datetime.now(timezone.utc)
+
+        # Pile-on: 5 miners on 1 repo — only 1 pioneer
+        builder.reset()
+        pile_evals = {}
+        for uid in range(1, 6):
+            pr = builder.create(
+                state=PRState.MERGED, tier=bronze, repo='org/saturated', uid=uid,
+                merged_at=now - timedelta(days=uid), earned_score=0.0, collateral_score=0.0,
+            )
+            pr.base_score = 30.0
+            pr.earned_score = 30.0
+            pile_evals[uid] = MinerEvaluation(uid=uid, hotkey=f'h{uid}', merged_pull_requests=[pr])
+        calculate_pioneer_dividends(pile_evals)
+        pile_total_dividend = sum(
+            pr.pioneer_dividend for ev in pile_evals.values() for pr in ev.merged_pull_requests
+        )
+
+        # With pile-on, only pioneer gets dividend (based on follower earned_scores)
+        expected = round(
+            30.0 * PIONEER_DIVIDEND_RATE_1ST
+            + 30.0 * PIONEER_DIVIDEND_RATE_2ND
+            + 30.0 * PIONEER_DIVIDEND_RATE_REST
+            + 30.0 * PIONEER_DIVIDEND_RATE_REST, 2
+        )
+        assert pile_total_dividend == expected
+
+    def test_pioneer_earns_more_with_more_followers(self, builder, bronze):
+        """Pioneer's reward naturally grows as more miners follow — self-scaling incentive."""
+        now = datetime.now(timezone.utc)
+
+        # Scenario 1: 1 follower
+        builder.reset()
+        pr1 = builder.create(
+            state=PRState.MERGED, tier=bronze, repo='org/repo-a', uid=1,
+            merged_at=now - timedelta(days=10), earned_score=0.0, collateral_score=0.0,
+        )
+        pr1.base_score = 30.0
+        pr1.earned_score = 30.0
+        f1 = builder.create(
+            state=PRState.MERGED, tier=bronze, repo='org/repo-a', uid=2,
+            merged_at=now, 
earned_score=0.0, collateral_score=0.0, + ) + f1.base_score = 30.0 + f1.earned_score = 30.0 + evals1 = { + 1: MinerEvaluation(uid=1, hotkey='h1', merged_pull_requests=[pr1]), + 2: MinerEvaluation(uid=2, hotkey='h2', merged_pull_requests=[f1]), + } + calculate_pioneer_dividends(evals1) + div_1_follower = pr1.pioneer_dividend + + # Scenario 2: 5 followers + builder.reset() + pr2 = builder.create( + state=PRState.MERGED, tier=bronze, repo='org/repo-b', uid=1, + merged_at=now - timedelta(days=10), earned_score=0.0, collateral_score=0.0, + ) + pr2.base_score = 30.0 + pr2.earned_score = 30.0 + followers = [] + for uid in range(2, 7): + f = builder.create( + state=PRState.MERGED, tier=bronze, repo='org/repo-b', uid=uid, + merged_at=now - timedelta(days=10 - uid), earned_score=0.0, collateral_score=0.0, + ) + f.base_score = 30.0 + f.earned_score = 30.0 + followers.append(f) + evals2 = {1: MinerEvaluation(uid=1, hotkey='h1', merged_pull_requests=[pr2])} + for f in followers: + evals2[f.uid] = MinerEvaluation(uid=f.uid, hotkey=f'h{f.uid}', merged_pull_requests=[f]) + calculate_pioneer_dividends(evals2) + div_5_followers = pr2.pioneer_dividend + + assert div_5_followers > div_1_follower