Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 22 additions & 7 deletions gittensor/classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

import bittensor as bt

from gittensor.constants import MIN_TOKEN_SCORE_FOR_BASE_SCORE
from gittensor.utils.utils import parse_repo_name
from gittensor.validator.configurations.tier_config import Tier, TierConfig, TierStats

Expand Down Expand Up @@ -156,7 +157,8 @@ class PullRequest:
base_score: float = 0.0
issue_multiplier: float = 1.0
open_pr_spam_multiplier: float = 1.0
repository_uniqueness_multiplier: float = 1.0
pioneer_dividend: float = 0.0 # Additive bonus for pioneering a repo
pioneer_rank: int = 0 # 0 = not eligible, 1 = pioneer, 2+ = follower position
time_decay_multiplier: float = 1.0
credibility_multiplier: float = 1.0
raw_credibility: float = 1.0 # Before applying ^k scalar
Expand Down Expand Up @@ -188,24 +190,37 @@ def set_file_changes(self, file_changes: List[FileChange]) -> None:
"""Set the file changes for this pull request"""
self.file_changes = file_changes

def is_pioneer_eligible(self) -> bool:
    """Return True when this PR can be considered for the pioneer dividend.

    Eligibility requires all of:
      * a tier configuration is attached to the repository,
      * the PR was actually merged,
      * the token score clears the minimum quality gate
        (MIN_TOKEN_SCORE_FOR_BASE_SCORE).
    """
    # Guard clauses mirror the original short-circuit order.
    if self.repository_tier_configuration is None:
        return False
    if self.merged_at is None:
        return False
    return self.token_score >= MIN_TOKEN_SCORE_FOR_BASE_SCORE

def calculate_final_earned_score(self) -> float:
"""Combine base score with all multipliers."""
"""Combine base score with all multipliers. Pioneer dividend is added separately after."""
multipliers = {
'repo': self.repo_weight_multiplier,
'issue': self.issue_multiplier,
'spam': self.open_pr_spam_multiplier,
'unique': self.repository_uniqueness_multiplier,
'decay': self.time_decay_multiplier,
'cred': self.credibility_multiplier,
}

self.earned_score = self.base_score * prod(multipliers.values())

# Log all multipliers (credibility shows ^k format)
mult_str = ' × '.join(
f'cred={self.raw_credibility:.2f}^{self.credibility_scalar}' if k == 'cred' else f'{k}={v:.2f}'
for k, v in multipliers.items()
)
def _format_multiplier(k: str, v: float) -> str:
if k == 'cred':
return f'cred={self.raw_credibility:.2f}^{self.credibility_scalar}'
return f'{k}={v:.2f}'

mult_str = ' × '.join(_format_multiplier(k, v) for k, v in multipliers.items())
bt.logging.info(
f'├─ {self.pr_state.value} PR #{self.number} ({self.repository_full_name}) → {self.earned_score:.2f}'
)
Expand Down
11 changes: 9 additions & 2 deletions gittensor/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,16 @@
DEFAULT_MAX_CONTRIBUTION_SCORE_FOR_FULL_BONUS = 2000

# Boosts
UNIQUE_PR_BOOST = 0.74
MAX_CODE_DENSITY_MULTIPLIER = 3.0

# Pioneer dividend — rewards the first quality contributor to each repository
# Rates applied per follower position (1st follower pays most, diminishing after)
# Dividend capped at PIONEER_DIVIDEND_MAX_RATIO × pioneer's own earned_score
PIONEER_DIVIDEND_RATE_1ST = 0.30 # 1st follower: 30% of their earned_score
PIONEER_DIVIDEND_RATE_2ND = 0.20 # 2nd follower: 20% of their earned_score
PIONEER_DIVIDEND_RATE_REST = 0.10 # 3rd+ followers: 10% of their earned_score
PIONEER_DIVIDEND_MAX_RATIO = 1.0 # Cap dividend at 1× pioneer's own earned_score (max 2× total)

# Issue boosts
MAX_ISSUE_CLOSE_WINDOW_DAYS = 1
MAX_ISSUE_AGE_FOR_MAX_SCORE = 40 # days
Expand Down Expand Up @@ -112,7 +119,7 @@
# =============================================================================
# Spam & Gaming Mitigation
# =============================================================================
MAINTAINER_ASSOCIATIONS = ['OWNER', 'MEMBER', 'COLLABORATOR']
MAINTAINER_ASSOCIATIONS = ['OWNER', 'COLLABORATOR']

# Issue multiplier bonuses
MAX_ISSUE_AGE_BONUS = 0.75 # Max bonus for issue age (scales with sqrt of days open)
Expand Down
177 changes: 113 additions & 64 deletions gittensor/utils/github_api_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -980,6 +980,28 @@ def check_github_issue_closed(repo: str, issue_number: int, token: str) -> Optio
return None


def _escape_graphql_expression(expression: str) -> str:
"""Escape special characters in a GraphQL string literal.

File paths containing backslashes or double quotes break GraphQL query
syntax when interpolated directly. This escapes them so the query remains
valid.

Args:
expression: Raw string to embed inside a GraphQL double-quoted literal.

Returns:
Escaped string safe for embedding in GraphQL queries.
"""
return expression.replace('\\', '\\\\').replace('"', '\\"')


# Maximum files per GraphQL batch request. GitHub's GraphQL API has query
# complexity limits; batching too many object lookups in a single request can
# cause a 502/complexity error and lose all results.
# NOTE(review): 50 looks conservative — confirm against GitHub's current
# published node/complexity limits before raising it.
_MAX_FILES_PER_GRAPHQL_BATCH = 50


def fetch_file_contents_batch(
repo_owner: str,
repo_name: str,
Expand All @@ -988,9 +1010,10 @@ def fetch_file_contents_batch(
token: str,
) -> Dict[str, Optional[str]]:
"""
Fetch multiple file contents from a repository in a single GraphQL request.
Fetch multiple file contents from a repository in batched GraphQL requests.

Uses retry logic with exponential backoff for reliability.
Uses retry logic with exponential backoff for reliability. Batches files
to avoid exceeding GitHub's GraphQL complexity limits.

Args:
repo_owner: Repository owner
Expand All @@ -1005,47 +1028,53 @@ def fetch_file_contents_batch(
if not file_paths:
return {}

# Build GraphQL query with aliased file fields
file_fields = []
for i, path in enumerate(file_paths):
expression = f'{head_sha}:{path}'
file_fields.append(
f'file{i}: object(expression: "{expression}") {{ ... on Blob {{ text byteSize isBinary }} }}'
)
results: Dict[str, Optional[str]] = {}

query = f"""
query($owner: String!, $name: String!) {{
repository(owner: $owner, name: $name) {{
{' '.join(file_fields)}
# Process files in batches to avoid exceeding GraphQL complexity limits
for batch_start in range(0, len(file_paths), _MAX_FILES_PER_GRAPHQL_BATCH):
batch_paths = file_paths[batch_start : batch_start + _MAX_FILES_PER_GRAPHQL_BATCH]

# Build GraphQL query with aliased file fields
file_fields = []
for i, path in enumerate(batch_paths):
expression = _escape_graphql_expression(f'{head_sha}:{path}')
file_fields.append(
f'file{i}: object(expression: "{expression}") {{ ... on Blob {{ text byteSize isBinary }} }}'
)

query = f"""
query($owner: String!, $name: String!) {{
repository(owner: $owner, name: $name) {{
{' '.join(file_fields)}
}}
}}
}}
"""
"""

variables = {'owner': repo_owner, 'name': repo_name}
variables = {'owner': repo_owner, 'name': repo_name}

# Execute with retry logic
data = execute_graphql_query(query, variables, token)
if data is None:
bt.logging.warning(f'Failed to fetch file contents for {repo_owner}/{repo_name}')
return {path: None for path in file_paths}
data = execute_graphql_query(query, variables, token)
if data is None:
bt.logging.warning(f'Failed to fetch file contents for {repo_owner}/{repo_name}')
for path in batch_paths:
results[path] = None
continue

if 'errors' in data:
bt.logging.warning(f'GraphQL errors fetching files: {data["errors"]}')
if 'errors' in data:
bt.logging.warning(f'GraphQL errors fetching files: {data["errors"]}')

repo_data = data.get('data', {}).get('repository', {})
results = {}
repo_data = data.get('data', {}).get('repository', {})

for i, path in enumerate(file_paths):
file_data = repo_data.get(f'file{i}')
for i, path in enumerate(batch_paths):
file_data = repo_data.get(f'file{i}')

if file_data is None:
results[path] = None
elif file_data.get('isBinary'):
results[path] = None
elif file_data.get('byteSize', 0) > MAX_FILE_SIZE_BYTES:
results[path] = None
else:
results[path] = file_data.get('text')
if file_data is None:
results[path] = None
elif file_data.get('isBinary'):
results[path] = None
elif file_data.get('byteSize', 0) > MAX_FILE_SIZE_BYTES:
results[path] = None
else:
results[path] = file_data.get('text')

return results

Expand All @@ -1058,52 +1087,31 @@ class FileContentPair:
new_content: Optional[str] # None for deleted files


def fetch_file_contents_with_base(
def _fetch_file_contents_with_base_batch(
repo_owner: str,
repo_name: str,
base_sha: str,
head_sha: str,
file_changes: List['FileChangeType'],
token: str,
) -> Dict[str, FileContentPair]:
"""
Fetch both base and head (old and new) versions of files in a single GraphQL request.

Args:
repo_owner: Repository owner
repo_name: Repository name
base_sha: The base branch SHA (before PR changes)
head_sha: The head/merge commit SHA (after PR changes)
file_changes: List of FileChange objects (needed for status and previous_filename)
token: GitHub PAT for authentication
"""Fetch base and head file contents for a single batch of file changes.

Returns:
Dict mapping file paths to FileContentPair (old_content, new_content)
- For new files: old_content is None
- For deleted files: new_content is None
- For renamed files: old_content fetched from previous_filename
Internal helper called by fetch_file_contents_with_base for each batch.
"""
if not file_changes:
return {}

# Build GraphQL query with both base and head versions
file_fields = []
for i, fc in enumerate(file_changes):
# Determine the path to fetch for base version
# For renames, use previous_filename; otherwise use current filename
base_path = fc.previous_filename if fc.previous_filename else fc.filename
head_path = fc.filename

# Only fetch base version if file wasn't newly added
if fc.status != 'added':
base_expr = f'{base_sha}:{base_path}'
base_expr = _escape_graphql_expression(f'{base_sha}:{base_path}')
file_fields.append(
f'base{i}: object(expression: "{base_expr}") {{ ... on Blob {{ text byteSize isBinary }} }}'
)

# Only fetch head version if file wasn't deleted
if fc.status != 'removed':
head_expr = f'{head_sha}:{head_path}'
head_expr = _escape_graphql_expression(f'{head_sha}:{head_path}')
file_fields.append(
f'head{i}: object(expression: "{head_expr}") {{ ... on Blob {{ text byteSize isBinary }} }}'
)
Expand All @@ -1121,7 +1129,6 @@ def fetch_file_contents_with_base(

variables = {'owner': repo_owner, 'name': repo_name}

# Execute with retry logic
data = execute_graphql_query(query, variables, token)
if data is None:
bt.logging.warning(f'Failed to fetch file contents for {repo_owner}/{repo_name}')
Expand All @@ -1137,13 +1144,11 @@ def fetch_file_contents_with_base(
old_content = None
new_content = None

# Extract base (old) content if applicable
if fc.status != 'added':
base_data = repo_data.get(f'base{i}')
if base_data and not base_data.get('isBinary') and base_data.get('byteSize', 0) <= MAX_FILE_SIZE_BYTES:
old_content = base_data.get('text')

# Extract head (new) content if applicable
if fc.status != 'removed':
head_data = repo_data.get(f'head{i}')
if head_data and not head_data.get('isBinary') and head_data.get('byteSize', 0) <= MAX_FILE_SIZE_BYTES:
Expand All @@ -1152,3 +1157,47 @@ def fetch_file_contents_with_base(
results[fc.filename] = FileContentPair(old_content=old_content, new_content=new_content)

return results


def fetch_file_contents_with_base(
    repo_owner: str,
    repo_name: str,
    base_sha: str,
    head_sha: str,
    file_changes: List['FileChangeType'],
    token: str,
) -> Dict[str, FileContentPair]:
    """
    Fetch both base and head (old and new) versions of files via batched GraphQL requests.

    Large PRs are split into batches to avoid exceeding GitHub's GraphQL query
    complexity limits. File paths are escaped to prevent query syntax errors
    from special characters.

    Args:
        repo_owner: Repository owner
        repo_name: Repository name
        base_sha: The base branch SHA (before PR changes)
        head_sha: The head/merge commit SHA (after PR changes)
        file_changes: List of FileChange objects (needed for status and previous_filename)
        token: GitHub PAT for authentication

    Returns:
        Dict mapping file paths to FileContentPair (old_content, new_content)
        - For new files: old_content is None
        - For deleted files: new_content is None
        - For renamed files: old_content fetched from previous_filename
    """
    if not file_changes:
        return {}

    merged: Dict[str, FileContentPair] = {}
    step = _MAX_FILES_PER_GRAPHQL_BATCH

    # Walk the change list one batch at a time, folding each batch's results
    # into the combined mapping.
    for offset in range(0, len(file_changes), step):
        chunk = file_changes[offset : offset + step]
        merged.update(
            _fetch_file_contents_with_base_batch(
                repo_owner, repo_name, base_sha, head_sha, chunk, token
            )
        )

    return merged
2 changes: 1 addition & 1 deletion gittensor/validator/evaluation/reward.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ async def get_rewards(
# Adjust scores for duplicate accounts
detect_and_penalize_miners_sharing_github(miner_evaluations)

# Finalize scores: apply unique contribution multiplier, credibility, sum totals, deduct collateral
# Finalize scores: apply pioneer dividends, credibility, sum totals, deduct collateral
finalize_miner_scores(miner_evaluations)

# Allocate emissions by tier: replace total_score with tier-weighted allocations
Expand Down
Loading