This guide shows how to customize PR-Sentinel's spam detection behavior.
The spam score threshold determines when a PR is automatically moderated. The default is 70.
Set the threshold in your `.env` file:

```bash
# In .env file
SPAM_SCORE_THRESHOLD=80  # More lenient (fewer false positives)
# or
SPAM_SCORE_THRESHOLD=60  # More strict (catches more spam)
```

Or set it directly in `config.py`:

```python
# config.py
SPAM_SCORE_THRESHOLD = 80
```
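The threshold is simply compared against the computed score when a PR comes in. A minimal sketch of the gate (`handle_spam_pr` is a hypothetical placeholder for the comment/close actions, not an actual PR-Sentinel function):

```python
# Sketch only: handle_spam_pr stands in for whatever moderation
# actions (comment, close) your handler performs.
if spam_score >= SPAM_SCORE_THRESHOLD:
    handle_spam_pr(pr_data)
```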
Edit `config.py` to adjust how much each factor contributes to the spam score:

```python
WEIGHTS = {
    "trivial_readme": 30,       # Trivial README edits (0-30 points)
    "minimal_changes": 25,      # Minimal code changes (0-25 points)
    "generic_ai_text": 35,      # AI-generated text (0-35 points)
    "suspicious_patterns": 10   # Suspicious patterns (0-10 points)
}
```
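The default weights sum to 100, so each value is effectively the maximum number of points its factor can contribute. How the per-factor points are combined is internal to `spam_detector.py`; a plausible sketch, assuming a simple capped sum:

```python
from typing import Dict

# Assumed aggregation logic; the real implementation in
# spam_detector.py may combine factors differently.
def combine_scores(factor_scores: Dict[str, float]) -> float:
    # factor_scores maps each factor name to the points it earned,
    # already scaled by its WEIGHTS entry
    return min(sum(factor_scores.values()), 100.0)  # cap the total at 100
```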
More sensitive to AI text:

```python
WEIGHTS = {
    "trivial_readme": 20,
    "minimal_changes": 20,
    "generic_ai_text": 50,  # Increased
    "suspicious_patterns": 10
}
```
Focus on code quality:

```python
WEIGHTS = {
    "trivial_readme": 40,   # Increased
    "minimal_changes": 40,  # Increased
    "generic_ai_text": 15,  # Decreased
    "suspicious_patterns": 5
}
```

Edit `config.py` to add patterns you've observed in spam PRs:
```python
AI_TEXT_INDICATORS = [
    # Existing patterns
    "as an ai",
    "i don't have personal",
    "it's worth noting",
    # Add your custom patterns
    "revolutionize",
    "cutting-edge solution",
    "synergy",
    "paradigm shift",
    "at the end of the day",
    "move the needle"
]

SUSPICIOUS_PATTERNS = [
    # Existing patterns
    "typo fix",
    "minor fix",
    # Add your custom patterns
    "first pr",
    "beginner pr",
    "testing",
    "test pr"
]
```
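Note that every entry is lowercase, which suggests the lists are matched case-insensitively against PR text. A minimal sketch of that assumption (the actual matching in `spam_detector.py` may be more sophisticated):

```python
from typing import List

# Assumed matching logic: case-insensitive substring search.
# The real check in spam_detector.py may differ.
def count_matches(text: str, patterns: List[str]) -> int:
    lowered = text.lower()
    return sum(1 for pattern in patterns if pattern in lowered)
```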
Edit `github_client.py` to customize the auto-comment:

```python
def _generate_spam_comment(self, spam_score: float, reasons: List[str]) -> str:
    """Generate a spam detection comment."""
    comment = "🤖 **Your Repository's Custom Message**\n\n"
    comment += "This PR has been flagged by our automated system.\n\n"
    comment += f"**Score:** {spam_score:.1f}/100\n\n"

    # Add your custom instructions
    comment += "**What to do next:**\n"
    comment += "1. Review our contribution guidelines\n"
    comment += "2. Make meaningful changes\n"
    comment += "3. Resubmit your PR\n\n"

    comment += "**Detection reasons:**\n"
    for reason in reasons:
        comment += f"- {reason}\n"

    return comment
```
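For context, the generated text is presumably posted through GitHub's issue-comment endpoint (PRs count as issues for commenting purposes). A sketch with `requests`, useful for verifying the call outside the app; the client's actual method may differ:

```python
import requests

# Sketch only; PR-Sentinel's github_client.py may use a different
# HTTP library or wrapper. The endpoint itself is standard GitHub REST.
def post_comment(repo_full_name: str, pr_number: int, body: str, token: str) -> bool:
    url = f"https://api.github.com/repos/{repo_full_name}/issues/{pr_number}/comments"
    resp = requests.post(
        url,
        headers={
            "Authorization": f"Bearer {token}",
            "Accept": "application/vnd.github+json",
        },
        json={"body": body},
        timeout=10,
    )
    return resp.status_code == 201  # 201 Created on success
```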
Edit `spam_detector.py`:

```python
def analyze_pr(self, pr_data: Dict) -> Tuple[float, Dict]:
    """Analyze a PR and return spam score and details."""
    score = 0.0
    details = {
        # ... existing fields ...
        "custom_check": False  # Add your field
    }

    # ... existing checks ...

    # Add your custom check
    custom_score, custom_details = self._check_custom_rule(pr_data)
    if custom_score > 0:
        score += custom_score
        details["custom_check"] = True
        details["reasons"].extend(custom_details)

    return score, details

def _check_custom_rule(self, pr_data: Dict) -> Tuple[float, List[str]]:
    """Your custom detection logic."""
    reasons = []

    # Example: Flag PRs from new accounts
    # (You'd need to add account_age_days to pr_data first)
    if pr_data.get("account_age_days", 999) < 7:
        reasons.append("PR from account less than 7 days old")
        return 20.0, reasons

    return 0.0, reasons
```
To skip detection entirely for bots and known contributors, add an allowlist:

```python
# config.py
TRUSTED_USERS = [
    "dependabot[bot]",
    "renovate[bot]",
    "github-actions[bot]",
    "your-trusted-contributor"
]
```

```python
# spam_detector.py
from config import TRUSTED_USERS  # assuming config.py is importable as `config`

def analyze_pr(self, pr_data: Dict) -> Tuple[float, Dict]:
    """Analyze a PR and return spam score and details."""
    # Skip analysis for trusted users
    if pr_data.get("user", "") in TRUSTED_USERS:
        return 0.0, {"reasons": ["Trusted user"]}
    # Continue with normal analysis...
```
Another example: flag PRs created during hours when bot activity is common:

```python
from datetime import datetime

def _check_suspicious_timing(self, pr_data: Dict) -> Tuple[float, List[str]]:
    """Flag PRs created during suspicious hours."""
    reasons = []
    created_at_raw = pr_data.get("created_at", "")
    if not created_at_raw:
        return 0.0, reasons

    # GitHub timestamps end in "Z"; normalize so fromisoformat()
    # also parses them on Python < 3.11
    created_at = datetime.fromisoformat(created_at_raw.replace("Z", "+00:00"))
    hour = created_at.hour

    # Flag PRs created between 2 AM and 5 AM UTC (common bot hours)
    if 2 <= hour <= 5:
        reasons.append("PR created during unusual hours (2-5 AM UTC)")
        return 15.0, reasons

    return 0.0, reasons
```
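Wire the new check into `analyze_pr` the same way as the custom rule above:

```python
# Inside analyze_pr, alongside the existing checks
timing_score, timing_reasons = self._check_suspicious_timing(pr_data)
score += timing_score
details["reasons"].extend(timing_reasons)
```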
You can also apply different rules per repository:

```python
# config.py
REPO_SPECIFIC_RULES = {
    "owner/repo1": {
        "spam_threshold": 80,  # More lenient
        "auto_close": False    # Only comment, don't close
    },
    "owner/repo2": {
        "spam_threshold": 50,  # More strict
        "auto_close": True
    }
}
```

```python
# main.py - modify webhook handler
repo_full_name = repository.get("full_name", "")
repo_rules = REPO_SPECIFIC_RULES.get(repo_full_name, {})
threshold = repo_rules.get("spam_threshold", SPAM_SCORE_THRESHOLD)

if spam_score >= threshold:
    # Apply repo-specific rules
    ...
```
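A sketch of how the `auto_close` flag could be honored inside that branch (`comment_on_pr` is a hypothetical helper name; `close_pr` matches the `github_client.py` method shown later in this guide):

```python
# Sketch: honoring auto_close per repository. comment_on_pr is a
# hypothetical name; adapt to your github_client.py methods.
if spam_score >= threshold:
    github_client.comment_on_pr(repo_full_name, pr_number, comment)
    if repo_rules.get("auto_close", True):
        github_client.close_pr(repo_full_name, pr_number)
```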
By default, tracked PRs are stored in a JSON file. To change its location:

```bash
# .env
PR_TRACKING_FILE=/path/to/custom/tracking.json
```

To use PostgreSQL instead, create a new file `database_storage.py`:
```python
from typing import Dict, List, Optional
import json

import psycopg2


class DatabaseStorage:
    def __init__(self, connection_string: str):
        self.conn = psycopg2.connect(connection_string)
        self._create_tables()

    def _create_tables(self):
        """Create PR tracking table."""
        with self.conn.cursor() as cur:
            cur.execute("""
                CREATE TABLE IF NOT EXISTS tracked_prs (
                    id SERIAL PRIMARY KEY,
                    repo VARCHAR(255),
                    pr_number INTEGER,
                    user_login VARCHAR(255),
                    spam_score FLOAT,
                    is_spam BOOLEAN,
                    details JSONB,
                    tracked_at TIMESTAMP DEFAULT NOW()
                )
            """)
        self.conn.commit()

    def add_pr(self, pr_data: Dict):
        """Add a PR to tracking."""
        with self.conn.cursor() as cur:
            cur.execute("""
                INSERT INTO tracked_prs
                (repo, pr_number, user_login, spam_score, is_spam, details)
                VALUES (%s, %s, %s, %s, %s, %s)
            """, (
                pr_data["repo"],
                pr_data["pr_number"],
                pr_data["user"],
                pr_data["spam_score"],
                pr_data["is_spam"],
                json.dumps(pr_data["details"])
            ))
        self.conn.commit()

    # Implement other methods to match the existing storage interface...
```
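For example, one of the remaining methods might look like this (a sketch; mirror the method names and signatures of your existing `storage.py` interface, which this guide doesn't list):

```python
# Hypothetical additional method; name and signature are illustrative.
def is_tracked(self, repo: str, pr_number: int) -> bool:
    """Return True if this PR has already been recorded."""
    with self.conn.cursor() as cur:
        cur.execute(
            "SELECT 1 FROM tracked_prs WHERE repo = %s AND pr_number = %s",
            (repo, pr_number),
        )
        return cur.fetchone() is not None
```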
Then modify `main.py`:

```python
# main.py
import os

from database_storage import DatabaseStorage

# Initialize with the database instead of the JSON file
pr_storage = DatabaseStorage(os.getenv("DATABASE_URL"))
```
Create test cases for your custom rules:

```python
# test_custom_rules.py
from spam_detector import SpamDetector

def test_custom_rule():
    detector = SpamDetector()
    pr_data = {
        # Your test data
    }
    score, details = detector.analyze_pr(pr_data)
    assert score > 0, "Should detect custom pattern"
    print(f"✓ Custom rule test passed (score: {score:.1f})")

if __name__ == "__main__":
    test_custom_rule()
```
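For instance, a test for the new-account rule sketched earlier might look like this (assuming your `analyze_pr` reads `account_age_days` from `pr_data` as in that example; the other fields are placeholders for whatever your detector expects):

```python
# Hypothetical test for the new-account rule; adjust pr_data fields
# to match what your analyze_pr actually reads.
def test_new_account_rule():
    detector = SpamDetector()
    pr_data = {
        "account_age_days": 3,  # under the 7-day cutoff
        # ... other fields your detector expects ...
    }
    score, details = detector.analyze_pr(pr_data)
    assert score >= 20.0, "New-account rule should add 20 points"
    print(f"✓ New-account rule test passed (score: {score:.1f})")
```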
Create different configuration profiles:

```python
# config_profiles.py
import os

PROFILES = {
    "lenient": {
        "threshold": 85,
        "weights": {
            "trivial_readme": 20,
            "minimal_changes": 20,
            "generic_ai_text": 30,
            "suspicious_patterns": 5
        }
    },
    "balanced": {
        "threshold": 70,
        "weights": {
            "trivial_readme": 30,
            "minimal_changes": 25,
            "generic_ai_text": 35,
            "suspicious_patterns": 10
        }
    },
    "strict": {
        "threshold": 55,
        "weights": {
            "trivial_readme": 35,
            "minimal_changes": 30,
            "generic_ai_text": 40,
            "suspicious_patterns": 15
        }
    }
}

# Load profile
ACTIVE_PROFILE = os.getenv("DETECTION_PROFILE", "balanced")
SPAM_SCORE_THRESHOLD = PROFILES[ACTIVE_PROFILE]["threshold"]
WEIGHTS = PROFILES[ACTIVE_PROFILE]["weights"]
```
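Then select a profile at deploy time:

```bash
# .env
DETECTION_PROFILE=strict
```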
Test without actually closing PRs:

```python
# config.py
import os

DRY_RUN = os.getenv("DRY_RUN", "false").lower() == "true"
```

```python
# github_client.py
from config import DRY_RUN  # assuming config.py is importable as `config`

def close_pr(self, repo_full_name: str, pr_number: int) -> bool:
    if DRY_RUN:
        logger.info(f"[DRY RUN] Would close PR #{pr_number}")
        return True
    # Actually close the PR
    ...
```
Use it:

```bash
# .env
DRY_RUN=true
```

Key files to modify for customization:
- `config.py` - Thresholds, weights, patterns
- `spam_detector.py` - Detection logic
- `github_client.py` - Comment messages, actions
- `storage.py` - Storage implementation
Always test changes thoroughly before deploying to production!