-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
247 lines (199 loc) · 6.84 KB
/
main.py
File metadata and controls
247 lines (199 loc) · 6.84 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
"""FastAPI webhook listener for GitHub pull request events."""
import hmac
import hashlib
import logging
from typing import Dict, Optional
from fastapi import FastAPI, Request, Response, HTTPException, status
from fastapi.responses import JSONResponse
from pydantic import BaseModel
from config import (
GITHUB_WEBHOOK_SECRET,
SPAM_SCORE_THRESHOLD,
HOST,
PORT
)
from spam_detector import SpamDetector
from github_client import GitHubClient
from storage import PRStorage
# Logging: one process-wide configuration, INFO and above, with timestamp,
# logger name and level in front of every message.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# The ASGI application object that the route decorators below attach to.
app = FastAPI(
    version="1.0.0",
    title="PR-Sentinel",
    description="Automated spam detection for GitHub pull requests",
)

# Module-level collaborators shared by every request handler.
spam_detector = SpamDetector()
github_client = GitHubClient()
pr_storage = PRStorage()
class WebhookResponse(BaseModel):
    """Shape of the JSON body describing the outcome of a webhook delivery."""

    # Outcome label and a human-readable explanation; both are required.
    status: str
    message: str

    # Optional extras; both default to None when not supplied.
    spam_score: Optional[float] = None
    action_taken: Optional[str] = None
def verify_signature(payload_body: bytes, signature_header: str) -> bool:
    """
    Verify GitHub webhook signature.

    The header is expected in the form ``sha256=<hex digest>``, where the
    digest is the HMAC-SHA256 of the raw request body keyed with
    GITHUB_WEBHOOK_SECRET. Malformed headers are rejected (return False)
    rather than raising.

    Args:
        payload_body: Raw request body
        signature_header: X-Hub-Signature-256 header value

    Returns:
        True if signature is valid, or if no webhook secret is configured
        (verification is skipped with a warning); False otherwise
    """
    if not GITHUB_WEBHOOK_SECRET:
        logger.warning("Webhook secret not configured, skipping signature verification")
        return True

    if not signature_header:
        return False

    # partition() never raises, unlike unpacking the result of split('='),
    # which blows up on headers with zero or several '=' characters.
    hash_algorithm, separator, github_signature = signature_header.partition('=')
    if not separator or hash_algorithm != "sha256" or not github_signature:
        return False

    # Encode the secret as UTF-8 (as in GitHub's documented example) so that
    # non-ASCII secrets produce the same key bytes GitHub signs with.
    encoded_key = GITHUB_WEBHOOK_SECRET.encode('utf-8')
    mac = hmac.new(encoded_key, msg=payload_body, digestmod=hashlib.sha256)

    # Compare as bytes: compare_digest() raises TypeError for str arguments
    # containing non-ASCII characters, which a hostile header could include.
    expected_digest = mac.hexdigest().encode('ascii')
    received_digest = github_signature.encode('utf-8')
    return hmac.compare_digest(expected_digest, received_digest)
@app.get("/")
async def root():
    """Return a short, static description of the service."""
    service_info = dict(
        service="PR-Sentinel",
        status="running",
        description="Automated spam detection for GitHub pull requests",
    )
    return service_info
@app.get("/health")
async def health():
    """Liveness probe: always reports a healthy status."""
    return dict(status="healthy")
@app.get("/stats")
async def stats():
    """Summarise the PR records held in storage.

    Returns the number of stored records, how many of them carry a truthy
    "is_spam" flag, and the first ten records as returned by storage.
    """
    tracked = pr_storage.get_all_prs()

    # Count flagged records without materialising an intermediate list.
    spam_count = sum(1 for record in tracked if record.get("is_spam", False))

    return {
        "total_tracked": len(tracked),
        "spam_detected": spam_count,
        "recent_prs": tracked[:10],
    }
@app.post("/webhook")
async def webhook(request: Request):
    """
    Handle GitHub webhook events.

    Processes pull_request events and analyzes them for spam. Only the
    "opened" and "reopened" actions are analyzed; every other event type or
    action is acknowledged with a 200 "ignored" response.

    Args:
        request: Incoming HTTP request carrying the webhook delivery

    Returns:
        JSONResponse with "status" and "message", plus "spam_score" and
        "action_taken" when the PR was analyzed

    Raises:
        HTTPException: 401 if the signature check fails; 400 if the body is
            not a JSON object or lacks the repository name / PR number
    """
    # Get request body and headers
    payload_body = await request.body()
    signature = request.headers.get("X-Hub-Signature-256", "")
    event_type = request.headers.get("X-GitHub-Event", "")

    # Verify signature against the raw bytes, before any parsing
    if not verify_signature(payload_body, signature):
        logger.warning("Invalid webhook signature")
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid signature"
        )

    # Parse payload
    try:
        payload = await request.json()
    except Exception as e:
        logger.error(f"Error parsing payload: {e}")
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Invalid JSON payload"
        ) from e

    # Valid JSON need not be an object (e.g. a list or a bare string); the
    # .get() calls below require a mapping.
    if not isinstance(payload, dict):
        logger.error("Webhook payload is not a JSON object")
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Invalid JSON payload"
        )

    # Only process pull_request events
    if event_type != "pull_request":
        return JSONResponse(
            content={"status": "ignored", "message": f"Event type '{event_type}' not processed"},
            status_code=200
        )

    # Only process opened and reopened PRs
    action = payload.get("action", "")
    if action not in ("opened", "reopened"):
        return JSONResponse(
            content={"status": "ignored", "message": f"Action '{action}' not processed"},
            status_code=200
        )

    # Extract PR information. "or {}" also covers keys that are present but
    # null, which would otherwise crash the chained .get() calls.
    pr = payload.get("pull_request") or {}
    repository = payload.get("repository") or {}
    repo_full_name = repository.get("full_name", "")
    pr_number = pr.get("number", 0)
    pr_user = (pr.get("user") or {}).get("login", "")

    # Without these two identifiers there is nothing to fetch or moderate.
    if not repo_full_name or not pr_number:
        logger.error("Webhook payload is missing repository name or PR number")
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Missing repository or pull request number"
        )

    logger.info("Processing PR #%s from %s in %s", pr_number, pr_user, repo_full_name)

    # Fetch detailed PR data
    pr_data = github_client.get_pr_data(repo_full_name, pr_number)
    if not pr_data:
        # Fall back to webhook data if API fails
        pr_data = extract_pr_data_from_webhook(payload)

    # Analyze PR for spam
    spam_score, details = spam_detector.analyze_pr(pr_data)
    logger.info("PR #%s spam score: %.1f", pr_number, spam_score)

    # Decide once so the stored flag and the action taken cannot disagree.
    is_spam = spam_score >= SPAM_SCORE_THRESHOLD

    # Store PR data
    pr_storage.add_pr({
        "repo": repo_full_name,
        "pr_number": pr_number,
        "user": pr_user,
        "spam_score": spam_score,
        "is_spam": is_spam,
        "details": details
    })

    # Take action if spam score exceeds threshold
    action_taken = None
    if is_spam:
        logger.warning("PR #%s flagged as spam (score: %.1f)", pr_number, spam_score)
        # Auto-moderate the PR
        success = github_client.auto_moderate_pr(
            repo_full_name,
            pr_number,
            spam_score,
            details["reasons"]
        )
        if success:
            action_taken = "commented_and_closed"
            logger.info("PR #%s automatically closed", pr_number)
        else:
            action_taken = "moderation_failed"
            logger.error("Failed to moderate PR #%s", pr_number)

    return JSONResponse(
        content={
            "status": "processed",
            "message": "PR analyzed successfully",
            "spam_score": spam_score,
            "action_taken": action_taken
        },
        status_code=200
    )
def extract_pr_data_from_webhook(payload: Dict) -> Dict:
    """
    Extract PR data from webhook payload as fallback.

    Values that are missing or explicitly null in the payload (GitHub sends
    ``"body": null`` for a PR without a description) are replaced by empty
    defaults, so text fields are always strings and counters always numbers.

    Args:
        payload: Webhook payload

    Returns:
        Dictionary with PR data. "files" and "commit_messages" are always
        empty lists because this function does not read them from the payload.
    """
    # "or {}" covers both a missing key and an explicit null.
    pr = payload.get("pull_request") or {}
    user = pr.get("user") or {}

    return {
        "title": pr.get("title") or "",
        "body": pr.get("body") or "",
        "user": user.get("login") or "",
        "state": pr.get("state") or "",
        "created_at": pr.get("created_at") or "",
        "files": [],
        "commit_messages": [],
        "additions": pr.get("additions") or 0,
        "deletions": pr.get("deletions") or 0,
        "changed_files": pr.get("changed_files") or 0
    }
if __name__ == "__main__":
    # Imported here so the server package is only needed when this file is
    # executed directly as a script.
    import uvicorn

    logger.info(f"Starting PR-Sentinel on {HOST}:{PORT}")
    uvicorn.run(app, port=PORT, host=HOST)