Re: [Coconut-support] Registration problems #193
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # This workflow automatically detects and handles spam issues submitted to the repository. | |
| # It analyzes new issues for spam patterns and characteristics, then closes and locks spam issues | |
| # while notifying the submitter, helping maintain repository quality. | |
| # | |
| # Maintainers: | |
| # - name: Nisha Sharma | |
| # - email: nisha.sharma@uni-jena.de | |
| name: Enhanced Spam Issue Detection | |
| # Trigger: run only when a new issue is opened. | |
| on: | |
| issues: | |
| types: [opened] | |
| jobs: | |
| spam-detection: | |
| runs-on: ubuntu-latest | |
| # Write access to issues is the only permission requested for this job. | |
| permissions: | |
| issues: write | |
| steps: | |
| - name: Checkout repository | |
| uses: actions/checkout@v3 | |
| - name: Setup Node.js | |
| uses: actions/setup-node@v3 | |
| with: | |
| node-version: '18' | |
| # Installs npm packages for the inline script below. | |
| - name: Install dependencies | |
| run: npm install @octokit/rest natural | |
| - name: Check if issue is spam | |
| id: spam-check | |
| uses: actions/github-script@v6 | |
| with: | |
| # Passes the workflow's GITHUB_TOKEN secret to the action as its github-token input. | |
| github-token: ${{ secrets.GITHUB_TOKEN }} | |
| script: | | |
// Natural-language toolkit; still referenced by the content analysis below.
const natural = require('natural');
// Reuse the REST client that actions/github-script injects as `github`. It is
// already authenticated with the step's `github-token` input, whereas
// process.env.GITHUB_TOKEN is not exported to this step, so a hand-built
// Octokit would be unauthenticated and every write call would be rejected.
// `github.rest` exposes the same `issues`, `repos` and `search` namespaces
// that the functions below call on `octokit`.
const octokit = github.rest;
// Detection thresholds and label names, kept in one place for easy tuning.
// The object (and its nested label map) is frozen so that no later code can
// silently change a threshold or label while an issue is being analysed.
const CONFIG = Object.freeze({
  // Minimum confidence score to mark as spam (0-1)
  spamThreshold: 0.65,
  // Minimum confidence score to mark as phishing (0-1)
  phishingThreshold: 0.70,
  // Maximum URLs allowed before considering suspicious
  maxUrls: 4,
  // Labels to apply
  labels: Object.freeze({
    spam: 'spam',
    phishing: 'phishing',
    dataSpam: 'data-selling',
    businessScam: 'business-scam',
    marketingSpam: 'marketing-spam',
  }),
});
/**
 * Analyses the issue carried by the triggering event and, when it is
 * classified as spam, hands it to handleSpamIssue.
 *
 * Authors for whom checkExemptUser resolves to true are skipped without any
 * content analysis, so trusted users are never auto-closed.
 *
 * @param {object} context - Event context; reads `payload.issue` and `repo`.
 * @param {object} octokit - REST client passed through to the helpers.
 * @returns {Promise<boolean>} true when the issue was handled as spam; false
 *   when it was left alone, the author is exempt, or an error occurred.
 */
async function processIssue(context, octokit) {
  try {
    const issue = context.payload.issue;
    const issueNumber = issue.number;
    const issueTitle = issue.title || '';
    const issueBody = issue.body || '';
    const issueAuthor = issue.user.login;

    // Log issue information for debugging
    console.log(`Analyzing issue #${issueNumber} from ${issueAuthor}`);
    console.log(`Title: ${issueTitle}`);

    // Trusted authors bypass the heuristics entirely.
    const isExempt = await checkExemptUser(
      octokit,
      context.repo.owner,
      context.repo.repo,
      issueAuthor
    );
    if (isExempt) {
      console.log(`Skipping spam analysis: ${issueAuthor} is exempt`);
      return false;
    }

    const spamAnalysis = analyzeContent(issueTitle, issueBody);
    console.log(`Analysis results: ${JSON.stringify(spamAnalysis)}`);

    if (spamAnalysis.isSpam) {
      await handleSpamIssue(octokit, context, issueNumber, spamAnalysis);
      return true;
    }
    return false;
  } catch (error) {
    console.error('Error processing issue:', error);
    // Don't close the issue if there's an error in our detection
    return false;
  }
}
/**
 * Checks if a user should be exempt from spam filtering.
 *
 * A user is exempt when they are a collaborator on the repository or have at
 * least one merged pull request in it.
 *
 * @param {object} octokit - REST client exposing `repos.checkCollaborator`
 *   and `search.issuesAndPullRequests`.
 * @param {string} owner - Repository owner.
 * @param {string} repo - Repository name.
 * @param {string} username - Login of the issue author.
 * @returns {Promise<boolean>} true when the user is exempt; false otherwise,
 *   including when the status cannot be determined.
 */
async function checkExemptUser(octokit, owner, repo, username) {
  try {
    // Check if user is a collaborator. The endpoint answers 204 with an empty
    // body for collaborators and 404 (raised as an error) otherwise, so the
    // decision has to come from the status code, not from `data`.
    try {
      const response = await octokit.repos.checkCollaborator({
        owner,
        repo,
        username,
      });
      if (response.status === 204) {
        return true;
      }
    } catch (e) {
      // 404 simply means "not a collaborator"; anything else is worth a log
      // line, but either way we continue with the remaining checks.
      if (!(e && e.status === 404)) {
        console.error('Error checking collaborator status:', e);
      }
    }

    // Check if user has any merged PRs
    const { data: prs } = await octokit.search.issuesAndPullRequests({
      q: `repo:${owner}/${repo} author:${username} is:pr is:merged`,
    });
    return prs.total_count > 0;
  } catch (error) {
    console.error('Error checking user exemption status:', error);
    // If we can't determine, don't exempt
    return false;
  }
}
/**
 * Handles an issue identified as spam: explains the decision in a comment,
 * closes the issue as "not planned", applies the label named by
 * `spamAnalysis.spamType`, and finally locks the conversation with the
 * `spam` reason, as the workflow description promises.
 *
 * API failures are logged and swallowed so the workflow step itself does not
 * fail; a failure aborts the remaining actions of this function.
 *
 * @param {object} octokit - REST client exposing the `issues` namespace.
 * @param {object} context - Event context; reads `repo.owner` and `repo.repo`.
 * @param {number} issueNumber - Number of the issue to act on.
 * @param {object} spamAnalysis - Result object with `spamType`,
 *   `closeReason` and `confidence` (0-1).
 * @returns {Promise<void>}
 */
async function handleSpamIssue(octokit, context, issueNumber, spamAnalysis) {
  console.log(`Closing issue #${issueNumber} as ${spamAnalysis.spamType}`);
  // Placeholder for real metrics storage; for now the decision is only logged.
  console.log(`Metrics: Spam type=${spamAnalysis.spamType}, confidence=${spamAnalysis.confidence}`);

  const target = {
    owner: context.repo.owner,
    repo: context.repo.repo,
    issue_number: issueNumber,
  };

  try {
    // Add comment explaining why the issue was closed
    await octokit.issues.createComment({
      ...target,
      body: `This issue has been automatically closed because it was detected as ${spamAnalysis.closeReason} with ${Math.round(spamAnalysis.confidence * 100)}% confidence. If this is a mistake, please contact the repository maintainers.`,
    });
    // Close the issue
    await octokit.issues.update({
      ...target,
      state: 'closed',
      state_reason: 'not_planned',
    });
    // Add appropriate label
    await octokit.issues.addLabels({
      ...target,
      labels: [spamAnalysis.spamType],
    });
    // Lock last, so the explanatory comment and label are already in place.
    await octokit.issues.lock({
      ...target,
      lock_reason: 'spam',
    });
  } catch (error) {
    console.error('Error handling spam issue:', error);
  }
}
/**
 * Detects URLs in text and performs analysis on them.
 *
 * All domain-related checks look at the URL's hostname only, so brand names
 * that merely appear in a path or query string do not count, and the genuine
 * brand domains are never reported as lookalikes of themselves.
 *
 * @param {string} text - Text to scan (callers pass lower-cased content).
 * @returns {{urlCount: number, hasExcessiveUrls: boolean,
 *   suspiciousUrls: string[], hasSuspiciousUrls: boolean,
 *   mimicryDomains: string[], hasMimicryDomains: boolean}}
 */
function analyzeUrls(text) {
  // Extract all URLs, dropping sentence punctuation / closing brackets that
  // the greedy match picks up at the end of a link.
  const urlRegex = /(https?:\/\/[^\s]+)/g;
  const trailingPunctuation = /[)\]}>.,;:!?'"]+$/;
  const urlMatches = (text.match(urlRegex) || []).map((raw) => raw.replace(trailingPunctuation, ''));

  // Check for excessive URLs
  const hasExcessiveUrls = urlMatches.length > CONFIG.maxUrls;

  // Raw "host[:port]" part of a URL, exactly as written in the text.
  const authorityOf = (url) => url.replace(/^https?:\/\//i, '').split(/[/?#]/)[0];

  // Normalised hostname; falls back to the raw authority when the URL does
  // not parse (for example an out-of-range dotted quad).
  const hostnameOf = (url) => {
    try {
      return new URL(url).hostname.toLowerCase();
    } catch (e) {
      return authorityOf(url).split('@').pop().split(':')[0].toLowerCase();
    }
  };

  const belongsTo = (host, domain) => host === domain || host.endsWith(`.${domain}`);

  // Official domains besides "<brand>.com" that commonly appear in issues.
  const extraOfficialDomains = {
    github: ['github.io', 'githubusercontent.com'],
    google: ['googleapis.com'],
    amazon: ['amazonaws.com'],
  };
  const isOfficialHost = (host, brand) =>
    [`${brand}.com`, ...(extraOfficialDomains[brand] || [])].some((domain) => belongsTo(host, domain));

  // Lookalike spellings (typo-squatting); each also matches the real name,
  // so a hit only counts when the host is not an official one.
  const lookalikeBrands = [
    { brand: 'paypal', pattern: /paypa[l1]/i },
    { brand: 'amazon', pattern: /amaz[o0]n/i },
    { brand: 'google', pattern: /g[o0]{2}gle/i },
    { brand: 'facebook', pattern: /faceb[o0]{2}k/i },
    { brand: 'linkedin', pattern: /[l1]inked[i1]n/i },
  ];
  // Unusual TLDs for business sites (anchored to the end of the hostname, so
  // a URL without a path is caught too).
  const suspiciousTld = /\.(xyz|top|club|online|site|fun|space|icu)$/i;
  // Bare IPv4 address used as host.
  const ipv4Host = /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/;
  // Percent-encoding inside the host part hides the destination; encoding in
  // a path or query (e.g. %20) is ordinary and is not flagged.
  const encodedChar = /%[0-9A-F]{2}/i;

  const suspiciousUrls = urlMatches.filter((url) => {
    const host = hostnameOf(url);
    return (
      lookalikeBrands.some(({ brand, pattern }) => pattern.test(host) && !isOfficialHost(host, brand)) ||
      suspiciousTld.test(host) ||
      ipv4Host.test(host) ||
      encodedChar.test(authorityOf(url))
    );
  });

  // Check for domain mimicry: a known brand name inside a hostname that is
  // not one of that brand's official domains.
  const knownDomains = ['paypal', 'amazon', 'apple', 'microsoft', 'google', 'facebook',
    'instagram', 'twitter', 'linkedin', 'github', 'dropbox', 'chase',
    'wellsfargo', 'bankofamerica', 'capitalone', 'amex', 'gmail'];
  const mimicryDomains = urlMatches.filter((url) => {
    const host = hostnameOf(url);
    return knownDomains.some((known) => host.includes(known) && !isOfficialHost(host, known));
  });

  return {
    urlCount: urlMatches.length,
    hasExcessiveUrls,
    suspiciousUrls,
    hasSuspiciousUrls: suspiciousUrls.length > 0,
    mimicryDomains,
    hasMimicryDomains: mimicryDomains.length > 0,
  };
}
/**
 * Analyzes an issue's title and body for spam, phishing and
 * legitimate-content indicators and decides whether to treat it as spam.
 *
 * Spam/phishing keyword lists are matched at the start of a word only, so a
 * term buried inside another word (e.g. "ssn" in "classname", "leads" in
 * "misleads") does not count, while prefixes and plurals ("discounts",
 * "cryptocurrency") still do. Legitimate-content terms keep plain substring
 * matching so that compound names such as "TypeError" are recognised.
 *
 * @param {string} title - Issue title.
 * @param {string} body - Issue body (may be an empty string).
 * @returns {{isSpam: boolean, spamType: string, closeReason: string,
 *   isPhishing: boolean, confidence: number,
 *   indicators: {spam: string[], phishing: string[], legitimate: string[]}}}
 *   `confidence` is the phishing score when classified as phishing and the
 *   spam score otherwise.
 */
function analyzeContent(title, body) {
  // Convert to lowercase for consistent matching
  const lowerTitle = title.toLowerCase();
  const lowerBody = body.toLowerCase();
  const fullContent = `${lowerTitle} ${lowerBody}`;

  // Word-start matching for keyword lists (see function comment).
  const escapeRegExp = (term) => term.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const containsTerm = (term) => new RegExp(`\\b${escapeRegExp(term)}`).test(fullContent);

  // Create a spam indicators object to track matches
  const indicators = {
    spam: [],
    phishing: [],
    legitimate: [],
  };

  // --- KEYWORD DETECTION ---
  // Common spam indicators - general terms found across many spam types
  const spamKeywords = [
    // Medical/pharmaceutical spam
    'viagra', 'cialis', 'pharmacy', 'prescription', 'medication',
    // Gambling/lottery
    'casino', 'lottery', 'jackpot', 'betting', 'gambling', 'winners',
    // Financial scams
    'bitcoin', 'crypto', 'investment opportunity', 'earn money', 'make money',
    'forex', 'trading', 'passive income', 'quick cash', 'financial freedom',
    // Work opportunities
    'work from home', 'remote job', 'side hustle', 'residual income',
    // Loans/finance
    'loan offer', 'quick loan', 'debt relief', 'credit score', 'refinance',
    // Dating/adult
    'dating site', 'hot singles', 'meet singles', 'adult content',
    // Health products
    'weight loss', 'diet', 'fat burn', 'miracle cure', 'natural remedy',
    // Marketing/SEO
    'seo services', 'boost ranking', 'website traffic', 'backlinks',
    // Domain sales
    'domain for sale', 'premium domain', 'web address',
    // Email list/contact sales (general patterns)
    'email list', 'contact database', 'leads', 'mailing list',
    // Marketing language
    'limited time', 'exclusive offer', 'act now', 'don\'t miss',
    'unsubscribe', 'one-time offer', 'best price', 'discount',
  ];
  // Phishing-specific keywords and phrases
  const phishingKeywords = [
    'verify your account', 'account verification', 'update your information',
    'confirm your details', 'unusual activity', 'suspicious activity',
    'security alert', 'password expired', 'account suspended', 'account on hold',
    'payment failed', 'billing problem', 'invoice attached', 'document shared',
    'dropbox link', 'google doc', 'login attempt', 'please login',
    'confirm identity', 'reset password', 'unusual login', 'access limited',
  ];

  // Check for spam keywords
  const matchedSpamKeywords = spamKeywords.filter(containsTerm);
  if (matchedSpamKeywords.length > 0) {
    indicators.spam.push(`Matched spam keywords: ${matchedSpamKeywords.join(', ')}`);
  }
  // Check for phishing keywords
  const matchedPhishingKeywords = phishingKeywords.filter(containsTerm);
  if (matchedPhishingKeywords.length > 0) {
    indicators.phishing.push(`Matched phishing keywords: ${matchedPhishingKeywords.join(', ')}`);
  }

  // --- URL ANALYSIS ---
  const urlAnalysis = analyzeUrls(fullContent);
  if (urlAnalysis.hasExcessiveUrls) {
    indicators.spam.push(`Excessive URLs: ${urlAnalysis.urlCount}`);
  }
  if (urlAnalysis.hasSuspiciousUrls) {
    indicators.phishing.push(`Suspicious URLs: ${urlAnalysis.suspiciousUrls.join(', ')}`);
  }
  if (urlAnalysis.hasMimicryDomains) {
    indicators.phishing.push(`Domain mimicry: ${urlAnalysis.mimicryDomains.join(', ')}`);
  }

  // --- PATTERN MATCHING ---
  // Check for common email marketing patterns
  const hasMarketingPatterns =
    fullContent.includes('unsubscribe') ||
    fullContent.includes('view in browser') ||
    fullContent.includes('view as webpage') ||
    fullContent.includes('click here to unsubscribe');
  if (hasMarketingPatterns) {
    indicators.spam.push('Email marketing patterns detected');
  }

  // Data selling spam detection
  const dataSellingPatterns = [
    // Pattern 1: Mentions of contact lists with availability/offering language
    /(email|contact|attendee|lead|prospect|customer|client)\s*(list|database|data|information)/i.test(fullContent) &&
      /(available|acquire|purchase|buy|sell|offer|get|access|download)/i.test(fullContent),
    // Pattern 2: Mentions of contact numbers with contact-related term
    /\d{4,}\s*(contact|email|lead|record|profile|attendee|prospect)/i.test(fullContent),
    // Pattern 3: Industry or event-related data selling
    /(conference|expo|exhibition|event|industry|trade show|fair)/i.test(fullContent) &&
      /(list|database|contact|attendee|participant|visitor)/i.test(fullContent) &&
      /(available|interested|pricing|information|detail)/i.test(fullContent),
    // Pattern 4: Direct marketing of contact data
    /(marketing|business|contact|email)\s*(list|database|directory)/i.test(fullContent),
    // Pattern 5: Data selling with engagement request
    /(contact|email|data|list)/i.test(fullContent) &&
      /(interested|let me know|get back|reply|respond)/i.test(fullContent) &&
      /(price|cost|detail|information|more)/i.test(fullContent),
  ];
  const isDataSellingSpam = dataSellingPatterns.some(pattern => pattern === true);
  if (isDataSellingSpam) {
    indicators.spam.push('Data selling patterns detected');
  }

  // Business scam detection
  const businessScamPatterns = [
    // General business outreach with vague titles
    /director|manager|CEO|head of|officer|specialist/i.test(fullContent) &&
      !/(specific|particular|regarding your|about your|existing)/i.test(fullContent),
    // Vague partnership/supplier requests
    /(looking|searching|seeking)\s*(for|to find)\s*(partner|supplier|vendor|distributor)/i.test(fullContent) &&
      !/(specific product|specific service|specific project)/i.test(fullContent),
    // Vague interest in products without specifics
    /(interest|interested in)\s*(your|in your)\s*(product|service|business|company)/i.test(fullContent) &&
      !/(specific|particular|model|item)/i.test(fullContent),
    // Mentions of payment terms or bank transfers in initial outreach
    /(payment term|bank transfer|wire transfer|advance payment)/i.test(fullContent) &&
      /(day|week|month|percent|%)/i.test(fullContent),
    // Requesting catalog without specific interest
    /(catalog|catalogue|price list|quotation|quote)/i.test(fullContent) &&
      !/(specific|particular|item|model)/i.test(fullContent),
    // Generic distribution network claims
    /(distribution|market|customer|client)\s*(network|base|reach|access)/i.test(fullContent),
    // Formulaic introduction with generic company reference
    /I am \w+\s+\w+\s+(from|of|at)\s+[A-Z]/i.test(fullContent) &&
      /(company|corporation|enterprise|business|firm)/i.test(fullContent) &&
      !/(about your|regarding your|your recent|your product)/i.test(fullContent),
  ];
  // A single vague-business signal is too weak; require at least two.
  const businessScamCount = businessScamPatterns.filter(Boolean).length;
  if (businessScamCount >= 2) {
    indicators.spam.push(`Business scam patterns detected (${businessScamCount} indicators)`);
  }

  // Marketing follow-up spam detection
  const isMarketingFollowUp = (
    // Follow-up language
    /(follow|following)\s*(up|with you)/i.test(fullContent) &&
    // Generic marketing engagement patterns
    (
      // Fake previous contact
      /(haven't heard|no response|not heard back|didn't receive|since our last|since my last)/i.test(fullContent) ||
      // Pushing for response
      /(checking in|touching base|reaching out|wanted to see)/i.test(fullContent)
    ) &&
    // Marketing offering indicators
    (
      /(quote|proposal|offer|service|package|solution|deal|discount|promotion)/i.test(fullContent) ||
      /(SEO|marketing|design|development|optimization|analysis|consultation|strategy)/i.test(fullContent) ||
      /^(Hello|Hi|Greetings|Good day|Dear)/i.test(fullContent)
    )
  );
  if (isMarketingFollowUp) {
    indicators.spam.push('Marketing follow-up patterns detected');
  }

  // Check for requests for sensitive information (common in phishing)
  const sensitiveInfoRequests = [
    'credit card', 'social security', 'ssn', 'password',
    'login credentials', 'bank details', 'personal information',
    'verify your identity', 'login to view',
  ];
  const matchedSensitiveRequests = sensitiveInfoRequests.filter(containsTerm);
  if (matchedSensitiveRequests.length > 0 || /please\s+(?:enter|provide|confirm|update|verify)\s+your/i.test(fullContent)) {
    indicators.phishing.push('Requests for sensitive information detected');
  }

  // Check for urgent language (common in phishing)
  const urgentLanguageTerms = [
    'urgent', 'immediate action', 'immediate attention', 'act now',
    'expires soon', 'within 24 hours', 'account will be locked', 'security breach',
  ];
  const matchedUrgentTerms = urgentLanguageTerms.filter(containsTerm);
  const hasUrgentTimePattern = /within\s+\d+\s+(?:hour|day|minute)/i.test(fullContent);
  if (matchedUrgentTerms.length > 0 || hasUrgentTimePattern) {
    // Urgency combined with a link or a request for sensitive data counts as
    // a phishing signal; urgency alone is only a spam signal.
    const urgentScore = (urlAnalysis.urlCount > 0 || matchedSensitiveRequests.length > 0) ? 2 : 1;
    if (urgentScore > 1) {
      indicators.phishing.push('Urgent language with links/sensitive requests');
    } else {
      indicators.spam.push('Urgent language detected');
    }
  }

  // --- LEGITIMATE CONTENT INDICATORS ---
  // Check for patterns that suggest legitimate issues
  if ((lowerBody.includes('support') || lowerBody.includes('help') || lowerBody.includes('issue')) &&
      (lowerBody.includes('error') || lowerBody.includes('problem') || lowerBody.includes('question') || lowerBody.includes('how to'))) {
    indicators.legitimate.push('Contains support-related terminology');
  }
  // Check if the structure looks like a legitimate support request
  if (lowerBody.includes('?') ||
      lowerBody.includes('please') ||
      lowerBody.includes('thank you') ||
      lowerBody.includes('help')) {
    indicators.legitimate.push('Contains question or polite request format');
  }
  // Code-related content is likely legitimate
  if (lowerBody.includes('```') || lowerBody.includes('code') || /\b(function|class|var|const|let)\b/.test(lowerBody)) {
    indicators.legitimate.push('Contains code elements');
  }
  // Technical terms suggest legitimate technical issues
  const technicalTerms = ['error', 'exception', 'traceback', 'log', 'debug', 'runtime', 'compile', 'crash'];
  const matchedTechTerms = technicalTerms.filter(term => fullContent.includes(term));
  if (matchedTechTerms.length > 0) {
    indicators.legitimate.push('Contains technical terminology');
  }

  // --- CALCULATE CONFIDENCE SCORES ---
  // Calculate weights for different indicators
  const spamWeight = indicators.spam.length * 0.2;
  const phishingWeight = indicators.phishing.length * 0.3;
  const legitimateWeight = indicators.legitimate.length * 0.4;
  // Calculate overall confidence scores (the 0.1 keeps the denominator
  // non-zero and the cap keeps the score below certainty)
  let spamConfidence = Math.min(0.95, (spamWeight / (spamWeight + legitimateWeight + 0.1)));
  let phishingConfidence = Math.min(0.95, (phishingWeight / (phishingWeight + legitimateWeight + 0.1)));
  // Strong legitimate indicators should reduce confidence more significantly
  if (indicators.legitimate.length >= 2) {
    spamConfidence *= 0.6;
    phishingConfidence *= 0.5;
  }

  // --- MAKE FINAL DECISION ---
  // Determine if this is spam, what type, and with what confidence
  let isSpam = false;
  let spamType = CONFIG.labels.spam;
  let closeReason = 'potential spam';
  let confidence = spamConfidence;
  if (phishingConfidence > CONFIG.phishingThreshold) {
    isSpam = true;
    spamType = CONFIG.labels.phishing;
    closeReason = 'potential phishing content';
    confidence = phishingConfidence;
  } else if (spamConfidence > CONFIG.spamThreshold) {
    isSpam = true;
    // Determine specific spam type
    if (isDataSellingSpam) {
      spamType = CONFIG.labels.dataSpam;
      closeReason = 'unsolicited data selling';
    } else if (businessScamCount >= 2) {
      spamType = CONFIG.labels.businessScam;
      closeReason = 'potential business scam';
    } else if (isMarketingFollowUp) {
      spamType = CONFIG.labels.marketingSpam;
      closeReason = 'unsolicited marketing';
    }
  }

  return {
    isSpam,
    spamType,
    closeReason,
    isPhishing: spamType === CONFIG.labels.phishing,
    confidence,
    indicators,
  };
}
| // Entry point: process the triggering issue. processIssue resolves to true when the | |
| // issue was handled as spam and to false otherwise (including when it hit an error); | |
| // that boolean is what this script returns. | |
| return await processIssue(context, octokit); | |