diff --git a/src/config/index.js b/src/config/index.js
index 84a5bf2..f8c4330 100644
--- a/src/config/index.js
+++ b/src/config/index.js
@@ -30,6 +30,18 @@ const config = {
     posts: { max: 1, window: 1800 },
     comments: { max: 50, window: 3600 }
   },
+
+  // Spam Detection: duplicate-content settings read by src/middleware/spamDetection.js
+  spamDetection: {
+    // Minimum content length to check (shorter content exempt - greetings, emoji, etc.). Unset, non-numeric or 0 values fall back to 20.
+    minContentLength: parseInt(process.env.SPAM_MIN_CONTENT_LENGTH, 10) || 20,
+    // Same agent, same content window (ms) - default 24h (86400000 ms)
+    selfDuplicateWindow: parseInt(process.env.SPAM_SELF_WINDOW, 10) || 86400000,
+    // Cross-agent identical comment window (ms) - default 1h (3600000 ms)
+    globalDuplicateWindow: parseInt(process.env.SPAM_GLOBAL_WINDOW, 10) || 3600000,
+    // Cleanup interval for expired entries (ms) - default 5 min (300000 ms)
+    cleanupInterval: parseInt(process.env.SPAM_CLEANUP_INTERVAL, 10) || 300000
+  },
 
   // Moltbook specific
   moltbook: {
diff --git a/src/middleware/spamDetection.js b/src/middleware/spamDetection.js
new file mode 100644
index 0000000..b2a0a3a
--- /dev/null
+++ b/src/middleware/spamDetection.js
@@ -0,0 +1,140 @@
+/**
+ * Spam detection middleware
+ *
+ * Detects and blocks duplicate content submissions.
+ * Uses in-memory storage (same pattern as rateLimit.js), so state is per-process and lost on restart.
+ *
+ * Catches:
+ * - Same agent posting identical content within 24h
+ * - Different agents posting identical comments within 1h (coordinated spam)
+ *
+ * Configuration (via config/index.js or environment variables):
+ * - minContentLength: Skip content shorter than this (default: 20)
+ * - selfDuplicateWindow: Same-agent duplicate window in ms (default: 24h)
+ * - globalDuplicateWindow: Cross-agent duplicate window in ms (default: 1h)
+ * - cleanupInterval: How often to purge expired entries (default: 5min)
+ */
+
+const crypto = require('crypto');
+const config = require('../config');
+const { RateLimitError } = require('../utils/errors');
+
+// In-memory storage: content hash -> array of recorded submissions
+const contentHashes = new Map();
+
+// Read configuration
+const {
+  minContentLength: MIN_CONTENT_LENGTH,
+  selfDuplicateWindow: SELF_DUPLICATE_WINDOW,
+  globalDuplicateWindow: GLOBAL_DUPLICATE_WINDOW,
+  cleanupInterval: CLEANUP_INTERVAL
+} = config.spamDetection;
+
+// Periodically purge records older than the longest detection window.
+// unref() keeps this timer from holding the process (or a test run) open.
+setInterval(() => {
+  const cutoff = Date.now() - Math.max(SELF_DUPLICATE_WINDOW, GLOBAL_DUPLICATE_WINDOW);
+  for (const [hash, entries] of contentHashes.entries()) {
+    const filtered = entries.filter(e => e.timestamp >= cutoff);
+    if (filtered.length === 0) {
+      contentHashes.delete(hash);
+    } else {
+      contentHashes.set(hash, filtered);
+    }
+  }
+}, CLEANUP_INTERVAL).unref();
+
+/**
+ * Hash content for duplicate detection.
+ * Applies Unicode NFKC folding, lowercasing and whitespace collapsing first,
+ * so trivial variations of the same text produce the same hash.
+ *
+ * @param {string} text - Content to hash
+ * @returns {string} First 16 hex chars of the SHA-256 of the normalized text
+ */
+function hashContent(text) {
+  const normalized = text
+    .normalize('NFKC')
+    .toLowerCase()
+    .replace(/\s+/g, ' ')
+    .trim();
+  const digest = crypto.createHash('sha256').update(normalized).digest('hex');
+  return digest.substring(0, 16);
+}
+
+/**
+ * Create duplicate content detection middleware.
+ *
+ * For posts: blocks same-agent duplicates only (different agents may
+ * legitimately cross-post to different submolts).
+ *
+ * For comments: blocks both same-agent and cross-agent duplicates
+ * (identical comments across threads are almost always spam). The
+ * cross-agent check only matches comments recorded for other agents, so a
+ * comment that repeats another agent's post is not rejected.
+ *
+ * Non-string content and content shorter than minContentLength skip all checks.
+ *
+ * @param {string} contentType - 'post' or 'comment'
+ * @returns {Function} Express middleware; rejects duplicates via next(RateLimitError)
+ */
+function duplicateDetection(contentType = 'comment') {
+  return async (req, res, next) => {
+    try {
+      // Only string content can be hashed; anything else is passed on to the
+      // downstream handler instead of failing here with a TypeError.
+      const content = req.body && typeof req.body.content === 'string' ? req.body.content : '';
+
+      // Skip very short content (greetings, emoji reactions, etc.)
+      if (content.length < MIN_CONTENT_LENGTH) {
+        return next();
+      }
+
+      const hash = hashContent(content);
+      const agentId = req.agent.id;
+      const now = Date.now();
+      const entries = contentHashes.get(hash) || [];
+
+      // Check 1: Same agent, same content within window (posts and comments alike)
+      const selfDupe = entries.find(e => e.agentId === agentId && e.timestamp >= now - SELF_DUPLICATE_WINDOW);
+      if (selfDupe) {
+        const retryAfter = Math.ceil((selfDupe.timestamp + SELF_DUPLICATE_WINDOW - now) / 1000);
+        console.warn(`[spam] Blocked self-duplicate: agent=${agentId} hash=${hash} type=${contentType}`);
+        throw new RateLimitError('You already posted this content recently', retryAfter);
+      }
+
+      // Check 2: Cross-agent identical comments within window
+      // Only for comments -- posts may be legitimately cross-posted
+      if (contentType === 'comment') {
+        const globalDupe = entries.find(
+          e => e.contentType === 'comment' && e.agentId !== agentId &&
+            e.timestamp >= now - GLOBAL_DUPLICATE_WINDOW
+        );
+        if (globalDupe) {
+          const retryAfter = Math.ceil((globalDupe.timestamp + GLOBAL_DUPLICATE_WINDOW - now) / 1000);
+          console.warn(`[spam] Blocked cross-agent duplicate: agent=${agentId} originalAgent=${globalDupe.agentId} hash=${hash}`);
+          throw new RateLimitError('This content was recently posted by another agent', retryAfter);
+        }
+      }
+
+      // Record this submission. NOTE(review): this happens before the route
+      // handler runs, so content whose creation later fails is still recorded.
+      entries.push({ agentId, contentType, timestamp: now });
+      contentHashes.set(hash, entries);
+
+      next();
+    } catch (error) {
+      next(error);
+    }
+  };
+}
+
+module.exports = {
+  duplicateDetection,
+  hashContent,
+  // Exported for testing
+  _contentHashes: contentHashes,
+  _SELF_DUPLICATE_WINDOW: SELF_DUPLICATE_WINDOW,
+  _GLOBAL_DUPLICATE_WINDOW: GLOBAL_DUPLICATE_WINDOW,
+  _MIN_CONTENT_LENGTH: MIN_CONTENT_LENGTH,
+};
diff --git a/src/routes/posts.js b/src/routes/posts.js
index e42d1f8..36b3217 100644
--- a/src/routes/posts.js
+++ b/src/routes/posts.js
@@ -7,6 +7,7 @@ const { Router } = require('express');
 const { asyncHandler } = require('../middleware/errorHandler');
 const { requireAuth } = require('../middleware/auth');
 const { postLimiter, commentLimiter } = require('../middleware/rateLimit');
+const { duplicateDetection } = require('../middleware/spamDetection');
 const { success, created, noContent, paginated } = require('../utils/response');
 const PostService = require('../services/PostService');
 const CommentService = require('../services/CommentService');
@@ -36,7 +37,7 @@ router.get('/', requireAuth, asyncHandler(async (req, res) => {
  * POST /posts
  * Create a new post
  */
-router.post('/', requireAuth, postLimiter, asyncHandler(async (req, res) => {
+router.post('/', requireAuth, postLimiter, duplicateDetection('post'), asyncHandler(async (req, res) => {
   const { submolt, title, content, url } = req.body;
 
   const post = await PostService.create({
@@ -114,7 +115,7 @@ router.get('/:id/comments', requireAuth, asyncHandler(async (req, res) => {
  * POST /posts/:id/comments
  * Add a comment to a post
  */
-router.post('/:id/comments', requireAuth, commentLimiter, asyncHandler(async (req, res) => {
+router.post('/:id/comments', requireAuth, commentLimiter, duplicateDetection('comment'), asyncHandler(async (req, res) => {
   const { content, parent_id } = req.body;
 
   const comment = await CommentService.create({
diff --git a/test/api.test.js b/test/api.test.js
index 3e74134..2c82a8b 100644
--- a/test/api.test.js
+++ b/test/api.test.js
@@ -150,6 +150,125 @@ describe('Error Classes', () => {
   });
 });
 
+describe('Spam Detection', () => {
+  const { hashContent, duplicateDetection, _contentHashes } = require('../src/middleware/spamDetection');
+
+  // Helpers that build the minimal req/res objects the middleware reads
+  function mockReq(agentId, content) {
+    return { agent: { id: agentId }, body: { content } };
+  }
+  function mockRes() {
+    const res = { headers: {}, statusCode: null };
+    res.status = (code) => { res.statusCode = code; return res; };
+    res.setHeader = (k, v) => { res.headers[k] = v; };
+    res.json = (data) => { res.data = data; };
+    return res;
+  }
+
+  test('hashContent normalizes whitespace and case', () => {
+    const h1 = hashContent('Hello World');
+    const h2 = hashContent('hello world');
+    const h3 = hashContent(' HELLO WORLD ');
+    assertEqual(h1, h2, 'Should normalize case and whitespace');
+    assertEqual(h2, h3, 'Should trim and collapse spaces');
+  });
+
+  test('hashContent produces different hashes for different content', () => {
+    const h1 = hashContent('This is post one');
+    const h2 = hashContent('This is post two');
+    assert(h1 !== h2, 'Different content should produce different hashes');
+  });
+
+  test('duplicateDetection allows first submission', async () => {
+    _contentHashes.clear();
+    const middleware = duplicateDetection('comment');
+    const req = mockReq('agent-1', 'This is a unique comment for testing');
+    const res = mockRes();
+    let nextCalled = false;
+    await middleware(req, res, () => { nextCalled = true; });
+    assert(nextCalled, 'Should call next() for first submission');
+  });
+
+  test('duplicateDetection blocks same-agent duplicate', async () => {
+    _contentHashes.clear();
+    const middleware = duplicateDetection('comment');
+    const content = 'This is a duplicate comment for testing';
+    const req1 = mockReq('agent-1', content);
+    const req2 = mockReq('agent-1', content);
+    const res = mockRes();
+
+    let nextCount = 0;
+    let errorCaught = null;
+    await middleware(req1, res, () => { nextCount++; });
+    await middleware(req2, res, (err) => { if (err) errorCaught = err; else nextCount++; });
+
+    assertEqual(nextCount, 1, 'Should only allow first submission');
+    assert(errorCaught !== null, 'Should pass error to next for duplicate');
+    assertEqual(errorCaught.statusCode, 429, 'Should be rate limit error');
+  });
+
+  test('duplicateDetection blocks cross-agent identical comments', async () => {
+    _contentHashes.clear();
+    const middleware = duplicateDetection('comment');
+    const content = 'Spam content posted by bot farm agents';
+
+    let nextCount = 0;
+    let errorCaught = null;
+    await middleware(mockReq('bot-1', content), mockRes(), () => { nextCount++; });
+    await middleware(mockReq('bot-2', content), mockRes(), (err) => { if (err) errorCaught = err; else nextCount++; });
+
+    assertEqual(nextCount, 1, 'Should only allow first agent');
+    assert(errorCaught !== null, 'Should block cross-agent duplicate comment');
+  });
+
+  test('duplicateDetection allows cross-agent posts (not comments)', async () => {
+    _contentHashes.clear();
+    const middleware = duplicateDetection('post');
+    const content = 'Legitimate content that two agents might both post';
+
+    let nextCount = 0;
+    await middleware(mockReq('agent-1', content), mockRes(), () => { nextCount++; });
+    await middleware(mockReq('agent-2', content), mockRes(), () => { nextCount++; });
+
+    assertEqual(nextCount, 2, 'Should allow different agents to post same content');
+  });
+
+  test('duplicateDetection skips very short content', async () => {
+    _contentHashes.clear();
+    const middleware = duplicateDetection('comment');
+
+    let nextCount = 0;
+    await middleware(mockReq('agent-1', 'lol'), mockRes(), () => { nextCount++; });
+    await middleware(mockReq('agent-1', 'lol'), mockRes(), () => { nextCount++; });
+
+    assertEqual(nextCount, 2, 'Should skip content under 20 chars');
+  });
+
+  test('duplicateDetection allows a comment that repeats a post by another agent', async () => {
+    _contentHashes.clear();
+    const content = 'Post body that another agent quotes in a comment';
+
+    let nextCount = 0;
+    await duplicateDetection('post')(mockReq('agent-1', content), mockRes(), () => { nextCount++; });
+    await duplicateDetection('comment')(mockReq('agent-2', content), mockRes(), (err) => { if (!err) nextCount++; });
+
+    assertEqual(nextCount, 2, 'Comment repeating a post by another agent should be allowed');
+  });
+
+  test('duplicateDetection passes non-string content through', async () => {
+    _contentHashes.clear();
+    const middleware = duplicateDetection('comment');
+    const content = { text: 'not a string but long enough to look like content' };
+
+    let nextCount = 0;
+    let errorCaught = null;
+    await middleware(mockReq('agent-1', content), mockRes(), (err) => { if (err) errorCaught = err; else nextCount++; });
+
+    assertEqual(nextCount, 1, 'Should call next() without an error');
+    assert(errorCaught === null, 'Should not fail on non-string content');
+  });
+});
+
 describe('Config', () => {
   test('config loads without error', () => {
     const config = require('../src/config');