Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions src/config/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,18 @@ const config = {
posts: { max: 1, window: 1800 },
comments: { max: 50, window: 3600 }
},

// Spam Detection
// Each setting is parsed from its environment variable with parseInt(..., 10).
// NOTE: because of the `|| <default>` fallback, an unset, non-numeric, or explicit
// 0 value all resolve to the default, so none of these can be set to 0 via env.
spamDetection: {
// Minimum content length to check (shorter content exempt - greetings, emoji, etc.)
minContentLength: parseInt(process.env.SPAM_MIN_CONTENT_LENGTH, 10) || 20,
// Same agent, same content window (ms) - default 24h
selfDuplicateWindow: parseInt(process.env.SPAM_SELF_WINDOW, 10) || 86400000,
// Cross-agent identical comment window (ms) - default 1h
globalDuplicateWindow: parseInt(process.env.SPAM_GLOBAL_WINDOW, 10) || 3600000,
// Cleanup interval for expired entries (ms) - default 5 min
cleanupInterval: parseInt(process.env.SPAM_CLEANUP_INTERVAL, 10) || 300000
},

// Moltbook specific
moltbook: {
Expand Down
140 changes: 140 additions & 0 deletions src/middleware/spamDetection.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
/**
* Spam detection middleware
*
* Detects and blocks duplicate content submissions.
* Uses in-memory storage (same pattern as rateLimit.js).
*
* Catches:
* - Same agent posting identical content within 24h
* - Different agents posting identical comments within 1h (coordinated spam)
*
* Configuration (via config/index.js or environment variables):
* - minContentLength: Skip content shorter than this (default: 20)
* - selfDuplicateWindow: Same-agent duplicate window in ms (default: 24h)
* - globalDuplicateWindow: Cross-agent duplicate window in ms (default: 1h)
* - cleanupInterval: How often to purge expired entries (default: 5min)
*/

const crypto = require('crypto');
const config = require('../config');
const { RateLimitError } = require('../utils/errors');

// Module-level store: content hash -> array of { agentId, timestamp } records.
const contentHashes = new Map();

// Resolve the spam-detection tunables from the shared config once, at load time.
const spamSettings = config.spamDetection;
const MIN_CONTENT_LENGTH = spamSettings.minContentLength;
const SELF_DUPLICATE_WINDOW = spamSettings.selfDuplicateWindow;
const GLOBAL_DUPLICATE_WINDOW = spamSettings.globalDuplicateWindow;
const CLEANUP_INTERVAL = spamSettings.cleanupInterval;

// Periodically purge expired records so the in-memory store does not grow forever.
// A record must survive for the LONGER of the two duplicate windows: cutting off at
// the self window alone would drop records early whenever the global window is
// configured larger, silently disabling cross-agent detection for the remainder.
const cleanupTimer = setInterval(() => {
  const cutoff = Date.now() - Math.max(SELF_DUPLICATE_WINDOW, GLOBAL_DUPLICATE_WINDOW);
  for (const [hash, entries] of contentHashes.entries()) {
    const filtered = entries.filter(e => e.timestamp >= cutoff);
    if (filtered.length === 0) {
      contentHashes.delete(hash);
    } else {
      contentHashes.set(hash, filtered);
    }
  }
}, CLEANUP_INTERVAL);

// Housekeeping must not keep the process alive on its own (test runs, one-off
// scripts, graceful shutdown). Guarded because only Node timers expose unref().
if (cleanupTimer && typeof cleanupTimer.unref === 'function') {
  cleanupTimer.unref();
}

/**
 * Compute a short fingerprint of a piece of content for duplicate detection.
 *
 * The text is first reduced to a canonical form (lower-cased, every run of
 * whitespace collapsed to a single space, leading/trailing whitespace removed)
 * so that trivially different submissions map to the same fingerprint.
 *
 * @param {string} text - Content to hash
 * @returns {string} First 16 hex characters of the SHA-256 digest of the canonical text
 */
function hashContent(text) {
  const lowered = text.toLowerCase();
  const collapsed = lowered.replace(/\s+/g, ' ');
  const canonical = collapsed.trim();

  const hasher = crypto.createHash('sha256');
  hasher.update(canonical);
  const fullDigest = hasher.digest('hex');

  // Only a 16-character prefix of the hex digest is kept as the map key.
  return fullDigest.substring(0, 16);
}

/**
 * Return the most recent entry accepted by `predicate`, or undefined if none match.
 * The most recent match is the one that expires last, so it yields the correct
 * retryAfter value when several records fall inside the window.
 *
 * @param {Array<{agentId: *, timestamp: number}>} entries - Records stored for one hash
 * @param {Function} predicate - Called with an entry; truthy means it counts as a duplicate
 * @returns {Object|undefined} Latest matching entry
 */
function findLatestEntry(entries, predicate) {
  let latest;
  for (const entry of entries) {
    if (predicate(entry) && (latest === undefined || entry.timestamp > latest.timestamp)) {
      latest = entry;
    }
  }
  return latest;
}

/**
 * Create duplicate content detection middleware.
 *
 * For posts: blocks same-agent duplicates only (different agents may
 * legitimately cross-post to different submolts).
 *
 * For comments: blocks both same-agent and cross-agent duplicates
 * (identical comments across threads are almost always spam).
 *
 * Requests whose body is missing, or whose `content` is not a string or is
 * shorter than MIN_CONTENT_LENGTH, are passed through untouched so that the
 * downstream handler decides what to do with them.
 *
 * Blocked requests are reported by calling next() with a RateLimitError whose
 * second constructor argument is the number of seconds until the blocking
 * record leaves its window.
 *
 * @param {string} contentType - 'post' or 'comment'
 * @returns {Function} Express middleware
 */
function duplicateDetection(contentType = 'comment') {
  return async (req, res, next) => {
    try {
      const content = req.body ? req.body.content : undefined;

      // Skip anything that cannot be hashed, and very short content
      // (greetings, emoji reactions, etc.)
      if (typeof content !== 'string' || content.length < MIN_CONTENT_LENGTH) {
        return next();
      }

      const hash = hashContent(content);
      const agentId = req.agent.id;
      const now = Date.now();
      const entries = contentHashes.get(hash) || [];

      // Check 1: Same agent, same content within window
      const selfDupe = findLatestEntry(
        entries,
        e => e.agentId === agentId && e.timestamp >= now - SELF_DUPLICATE_WINDOW
      );
      if (selfDupe) {
        const retryAfter = Math.ceil((selfDupe.timestamp + SELF_DUPLICATE_WINDOW - now) / 1000);
        console.warn(`[spam] Blocked self-duplicate: agent=${agentId} hash=${hash} type=${contentType}`);
        throw new RateLimitError(
          'You already posted this content recently',
          retryAfter
        );
      }

      // Check 2: Cross-agent identical comments within window
      // Only for comments -- posts may be legitimately cross-posted.
      // Own records are excluded: they are governed by Check 1, and matching them
      // here would blame "another agent" for the requester's own earlier submission.
      if (contentType === 'comment') {
        const globalDupe = findLatestEntry(
          entries,
          e => e.agentId !== agentId && e.timestamp >= now - GLOBAL_DUPLICATE_WINDOW
        );
        if (globalDupe) {
          const retryAfter = Math.ceil((globalDupe.timestamp + GLOBAL_DUPLICATE_WINDOW - now) / 1000);
          console.warn(`[spam] Blocked cross-agent duplicate: agent=${agentId} originalAgent=${globalDupe.agentId} hash=${hash}`);
          throw new RateLimitError(
            'This content was recently posted by another agent',
            retryAfter
          );
        }
      }

      // Record this content hash
      entries.push({ agentId, timestamp: now });
      contentHashes.set(hash, entries);

      next();
    } catch (error) {
      next(error);
    }
  };
}

// Public API of the module.
const exported = { duplicateDetection, hashContent };

// Internals exposed (underscore-prefixed) only so tests can inspect them.
exported._contentHashes = contentHashes;
exported._SELF_DUPLICATE_WINDOW = SELF_DUPLICATE_WINDOW;
exported._GLOBAL_DUPLICATE_WINDOW = GLOBAL_DUPLICATE_WINDOW;
exported._MIN_CONTENT_LENGTH = MIN_CONTENT_LENGTH;

module.exports = exported;
5 changes: 3 additions & 2 deletions src/routes/posts.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ const { Router } = require('express');
const { asyncHandler } = require('../middleware/errorHandler');
const { requireAuth } = require('../middleware/auth');
const { postLimiter, commentLimiter } = require('../middleware/rateLimit');
const { duplicateDetection } = require('../middleware/spamDetection');
const { success, created, noContent, paginated } = require('../utils/response');
const PostService = require('../services/PostService');
const CommentService = require('../services/CommentService');
Expand Down Expand Up @@ -36,7 +37,7 @@ router.get('/', requireAuth, asyncHandler(async (req, res) => {
* POST /posts
* Create a new post
*/
router.post('/', requireAuth, postLimiter, asyncHandler(async (req, res) => {
router.post('/', requireAuth, postLimiter, duplicateDetection('post'), asyncHandler(async (req, res) => {
const { submolt, title, content, url } = req.body;

const post = await PostService.create({
Expand Down Expand Up @@ -114,7 +115,7 @@ router.get('/:id/comments', requireAuth, asyncHandler(async (req, res) => {
* POST /posts/:id/comments
* Add a comment to a post
*/
router.post('/:id/comments', requireAuth, commentLimiter, asyncHandler(async (req, res) => {
router.post('/:id/comments', requireAuth, commentLimiter, duplicateDetection('comment'), asyncHandler(async (req, res) => {
const { content, parent_id } = req.body;

const comment = await CommentService.create({
Expand Down
95 changes: 95 additions & 0 deletions test/api.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,101 @@ describe('Error Classes', () => {
});
});

describe('Spam Detection', () => {
  const { hashContent, duplicateDetection, _contentHashes } = require('../src/middleware/spamDetection');

  // Helpers to create mock req/res
  function mockReq(agentId, content) {
    return { agent: { id: agentId }, body: { content } };
  }
  function mockRes() {
    const res = { headers: {}, statusCode: null };
    res.status = (code) => { res.statusCode = code; return res; };
    res.setHeader = (k, v) => { res.headers[k] = v; };
    res.json = (data) => { res.data = data; };
    return res;
  }

  // Run the middleware once and record HOW next() was invoked.
  // next() and next(err) must be told apart: a callback that merely counts
  // invocations also counts rejections, which makes "allows ..." tests vacuous.
  async function submit(middleware, agentId, content) {
    const outcome = { passed: false, error: null };
    await middleware(mockReq(agentId, content), mockRes(), (err) => {
      if (err) {
        outcome.error = err;
      } else {
        outcome.passed = true;
      }
    });
    return outcome;
  }

  test('hashContent normalizes whitespace and case', () => {
    const h1 = hashContent('Hello World');
    const h2 = hashContent('hello world');
    const h3 = hashContent(' HELLO WORLD ');
    assertEqual(h1, h2, 'Should normalize case and whitespace');
    assertEqual(h2, h3, 'Should trim and collapse spaces');
  });

  test('hashContent produces different hashes for different content', () => {
    const h1 = hashContent('This is post one');
    const h2 = hashContent('This is post two');
    assert(h1 !== h2, 'Different content should produce different hashes');
  });

  test('duplicateDetection allows first submission', async () => {
    _contentHashes.clear();
    const middleware = duplicateDetection('comment');

    const first = await submit(middleware, 'agent-1', 'This is a unique comment for testing');

    assert(first.passed, 'Should call next() for first submission');
    assert(first.error === null, 'Should not pass an error for first submission');
  });

  test('duplicateDetection blocks same-agent duplicate', async () => {
    _contentHashes.clear();
    const middleware = duplicateDetection('comment');
    const content = 'This is a duplicate comment for testing';

    const first = await submit(middleware, 'agent-1', content);
    const second = await submit(middleware, 'agent-1', content);

    assert(first.passed && first.error === null, 'Should only allow first submission');
    assert(!second.passed, 'Should not call next() without error for duplicate');
    assert(second.error !== null, 'Should pass error to next for duplicate');
    assertEqual(second.error.statusCode, 429, 'Should be rate limit error');
  });

  test('duplicateDetection blocks cross-agent identical comments', async () => {
    _contentHashes.clear();
    const middleware = duplicateDetection('comment');
    const content = 'Spam content posted by bot farm agents';

    const first = await submit(middleware, 'bot-1', content);
    const second = await submit(middleware, 'bot-2', content);

    assert(first.passed && first.error === null, 'Should only allow first agent');
    assert(!second.passed, 'Should not call next() without error for cross-agent duplicate');
    assert(second.error !== null, 'Should block cross-agent duplicate comment');
    assertEqual(second.error.statusCode, 429, 'Should be rate limit error');
  });

  test('duplicateDetection allows cross-agent posts (not comments)', async () => {
    _contentHashes.clear();
    const middleware = duplicateDetection('post');
    const content = 'Legitimate content that two agents might both post';

    const first = await submit(middleware, 'agent-1', content);
    const second = await submit(middleware, 'agent-2', content);

    assert(first.passed && first.error === null, 'Should allow first agent to post');
    assert(second.passed && second.error === null, 'Should allow different agents to post same content');
  });

  test('duplicateDetection skips very short content', async () => {
    _contentHashes.clear();
    const middleware = duplicateDetection('comment');

    const first = await submit(middleware, 'agent-1', 'lol');
    const second = await submit(middleware, 'agent-1', 'lol');

    assert(first.passed && first.error === null, 'Should skip content under 20 chars');
    assert(second.passed && second.error === null, 'Should skip content under 20 chars');
  });
});

describe('Config', () => {
test('config loads without error', () => {
const config = require('../src/config');
Expand Down