diff --git a/src/mongo/db/kill_sessions_local.cpp b/src/mongo/db/kill_sessions_local.cpp index 8419f61b5e..a739d35413 100644 --- a/src/mongo/db/kill_sessions_local.cpp +++ b/src/mongo/db/kill_sessions_local.cpp @@ -74,7 +74,9 @@ SessionKiller::Result killSessionsLocal(OperationContext* opCtx, } struct CoroCtx { - static constexpr size_t kCoroStackSize = 3200 * 1024; + // Coroutine stack size: Increased from 3.2MB to 8MB to prevent stack overflow + // in complex database operations (deep recursion, large BSON processing, etc.) + static constexpr size_t kCoroStackSize = 8192 * 1024; // 8MB boost::context::protected_fixedsize_stack salloc{kCoroStackSize}; boost::context::continuation source; std::function<void()> resumeTask; @@ -111,7 +113,7 @@ void killAllExpiredTransactions(OperationContext* opCtx) { getGlobalServiceContext()->getServiceEntryPoint()->getServiceExecutor(); coroCtx->resumeTask = [&source = coroCtx->source, &client] { - log() << "abortArbitraryTransactionIfExpired call resume."; + error() << "abortArbitraryTransactionIfExpired call resume."; Client::setCurrent(std::move(client)); source = source.resume(); }; @@ -129,7 +131,7 @@ void killAllExpiredTransactions(OperationContext* opCtx) { [&finished, &mux, &cv, coroCtx, opCtx, session, &client, serviceExecutor]( boost::context::continuation&& sink) { coroCtx->yieldFunc = [&sink, &client]() { - log() << "abortArbitraryTransactionIfExpired call yield."; + error() << "abortArbitraryTransactionIfExpired call yield."; client = Client::releaseCurrent(); sink = sink.resume(); }; diff --git a/src/mongo/db/modules/eloq/data_substrate b/src/mongo/db/modules/eloq/data_substrate index 01b3961541..372542f747 160000 --- a/src/mongo/db/modules/eloq/data_substrate +++ b/src/mongo/db/modules/eloq/data_substrate @@ -1 +1 @@ -Subproject commit 01b396154133a25f7a49f6546d40bb7148cf2df6 +Subproject commit 372542f747511ba8541e3310c48678509870b9e6 diff --git a/src/mongo/transport/service_executor_coroutine.cpp 
b/src/mongo/transport/service_executor_coroutine.cpp index 3be9c3c7a7..69db5501c6 100644 --- a/src/mongo/transport/service_executor_coroutine.cpp +++ b/src/mongo/transport/service_executor_coroutine.cpp @@ -316,13 +316,22 @@ Status ServiceExecutorCoroutine::schedule(Task task, std::function<void()> ServiceExecutorCoroutine::coroutineResumeFunctor(uint16_t threadGroupId, const Task& task) { invariant(threadGroupId < _threadGroups.size()); - return [thd_group = &_threadGroups[threadGroupId], &task]() { thd_group->resumeTask(task); }; + // IMPORTANT: capture task by value. Capturing by reference here would dangle because `task` + // is a reference to this function's parameter, which goes out of scope when we return. + Task taskCopy = task; + return [thd_group = &_threadGroups[threadGroupId], task = std::move(taskCopy)]() mutable { + thd_group->resumeTask(task); + }; } std::function<void()> ServiceExecutorCoroutine::coroutineLongResumeFunctor(uint16_t threadGroupId, const Task& task) { invariant(threadGroupId < _threadGroups.size()); - return [thd_group = &_threadGroups[threadGroupId], &task]() { thd_group->enqueueTask(task); }; + // Same lifetime rule as coroutineResumeFunctor(): capture by value to avoid dangling refs. 
+ Task taskCopy = task; + return [thd_group = &_threadGroups[threadGroupId], task = std::move(taskCopy)]() mutable { + thd_group->enqueueTask(task); + }; } void ServiceExecutorCoroutine::ongoingCoroutineCountUpdate(uint16_t threadGroupId, int delta) { diff --git a/src/mongo/transport/service_state_machine.cpp b/src/mongo/transport/service_state_machine.cpp index aa9cbb04b9..6e589c9c58 100644 --- a/src/mongo/transport/service_state_machine.cpp +++ b/src/mongo/transport/service_state_machine.cpp @@ -53,8 +53,10 @@ #include "mongo/util/net/socket_exception.h" #include "mongo/util/quick_exit.h" +#include #include #include +#include #include #include #include @@ -277,6 +279,7 @@ ServiceStateMachine::ServiceStateMachine(ServiceContext* svcContext, ServiceStateMachine::~ServiceStateMachine() { MONGO_LOG(1) << "ServiceStateMachine::~ServiceStateMachine"; _source = {}; + _coroSink = {}; ::munmap(_coroStack, kCoroStackSize); } @@ -301,6 +304,7 @@ void ServiceStateMachine::reset(ServiceContext* svcContext, _coroLongResume = {}; _coroMigrateThreadGroup = {}; _resumeTask = {}; + _coroSink = {}; _migrating.store(false, std::memory_order_relaxed); _threadGroupId.store(groupId, std::memory_order_relaxed); _owned.store(Ownership::kUnowned); @@ -543,11 +547,23 @@ void ServiceStateMachine::_runNextInGuard(ThreadGuard guard) { _coroStatus = CoroStatus::OnGoing; _serviceExecutor->ongoingCoroutineCountUpdate( _threadGroupId.load(std::memory_order_relaxed), 1); - _resumeTask = [ssm = this] { + // IMPORTANT: resume tasks can be queued and executed asynchronously. + // Avoid capturing a raw `this` pointer to prevent UAF during shutdown. 
+ auto wssm = weak_from_this(); + _resumeTask = [wssm] { + auto ssm = wssm.lock(); + if (!ssm) { + return; + } + Client::setCurrent(std::move(ssm->_dbClient)); if (ssm->_migrating.load(std::memory_order_relaxed)) { - ssm->_coroResume(); - ssm->_coroYield(); + if (ssm->_coroResume) { + ssm->_coroResume(); + } + if (ssm->_coroYield) { + ssm->_coroYield(); + } } else { ssm->_runResumeProcess(); bool migrating = true; @@ -565,8 +581,11 @@ void ServiceStateMachine::_runNextInGuard(ThreadGuard guard) { _threadGroupId.load(std::memory_order_relaxed), _resumeTask); _coroLongResume = _serviceExecutor->coroutineLongResumeFunctor( _threadGroupId.load(std::memory_order_relaxed), _resumeTask); - _coroMigrateThreadGroup = std::bind( - &ServiceStateMachine::_migrateThreadGroup, this, std::placeholders::_1); + _coroMigrateThreadGroup = [wssm](uint16_t threadGroupId) { + if (auto ssm = wssm.lock()) { + ssm->_migrateThreadGroup(threadGroupId); + } + }; boost::context::stack_context sc = _coroStackContext(); boost::context::preallocated prealloc(sc.sp, sc.size, sc); @@ -579,14 +598,28 @@ void ServiceStateMachine::_runNextInGuard(ThreadGuard guard) { if (!ssm) { return std::move(sink); } - ssm->_coroYield = [ssm = ssm.get(), &sink]() { + // Store sink as a member so yield never captures references to + // stack-local continuation variables. + ssm->_coroSink = std::move(sink); + + // Yield functor: only valid to call while executing on the + // coroutine stack. It resumes the caller continuation. + ssm->_coroYield = [wssm]() { + auto ssmLocked = wssm.lock(); + if (!ssmLocked) { + return; + } MONGO_LOG(3) << "call yield"; - ssm->_dbClient = Client::releaseCurrent(); - sink = sink.resume(); + // If these invariants fail, it strongly suggests someone is + // calling yield outside the active coroutine window. 
+ invariant(haveClient()); + invariant(static_cast<bool>(ssmLocked->_coroSink)); + ssmLocked->_dbClient = Client::releaseCurrent(); + ssmLocked->_coroSink = ssmLocked->_coroSink.resume(); }; ssm->_processMessage(std::move(guard)); - return std::move(sink); + return std::move(ssm->_coroSink); }); bool migrating = true; @@ -622,6 +655,9 @@ void ServiceStateMachine::_runNextInGuard(ThreadGuard guard) { } return; + } catch (const boost::context::detail::forced_unwind&) { + // Boost.Context uses forced_unwind to unwind stacks safely. It must never be swallowed. + throw; } catch (const DBException& e) { // must be right above std::exception to avoid catching subclasses log() << "DBException handling request, closing client connection: " << redact(e); @@ -733,9 +769,26 @@ void ServiceStateMachine::setThreadGroupId(size_t id) { boost::context::stack_context ServiceStateMachine::_coroStackContext() { boost::context::stack_context sc; - sc.size = kCoroStackSize - _osPageSize; - // Because stack grows downwards from high address? - sc.sp = _coroStack + kCoroStackSize; + // Stack grows downwards from high address. + // + // We reserve the first OS page as a guard page (PROT_NONE), so the usable region is: + // [bottom, top) + // where: + // bottom = _coroStack + _osPageSize + // top = _coroStack + kCoroStackSize + // + // Boost.Context models the usable range as [sp - size, sp), so if we align sp down we must + // also recompute size from the aligned sp to keep the bottom boundary correct. + char* bottom = _coroStack + _osPageSize; + char* top = _coroStack + kCoroStackSize; + + // Align down to 16-byte boundary (common ABI requirement on x86-64 and for SIMD). 
+ char* sp = + reinterpret_cast<char*>(reinterpret_cast<uintptr_t>(top) & ~static_cast<uintptr_t>(15)); + + invariant(sp > bottom); + sc.sp = sp; + sc.size = static_cast<size_t>(sp - bottom); return sc; } @@ -745,8 +798,12 @@ void ServiceStateMachine::_migrateThreadGroup(uint16_t threadGroupId) { _coroResume = _serviceExecutor->coroutineResumeFunctor(threadGroupId, _resumeTask); _coroLongResume = _serviceExecutor->coroutineLongResumeFunctor(threadGroupId, _resumeTask); _migrating.store(true, std::memory_order_relaxed); - _coroResume(); - _coroYield(); + if (_coroResume) { + _coroResume(); + } + if (_coroYield) { + _coroYield(); + } #ifdef MONGO_CONFIG_DEBUG_BUILD _owningThread.store(stdx::this_thread::get_id()); #endif @@ -769,6 +826,18 @@ void ServiceStateMachine::_cleanupSession(ThreadGuard guard) { _inMessage.reset(); + // Disable coroutine functors early during teardown to prevent any further coroutine + // yield/resume calls racing with destruction of this SSM and its coroutine stack. + if (haveClient()) { + Client::getCurrent()->setCoroutineFunctors(CoroutineFunctors::Unavailable); + } + _coroYield = {}; + _coroResume = {}; + _coroLongResume = {}; + _coroMigrateThreadGroup = {}; + _resumeTask = {}; + _coroSink = {}; + // By ignoring the return value of Client::releaseCurrent() we destroy the session. // _dbClient is now nullptr and _dbClientPtr is invalid and should never be accessed. Client::releaseCurrent(); diff --git a/src/mongo/transport/service_state_machine.h b/src/mongo/transport/service_state_machine.h index e68db8f705..5a395c9c98 100644 --- a/src/mongo/transport/service_state_machine.h +++ b/src/mongo/transport/service_state_machine.h @@ -282,10 +282,16 @@ class ServiceStateMachine : public std::enable_shared_from_this<ServiceStateMachine>