From a1f8ae5bca60bf4ff529621de42714f0ba4fef54 Mon Sep 17 00:00:00 2001 From: Pawel Lebioda Date: Wed, 1 Apr 2026 14:48:34 +0200 Subject: [PATCH 1/3] PSMDB-1997: Add deferred encryption key cleanup to avoid race with checkpoint This commit implements deferred encryption key cleanup to prevent race conditions between encryption key deletion and checkpoint cleanup during dropDatabase operations. --- .../collection_catalog_helper.cpp | 20 +-- src/mongo/db/storage/keydb_api.h | 48 ++++++- src/mongo/db/storage/storage_engine_impl.cpp | 118 ++++++++++++++++++ src/mongo/db/storage/storage_engine_impl.h | 25 ++++ .../storage/wiredtiger/encryption_keydb.cpp | 52 +++++++- .../db/storage/wiredtiger/encryption_keydb.h | 10 ++ .../wiredtiger/wiredtiger_global_options.idl | 26 ++++ .../wiredtiger/wiredtiger_kv_engine.cpp | 106 +++++++++++++--- .../storage/wiredtiger/wiredtiger_kv_engine.h | 8 ++ .../db/storage/wiredtiger/wiredtiger_util.cpp | 34 +++++ .../db/storage/wiredtiger/wiredtiger_util.h | 9 ++ 11 files changed, 423 insertions(+), 33 deletions(-) diff --git a/src/mongo/db/shard_role/shard_catalog/collection_catalog_helper.cpp b/src/mongo/db/shard_role/shard_catalog/collection_catalog_helper.cpp index 0ffa26177ead4..512ec118bfe01 100644 --- a/src/mongo/db/shard_role/shard_catalog/collection_catalog_helper.cpp +++ b/src/mongo/db/shard_role/shard_catalog/collection_catalog_helper.cpp @@ -384,15 +384,17 @@ Status dropCollectionsWithPrefix(OperationContext* opCtx, std::vector toDrop = catalog->getAllCollectionUUIDsFromDb(dbName); - const auto status = dropCollections(opCtx, toDrop, collectionNamePrefix); - - // If all collections were dropped successfully then drop database's encryption key - if (status.isOK()) { - auto storageEngine = opCtx->getServiceContext()->getStorageEngine(); - storageEngine->keydbDropDatabase(dbName); - } - - return status; + // NOTE: Encryption key cleanup is NOT performed here. 
+ // The key cannot be safely deleted at this point because drop-pending idents + // (encrypted with this key) may still exist in storage and will be accessed + // during checkpoint cleanup. Deleting the key here would cause WT_NOTFOUND errors. + // Instead, orphaned encryption keys are cleaned up by a background process + // (_cleanupOrphanedEncryptionKeys) that runs periodically and verifies: + // 1. The database no longer exists in the catalog + // 2. No storage idents (including drop-pending ones) use the key + // This deferred cleanup is enabled via the encryptionKeyCleanupDeferred server parameter. + + return dropCollections(opCtx, toDrop, collectionNamePrefix); } void shutDownCollectionCatalogAndGlobalStorageEngineCleanly(ServiceContext* service, diff --git a/src/mongo/db/storage/keydb_api.h b/src/mongo/db/storage/keydb_api.h index 4f34cbe4b7773..3aa3de0612b03 100644 --- a/src/mongo/db/storage/keydb_api.h +++ b/src/mongo/db/storage/keydb_api.h @@ -31,6 +31,10 @@ Copyright (C) 2018-present Percona and/or its affiliates. All rights reserved. #pragma once +#include +#include +#include + namespace mongo { class DatabaseName; } @@ -45,11 +49,53 @@ struct KeyDBAPI { virtual ~KeyDBAPI() {} /** - * Returns whether the engine supports feature compatibility version 3.6 + * Delete the encryption key for the specified database. + * + * This method is called by the deferred cleanup process after verifying: + * 1. The database no longer exists in the catalog + * 2. No storage idents (including drop-pending ones) use the key + * + * NOTE: This should NOT be called directly during dropDatabase operations + * because drop-pending idents may still exist and require the key for + * checkpoint cleanup. Use the deferred cleanup mechanism instead. */ virtual void keydbDropDatabase(const mongo::DatabaseName& dbName) { // do nothing for engines which do not support KeyDB } + + /** + * Returns all encryption key IDs (database names) stored in the key database. 
+ * Used for deferred encryption key cleanup. + */ + virtual std::vector getAllEncryptionKeyIds() { + return {}; // empty for engines which do not support KeyDB + } + + /** + * Returns a set of all encryption keyIds currently in use by any ident in storage. + * This scans the storage engine metadata to find all encrypted idents and extracts their + * keyids. Used to reliably determine if a key can be safely deleted. + */ + virtual std::set getAllEncryptionKeyIdsInUse() { + return {}; // empty for engines which do not support KeyDB + } + + /** + * Returns true if deferred encryption key cleanup is enabled. + * When enabled, encryption keys are not deleted immediately on dropDatabase; + * instead they are cleaned up asynchronously by a background process. + */ + virtual bool isEncryptionKeyCleanupDeferred() const { + return false; // disabled by default for engines which do not support KeyDB + } + + /** + * Returns the interval in seconds between orphaned encryption key cleanup attempts. + * Only effective when isEncryptionKeyCleanupDeferred() returns true. 
+ */ + virtual int32_t getEncryptionKeyCleanupIntervalSeconds() const { + return 60; // default 60 seconds + } }; } // namespace percona diff --git a/src/mongo/db/storage/storage_engine_impl.cpp b/src/mongo/db/storage/storage_engine_impl.cpp index 3d7b5b8a3f8dc..07ac6f0a35a17 100644 --- a/src/mongo/db/storage/storage_engine_impl.cpp +++ b/src/mongo/db/storage/storage_engine_impl.cpp @@ -35,10 +35,13 @@ #include "mongo/bson/bsonobj.h" #include "mongo/db/admission/execution_control/execution_admission_context.h" #include "mongo/db/client.h" +#include "mongo/db/database_name_util.h" #include "mongo/db/index/multikey_paths.h" #include "mongo/db/operation_context.h" #include "mongo/db/rss/replicated_storage_service.h" +#include "mongo/db/shard_role/lock_manager/d_concurrency.h" #include "mongo/db/shard_role/shard_catalog/catalog_raii.h" +#include "mongo/db/shard_role/shard_catalog/collection_catalog.h" #include "mongo/db/shard_role/transaction_resources.h" #include "mongo/db/storage/backup_cursor_hooks.h" #include "mongo/db/storage/deferred_drop_record_store.h" @@ -52,6 +55,7 @@ #include "mongo/db/storage/spill_table.h" #include "mongo/db/storage/storage_options.h" #include "mongo/db/storage/storage_repair_observer.h" +#include "mongo/db/storage/wiredtiger/encryption_keydb.h" #include "mongo/db/storage/write_unit_of_work.h" #include "mongo/idl/idl_parser.h" #include "mongo/logv2/log.h" @@ -140,6 +144,9 @@ StorageEngineImpl::StorageEngineImpl(OperationContext* opCtx, [this](OperationContext* opCtx, Timestamp timestamp) { _onMinOfCheckpointAndOldestTimestampChanged(opCtx, timestamp); }), + _encryptionKeyCleanupListener( + TimestampMonitor::TimestampType::kMinOfCheckpointAndOldest, + [this](OperationContext* opCtx, Timestamp) { _cleanupOrphanedEncryptionKeys(opCtx); }), _supportsCappedCollections(_engine->supportsCappedCollections()) { // Replace the noop recovery unit for the startup operation context now that the storage engine @@ -573,6 +580,7 @@ void 
StorageEngineImpl::startTimestampMonitor( _engine.get(), getGlobalServiceContext()->getPeriodicRunner()); _timestampMonitor->addListener(&_minOfCheckpointAndOldestTimestampListener); + _timestampMonitor->addListener(&_encryptionKeyCleanupListener); // Caller must provide listener for cleanup of CollectionCatalog when oldest timestamp advances. invariant(!std::empty(listeners)); @@ -1009,6 +1017,116 @@ void StorageEngineImpl::_onMinOfCheckpointAndOldestTimestampChanged(OperationCon } } +bool StorageEngineImpl::_shouldRunEncryptionKeyCleanup() { + auto now = Date_t::now(); + auto lastCleanup = _lastEncryptionKeyCleanupTime.load(); + auto intervalSeconds = _engine->getEncryptionKeyCleanupIntervalSeconds(); + if (lastCleanup != Date_t{} && (now - lastCleanup) < Seconds(intervalSeconds)) { + return false; + } + _lastEncryptionKeyCleanupTime.store(now); + return true; +} + +std::set StorageEngineImpl::_getExistingDatabaseNames(OperationContext* opCtx) { + std::set existingDbSet; + Lock::GlobalLock globalLock(opCtx, MODE_IS); + auto catalog = CollectionCatalog::get(opCtx); + auto existingDbs = catalog->getAllDbNames(); + for (const auto& db : existingDbs) { + existingDbSet.insert(DatabaseNameUtil::serialize(db, SerializationContext::stateDefault())); + } + return existingDbSet; +} + +void StorageEngineImpl::_cleanupOrphanedEncryptionKeys(OperationContext* opCtx) { + // Check if deferred encryption key cleanup is enabled + if (!_engine->isEncryptionKeyCleanupDeferred()) { + return; + } + + // Check if enough time has passed since the last cleanup + if (!_shouldRunEncryptionKeyCleanup()) { + return; + } + + // Get all encryption key IDs from the key database + auto keyIds = _engine->getAllEncryptionKeyIds(); + if (keyIds.empty()) { + return; + } + + LOGV2_DEBUG(29066, 2, "Checking for orphaned encryption keys", "keyCount"_attr = keyIds.size()); + + // CRITICAL: Get the set of keyIds actually in use by idents in WiredTiger storage. 
+ // This is the AUTHORITATIVE check - if ANY ident (including drop-pending ones that haven't + // been physically removed yet) uses this key, it MUST NOT be deleted. + // This prevents the race condition where a key is deleted while drop-pending idents + // encrypted with that key still exist in storage. + auto keyIdsInUse = _engine->getAllEncryptionKeyIdsInUse(); + + LOGV2_DEBUG(29070, + 2, + "Encryption key cleanup check", + "keysInKeyDb"_attr = keyIds.size(), + "keysInUseByIdents"_attr = keyIdsInUse.size()); + + // Get the list of existing databases from the catalog + auto existingDbSet = _getExistingDatabaseNames(opCtx); + + for (const auto& keyId : keyIds) { + // Skip special keys + if (EncryptionKeyDB::isSpecialKeyId(keyId)) { + continue; + } + + // Quick check: if database exists in catalog, skip + if (existingDbSet.find(keyId) != existingDbSet.end()) { + continue; + } + + // CRITICAL CHECK: If ANY ident in storage uses this key, do NOT delete it. + // This includes drop-pending idents that haven't been physically removed yet. + if (keyIdsInUse.count(keyId) > 0) { + LOGV2(29069, + "Skipping encryption key deletion - key is still in use by storage idents", + "keyId"_attr = keyId); + continue; + } + + // Database doesn't appear to exist and no idents use this key - + // acquire lock and verify with synchronization + try { + auto dbName = DatabaseNameUtil::deserialize( + boost::none, keyId, SerializationContext::stateDefault()); + + // Acquire a shared DB lock to prevent concurrent create/drop operations. + // MODE_S blocks writers (MODE_IX/MODE_X) while allowing other readers. 
+ Lock::DBLock dbLock(opCtx, dbName, MODE_S); + + // Re-check with lock held + auto freshCatalog = CollectionCatalog::get(opCtx); + auto freshDbs = freshCatalog->getAllDbNames(); + bool dbExists = std::find(freshDbs.begin(), freshDbs.end(), dbName) != freshDbs.end(); + bool dropPending = freshCatalog->isDropPending(dbName); + + if (!dbExists && !dropPending) { + LOGV2(29067, + "Deleting orphaned encryption key for non-existent database", + "keyId"_attr = keyId); + _engine->keydbDropDatabase(dbName); + } + } catch (const DBException& e) { + // Log and continue with other keys - don't let one failure stop cleanup + LOGV2_DEBUG(29068, + 1, + "Failed to clean up encryption key", + "keyId"_attr = keyId, + "error"_attr = e.toStatus()); + } + } +} + StorageEngineImpl::TimestampMonitor::TimestampMonitor(KVEngine* engine, PeriodicRunner* runner) : _engine(engine), _periodicRunner(runner) { _startup(); diff --git a/src/mongo/db/storage/storage_engine_impl.h b/src/mongo/db/storage/storage_engine_impl.h index 209a9fa380e68..3f0ce0fa17966 100644 --- a/src/mongo/db/storage/storage_engine_impl.h +++ b/src/mongo/db/storage/storage_engine_impl.h @@ -364,6 +364,25 @@ class StorageEngineImpl final : public StorageEngine { void _onMinOfCheckpointAndOldestTimestampChanged(OperationContext* opCtx, const Timestamp& timestamp); + /** + * Cleans up orphaned encryption keys - keys that belong to databases that no longer exist. + * Only active when encryptionKeyCleanupDeferred server parameter is enabled. + */ + void _cleanupOrphanedEncryptionKeys(OperationContext* opCtx); + + /** + * Checks if enough time has passed since the last encryption key cleanup. + * If so, updates the last cleanup time and returns true. + * Returns false if the cleanup interval has not yet elapsed. + */ + bool _shouldRunEncryptionKeyCleanup(); + + /** + * Returns the set of existing database names as serialized strings. + * Acquires a global IS lock to read from the catalog. 
+ */ + std::set _getExistingDatabaseNames(OperationContext* opCtx); + // Main KVEngine instance used for all user tables. // This must be the first member so it is destroyed last. std::unique_ptr _engine; @@ -380,6 +399,12 @@ class StorageEngineImpl final : public StorageEngine { // Listener for min of checkpoint and oldest timestamp changes. TimestampMonitor::TimestampListener _minOfCheckpointAndOldestTimestampListener; + // Listener for orphaned encryption key cleanup. + TimestampMonitor::TimestampListener _encryptionKeyCleanupListener; + + // Tracks the last time orphaned encryption key cleanup was performed. + AtomicWord _lastEncryptionKeyCleanupTime{Date_t{}}; + const bool _supportsCappedCollections; std::unique_ptr _catalogRecordStore; diff --git a/src/mongo/db/storage/wiredtiger/encryption_keydb.cpp b/src/mongo/db/storage/wiredtiger/encryption_keydb.cpp index 5b27a8b8b0cd2..a732c7e97b6da 100644 --- a/src/mongo/db/storage/wiredtiger/encryption_keydb.cpp +++ b/src/mongo/db/storage/wiredtiger/encryption_keydb.cpp @@ -391,7 +391,9 @@ int EncryptionKeyDB::get_key_by_id(const char* keyid, size_t len, unsigned char* memcpy(key, v.data, encryption::Key::kLength); if (kDebugBuild) dump_key(key, encryption::Key::kLength, "loaded key from key DB"); - _encryptors[c_str] = pe; + if (pe != nullptr) { + _encryptors[c_str] = pe; + } return 0; } if (res != WT_NOTFOUND) { @@ -420,7 +422,9 @@ int EncryptionKeyDB::get_key_by_id(const char* keyid, size_t len, unsigned char* if (kDebugBuild) dump_key(key, encryption::Key::kLength, "generated and stored key"); - _encryptors[c_str] = pe; + if (pe != nullptr) { + _encryptors[c_str] = pe; + } return 0; } @@ -459,13 +463,55 @@ int EncryptionKeyDB::delete_key_by_id(const std::string& keyid) { // DB is dropped just after mongod is started and before any read/write operations) auto it = _encryptors.find(keyid); if (it != _encryptors.end()) { - percona_encryption_extension_drop_keyid(it->second); + if (it->second != nullptr) { + 
percona_encryption_extension_drop_keyid(it->second); + } _encryptors.erase(it); } return res; } +bool EncryptionKeyDB::isSpecialKeyId(const std::string& keyId) { + return keyId.empty() || keyId == "/default"; +} + +std::vector EncryptionKeyDB::getAllKeyIds() { + std::vector keyIds; + + WT_CURSOR* cursor; + stdx::lock_guard lk(_lock_sess); + int res = _sess->open_cursor(_sess, "table:key", nullptr, nullptr, &cursor); + if (res) { + LOGV2_ERROR( + 29060, "getAllKeyIds: error opening cursor", "error"_attr = wiredtiger_strerror(res)); + return keyIds; + } + + // Create cursor close guard + std::unique_ptr> cursor_guard( + cursor, [](WT_CURSOR* c) { c->close(c); }); + + while ((res = cursor->next(cursor)) == 0) { + char* k; + res = cursor->get_key(cursor, &k); + if (res == 0 && k != nullptr) { + std::string keyId(k); + if (!isSpecialKeyId(keyId)) { + keyIds.emplace_back(std::move(keyId)); + } + } + } + + if (res != WT_NOTFOUND) { + LOGV2_ERROR( + 29061, "getAllKeyIds: error iterating cursor", "error"_attr = wiredtiger_strerror(res)); + } + + LOGV2_DEBUG(29062, 2, "getAllKeyIds: found keys", "count"_attr = keyIds.size()); + return keyIds; +} + int EncryptionKeyDB::store_gcm_iv_reserved() { uint8_t tmp[_gcm_iv_bytes]; auto end = export_bits(_gcm_iv_reserved, tmp, 8, false); diff --git a/src/mongo/db/storage/wiredtiger/encryption_keydb.h b/src/mongo/db/storage/wiredtiger/encryption_keydb.h index 7abb83918d2e0..8cd7f43ec945c 100644 --- a/src/mongo/db/storage/wiredtiger/encryption_keydb.h +++ b/src/mongo/db/storage/wiredtiger/encryption_keydb.h @@ -39,6 +39,7 @@ Copyright (C) 2018-present Percona and/or its affiliates. All rights reserved. #include #include +#include #include @@ -89,6 +90,15 @@ class EncryptionKeyDB { // drop key for specific keyid (used in dropDatabase) int delete_key_by_id(const std::string& keyid); + // Returns all key IDs (database names) stored in the key database. + // Excludes the master key (empty keyid) and special keys like "/default". 
+ // Used for deferred encryption key cleanup. + std::vector getAllKeyIds(); + + // Returns true if the key ID is a special/reserved key that should not be + // included in user key lists (empty key for master key, "/default" key). + static bool isSpecialKeyId(const std::string& keyId); + // get new counter value for IV in GCM mode int get_iv_gcm(uint8_t* buf, int len); diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_global_options.idl b/src/mongo/db/storage/wiredtiger/wiredtiger_global_options.idl index e7dfc36900327..d20a2c6d5fc40 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_global_options.idl +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_global_options.idl @@ -299,6 +299,32 @@ server_parameters: default: false redact: false + encryptionKeyCleanupDeferred: + description: >- + Encryption keys are never deleted on dropDatabase. When this parameter + is enabled, orphaned keys are cleaned up asynchronously by a background + process that verifies the associated database no longer exists. This + helps avoid race conditions between key deletion and checkpoint cleanup. + The cleanup frequency is controlled by encryptionKeyCleanupIntervalSeconds. + set_at: [startup, runtime] + cpp_vartype: "AtomicWord" + cpp_varname: gEncryptionKeyCleanupDeferred + default: false + redact: false + + encryptionKeyCleanupIntervalSeconds: + description: >- + The interval in seconds between orphaned encryption key cleanup attempts. + Only effective when encryptionKeyCleanupDeferred is enabled. 
+ set_at: [startup, runtime] + cpp_vartype: "AtomicWord" + cpp_varname: gEncryptionKeyCleanupIntervalSeconds + default: 60 + validator: + gte: 1 + lte: 86400 + redact: false + spillWiredTigerEngineRuntimeConfig: description: "Spill WiredTiger Configuration" set_at: runtime diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp index 192608c1487bd..0b5262068e7c6 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp @@ -2495,8 +2495,8 @@ Status WiredTigerKVEngine::hotBackup(OperationContext* opCtx, auto outcome = s3_client->ListBuckets(); if (!outcome.IsSuccess()) { return Status(ErrorCodes::InternalError, - str::stream() << "Cannot list buckets on storage server" - << " : " << outcome.GetError().GetExceptionName() << " : " + str::stream() << "Cannot list buckets on storage server" << " : " + << outcome.GetError().GetExceptionName() << " : " << outcome.GetError().GetMessage()); } for (auto&& bucket : outcome.GetResult().GetBuckets()) { @@ -2534,8 +2534,8 @@ Status WiredTigerKVEngine::hotBackup(OperationContext* opCtx, auto outcome = s3_client->ListObjects(request); if (!outcome.IsSuccess()) { return Status(ErrorCodes::InvalidPath, - str::stream() << "Cannot list objects in the target location" - << " : " << outcome.GetError().GetExceptionName() << " : " + str::stream() << "Cannot list objects in the target location" << " : " + << outcome.GetError().GetExceptionName() << " : " << outcome.GetError().GetMessage()); } const auto root = s3params.path + '/'; @@ -2543,8 +2543,8 @@ Status WiredTigerKVEngine::hotBackup(OperationContext* opCtx, for (auto const& s3_object : object_list) { if (s3_object.GetKey() != root) { return Status(ErrorCodes::InvalidPath, - str::stream() << "Target location is not empty" - << " : " << s3params.bucket << '/' << s3params.path); + str::stream() << "Target location is not empty" << " : " + << 
s3params.bucket << '/' << s3params.path); } } } @@ -2585,8 +2585,8 @@ Status WiredTigerKVEngine::hotBackup(OperationContext* opCtx, .WithUploadId(upload_id)); if (!outcome2.IsSuccess()) { return Status(ErrorCodes::InternalError, - str::stream() << "Cannot abort test multipart upload" - << " : " << upload_id); + str::stream() + << "Cannot abort test multipart upload" << " : " << upload_id); } } } @@ -2851,17 +2851,16 @@ Status WiredTigerKVEngine::hotBackup(OperationContext* opCtx, "AWS", srcFile.string(), std::ios_base::in | std::ios_base::binary); if (!fileToUpload) { return Status(ErrorCodes::InvalidPath, - str::stream() - << "Cannot open file '" << srcFile.string() << "' for backup" - << " : " << strerror(errno)); + str::stream() << "Cannot open file '" << srcFile.string() + << "' for backup" << " : " << strerror(errno)); } request.SetBody(fileToUpload); auto outcome = s3_client->PutObject(request); if (!outcome.IsSuccess()) { return Status(ErrorCodes::InternalError, - str::stream() << "Cannot backup '" << srcFile.string() << "'" - << " : " << outcome.GetError().GetExceptionName() << " : " + str::stream() << "Cannot backup '" << srcFile.string() << "'" << " : " + << outcome.GetError().GetExceptionName() << " : " << outcome.GetError().GetMessage()); } { @@ -3599,15 +3598,82 @@ void WiredTigerKVEngine::dropIdentForImport(Interruptible& interruptible, } void WiredTigerKVEngine::keydbDropDatabase(const DatabaseName& dbName) { - if (_restEncr) { - int res = _restEncr->keyDb()->delete_key_by_id( - DatabaseNameUtil::serialize(dbName, SerializationContext::stateDefault())); - if (res) { - // we cannot throw exceptions here because we are inside WUOW::commit - // every other part of DB is already dropped so we just log error message - LOGV2_ERROR(29001, "failed to delete encryption key for db", logAttrs(dbName)); + if (!_restEncr) { + return; + } + + // Delete the encryption key for this database. 
+ // This method is called by the deferred cleanup process after verifying: + // 1. The database no longer exists in the catalog + // 2. No storage idents (including drop-pending ones) use the key + int res = _restEncr->keyDb()->delete_key_by_id( + DatabaseNameUtil::serialize(dbName, SerializationContext::stateDefault())); + if (res) { + LOGV2_ERROR(29001, "Failed to delete encryption key for database", logAttrs(dbName)); + } else { + LOGV2_DEBUG(29063, 1, "Deleted encryption key for database", logAttrs(dbName)); + } +} + +std::vector WiredTigerKVEngine::getAllEncryptionKeyIds() { + if (!_restEncr) { + return {}; + } + return _restEncr->keyDb()->getAllKeyIds(); +} + +std::set WiredTigerKVEngine::getAllEncryptionKeyIdsInUse() { + std::set keyIdsInUse; + + if (!_restEncr) { + return keyIdsInUse; + } + + auto session = _connection->getUninterruptibleSession(); + + // Get all idents from WiredTiger + auto idents = _wtGetAllIdents(*session); + + LOGV2_DEBUG( + 29071, 2, "Scanning idents for encryption keys in use", "identCount"_attr = idents.size()); + + for (const auto& ident : idents) { + // Get metadata for this ident - use getMetadataCreate to get the original creation + // configuration which includes the encryption settings as they were specified + auto metadata = WiredTigerUtil::getMetadataCreate(*session, fmt::format("table:{}", ident)); + if (!metadata.isOK()) { + LOGV2_DEBUG(29072, + 3, + "Failed to get metadata for ident", + "ident"_attr = ident, + "error"_attr = metadata.getStatus()); + continue; // Skip idents we can't read metadata for + } + + // Extract keyid from metadata + auto keyId = WiredTigerUtil::getEncryptionKeyId(metadata.getValue()); + if (keyId && !EncryptionKeyDB::isSpecialKeyId(*keyId)) { + keyIdsInUse.insert(*keyId); + LOGV2_DEBUG(29074, + 3, + "Found ident using encryption key", + "ident"_attr = ident, + "keyId"_attr = *keyId); } } + + LOGV2_DEBUG( + 29073, 2, "Found encryption keys in use by idents", "keyCount"_attr = keyIdsInUse.size()); 
+ + return keyIdsInUse; +} + +bool WiredTigerKVEngine::isEncryptionKeyCleanupDeferred() const { + return gEncryptionKeyCleanupDeferred.load(); +} + +int32_t WiredTigerKVEngine::getEncryptionKeyCleanupIntervalSeconds() const { + return gEncryptionKeyCleanupIntervalSeconds.load(); } void WiredTigerKVEngine::_checkpoint(WiredTigerSession& session, bool useTimestamp) { diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h index 35c7f4031883b..ee68f15cd3162 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h @@ -511,6 +511,14 @@ class WiredTigerKVEngine final : public WiredTigerKVEngineBase { void keydbDropDatabase(const DatabaseName& dbName) override; + std::vector getAllEncryptionKeyIds() override; + + std::set getAllEncryptionKeyIdsInUse() override; + + bool isEncryptionKeyCleanupDeferred() const override; + + int32_t getEncryptionKeyCleanupIntervalSeconds() const override; + void flushAllFiles(OperationContext* opCtx, bool callerHoldsReadLock) override; Status beginBackup() override; diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_util.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_util.cpp index 26701e7a86031..b10e5e6bb900a 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_util.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_util.cpp @@ -157,6 +157,40 @@ StatusWith WiredTigerUtil::getMetadata(WiredTigerSession& session, return _getMetadata(cursor, uri); } +boost::optional WiredTigerUtil::getEncryptionKeyId(StringData config) { + WiredTigerConfigParser parser(config); + + // Get the "encryption" key which contains a nested struct + WT_CONFIG_ITEM encryptionValue; + if (parser.get("encryption", &encryptionValue) != 0) { + // No encryption key found + return boost::none; + } + + // Encryption value must be a struct: encryption=(name=percona,keyid="...") + if (encryptionValue.type != 
WT_CONFIG_ITEM::WT_CONFIG_ITEM_STRUCT) { + return boost::none; + } + + // Parse the nested encryption struct + WiredTigerConfigParser encryptionParser(encryptionValue); + + // Get the "keyid" sub-key + WT_CONFIG_ITEM keyidValue; + if (encryptionParser.get("keyid", &keyidValue) != 0) { + // No keyid found in encryption config + return boost::none; + } + + // keyid is typically a STRING type (quoted like keyid="...") but could also be ID type + if (keyidValue.type == WT_CONFIG_ITEM::WT_CONFIG_ITEM_STRING || + keyidValue.type == WT_CONFIG_ITEM::WT_CONFIG_ITEM_ID) { + return std::string(keyidValue.str, keyidValue.len); + } + + return boost::none; +} + StatusWith WiredTigerUtil::getSourceMetadata(WiredTigerSession& session, StringData uri) { if (uri.starts_with("file:")) { diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_util.h b/src/mongo/db/storage/wiredtiger/wiredtiger_util.h index 5573a53d447ca..dbb4504831a1a 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_util.h +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_util.h @@ -213,6 +213,15 @@ class WiredTigerUtil { */ static StatusWith getMetadata(WiredTigerSession& session, StringData uri); + /** + * Extracts the encryption keyid from a WiredTiger config string. + * The keyid is found within the encryption config: encryption=(name=percona,keyid="...") + * + * @param config The WiredTiger configuration string (from getMetadata or getMetadataCreate) + * @return The keyid if found, boost::none otherwise. Empty string indicates master key. + */ + static boost::optional getEncryptionKeyId(StringData config); + /** * Gets the source metadata string for collection or index at URI. 
* From 3a78df97a76a695f6865852a3dbc69ffdc02632e Mon Sep 17 00:00:00 2001 From: Pawel Lebioda Date: Tue, 31 Mar 2026 11:21:23 +0200 Subject: [PATCH 2/3] PSMDB-1997: Add unit tests for deferred encryption key cleanup Add unit tests for the new encryption key management functionality: --- ...redtiger_kv_engine_encryption_key_test.cpp | 206 ++++++++++++++++++ .../wiredtiger/wiredtiger_util_test.cpp | 94 ++++++++ 2 files changed, 300 insertions(+) diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine_encryption_key_test.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine_encryption_key_test.cpp index 467d614711700..935b9a0bf69ac 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine_encryption_key_test.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine_encryption_key_test.cpp @@ -44,6 +44,7 @@ Copyright (C) 2022-present Percona and/or its affiliates. All rights reserved. #include "mongo/db/service_context_test_fixture.h" #include "mongo/db/storage/master_key_rotation_completed.h" #include "mongo/db/storage/wiredtiger/encryption_keydb.h" +#include "mongo/db/storage/wiredtiger/wiredtiger_global_options_gen.h" #include "mongo/db/storage/wiredtiger/wiredtiger_kv_engine.h" #include "mongo/logv2/log_component.h" #include "mongo/unittest/temp_dir.h" @@ -59,8 +60,10 @@ Copyright (C) 2022-present Percona and/or its affiliates. All rights reserved. #include #include #include +#include #include #include +#include #include // for `::chmod` @@ -1659,5 +1662,208 @@ TEST_F(WiredTigerKVEngineEncryptionKeyKmipPollStateTest, #undef ASSERT_CREATE_ENGINE_THROWS_REASON_CONTAINS #undef ASSERT_CREATE_ENGINE_THROWS_WHAT +// ============================================================================= +// Tests for EncryptionKeyDB::getAllKeyIds() and WiredTigerKVEngine key management +// ============================================================================= + +/** + * Test fixture for encryption key management operations. 
+ * Extends WiredTigerKVEngineEncryptionKeyFileTest to get a fully configured + * encrypted engine. + */ +class WiredTigerKVEngineEncryptionKeyManagementTest + : public WiredTigerKVEngineEncryptionKeyFileTest { +protected: + void _setUpPreconfiguredEngine() override { + // Set up encryption params and create the engine, but don't reset it + // We want to keep the engine running for our tests + _setUpEncryptionParams(); + _engine = _createWiredTigerKVEngine(); + WtKeyIds::instance().configured = std::move(WtKeyIds::instance().futureConfigured); + } + + // Helper to trigger key creation by calling get_key_by_id + // This simulates what happens when an encrypted ident is created + void triggerKeyCreation(const std::string& keyId) { + auto keyDb = _engine->getEncryptionKeyDB(); + ASSERT(keyDb); + unsigned char keyBuf[32]; // 256-bit key + int res = keyDb->get_key_by_id(keyId.c_str(), keyId.length(), keyBuf, nullptr); + ASSERT_EQ(0, res) << "Failed to create/get key with id: " << keyId; + } + + // Helper to delete a key from the keydb + void deleteKeyFromKeyDb(const std::string& keyId) { + auto keyDb = _engine->getEncryptionKeyDB(); + ASSERT(keyDb); + int res = keyDb->delete_key_by_id(keyId); + ASSERT_EQ(0, res) << "Failed to delete key with id: " << keyId; + } +}; + +// ----------------------------------------------------------------------------- +// Tests for EncryptionKeyDB::getAllKeyIds() +// ----------------------------------------------------------------------------- + +TEST_F(WiredTigerKVEngineEncryptionKeyManagementTest, GetAllKeyIdsInitiallyEmpty) { + // A fresh encrypted engine should have no user database keys + // (only the master key which is excluded) + auto keyDb = _engine->getEncryptionKeyDB(); + ASSERT(keyDb); + + auto keyIds = keyDb->getAllKeyIds(); + ASSERT_TRUE(keyIds.empty()); +} + +TEST_F(WiredTigerKVEngineEncryptionKeyManagementTest, GetAllKeyIdsSingleKey) { + triggerKeyCreation("testdb1"); + + auto keyDb = _engine->getEncryptionKeyDB(); + auto 
keyIds = keyDb->getAllKeyIds();
+
+    ASSERT_EQ(keyIds.size(), 1u);
+    ASSERT_EQ(keyIds[0], "testdb1");
+}
+
+TEST_F(WiredTigerKVEngineEncryptionKeyManagementTest, GetAllKeyIdsMultipleKeys) {
+    triggerKeyCreation("db_alpha");
+    triggerKeyCreation("db_beta");
+    triggerKeyCreation("db_gamma");
+
+    auto keyDb = _engine->getEncryptionKeyDB();
+    auto keyIds = keyDb->getAllKeyIds();
+
+    ASSERT_EQ(keyIds.size(), 3u);
+
+    // Convert to set for easier checking (order not guaranteed)
+    std::set<std::string> keyIdSet(keyIds.begin(), keyIds.end());
+    ASSERT_TRUE(keyIdSet.count("db_alpha") > 0);
+    ASSERT_TRUE(keyIdSet.count("db_beta") > 0);
+    ASSERT_TRUE(keyIdSet.count("db_gamma") > 0);
+}
+
+TEST_F(WiredTigerKVEngineEncryptionKeyManagementTest, GetAllKeyIdsExcludesDefaultKey) {
+    // Create a key with "/default" keyid - this should be excluded from results
+    triggerKeyCreation("/default");
+    triggerKeyCreation("regular_db");
+
+    auto keyDb = _engine->getEncryptionKeyDB();
+    auto keyIds = keyDb->getAllKeyIds();
+
+    // Should only contain the regular_db key, not /default
+    ASSERT_EQ(keyIds.size(), 1u);
+    ASSERT_EQ(keyIds[0], "regular_db");
+}
+
+// -----------------------------------------------------------------------------
+// Tests for WiredTigerKVEngine::getAllEncryptionKeyIds()
+// -----------------------------------------------------------------------------
+
+TEST_F(WiredTigerKVEngineEncryptionKeyManagementTest, EngineGetAllEncryptionKeyIdsEmpty) {
+    auto keyIds = _engine->getAllEncryptionKeyIds();
+    ASSERT_TRUE(keyIds.empty());
+}
+
+TEST_F(WiredTigerKVEngineEncryptionKeyManagementTest, EngineGetAllEncryptionKeyIdsWithKeys) {
+    triggerKeyCreation("engine_test_db1");
+    triggerKeyCreation("engine_test_db2");
+
+    auto keyIds = _engine->getAllEncryptionKeyIds();
+
+    ASSERT_EQ(keyIds.size(), 2u);
+    std::set<std::string> keyIdSet(keyIds.begin(), keyIds.end());
+    ASSERT_TRUE(keyIdSet.count("engine_test_db1") > 0);
+    ASSERT_TRUE(keyIdSet.count("engine_test_db2") > 0);
+}
+
+// -----------------------------------------------------------------------------
+// Tests for WiredTigerKVEngine::getAllEncryptionKeyIdsInUse()
+// -----------------------------------------------------------------------------
+
+TEST_F(WiredTigerKVEngineEncryptionKeyManagementTest, GetAllEncryptionKeyIdsInUseEmpty) {
+    // With no idents created, should return empty set
+    auto keyIdsInUse = _engine->getAllEncryptionKeyIdsInUse();
+    ASSERT_TRUE(keyIdsInUse.empty());
+}
+
+// -----------------------------------------------------------------------------
+// Test for engine without encryption
+// -----------------------------------------------------------------------------
+
+class WiredTigerKVEngineNoEncryptionKeyManagementTest : public WiredTigerKVEngineEncryptionKeyTest {
+protected:
+    void _setUpPreconfiguredEngine() override {
+        // Don't set up encryption - create engine without encryption
+    }
+
+    void _setUpEncryptionParams() override {
+        // Don't enable encryption
+        encryptionGlobalParams = EncryptionGlobalParams();
+        encryptionGlobalParams.enableEncryption = false;
+    }
+
+    std::unique_ptr<WiredTigerKVEngine> _createNonEncryptedEngine() {
+        WiredTigerKVEngine::WiredTigerConfig wtConfig;
+        wtConfig.extraOpenOptions = "log=(file_max=1m,prealloc=false)";
+        wtConfig.cacheSizeMB = 1;
+
+        auto& provider =
+            rss::ReplicatedStorageService::get(getServiceContext()).getPersistenceProvider();
+        auto engine = std::make_unique<WiredTigerKVEngine>(
+            "wiredTiger",
+            _tempDir->path(),
+            _clockSource.get(),
+            std::move(wtConfig),
+            WiredTigerExtensions::get(getServiceContext()),
+            provider,
+            false,
+            getGlobalReplSettings().isReplSet(),
+            repl::ReplSettings::shouldRecoverFromOplogAsStandalone(),
+            getReplSetMemberInStandaloneMode(getGlobalServiceContext()),
+            _runner.get());
+        engine->notifyStorageStartupRecoveryComplete();
+        return engine;
+    }
+};
+
+TEST_F(WiredTigerKVEngineNoEncryptionKeyManagementTest, GetAllEncryptionKeyIdsNoEncryption) {
+    _engine = _createNonEncryptedEngine();
+
+    auto keyIds = 
_engine->getAllEncryptionKeyIds(); + ASSERT_TRUE(keyIds.empty()); +} + +TEST_F(WiredTigerKVEngineNoEncryptionKeyManagementTest, GetAllEncryptionKeyIdsInUseNoEncryption) { + _engine = _createNonEncryptedEngine(); + + auto keyIdsInUse = _engine->getAllEncryptionKeyIdsInUse(); + ASSERT_TRUE(keyIdsInUse.empty()); +} + +// ============================================================================= +// Tests for encryptionKeyCleanupDeferred server parameter +// ============================================================================= + +TEST_F(WiredTigerKVEngineEncryptionKeyManagementTest, EncryptionKeyCleanupDeferredDefaultFalse) { + // The default value should be false + ASSERT_FALSE(gEncryptionKeyCleanupDeferred.load()); +} + +TEST_F(WiredTigerKVEngineEncryptionKeyManagementTest, EncryptionKeyCleanupDeferredCanBeSet) { + // Save original value + bool originalValue = gEncryptionKeyCleanupDeferred.load(); + + // Set to true + gEncryptionKeyCleanupDeferred.store(true); + ASSERT_TRUE(gEncryptionKeyCleanupDeferred.load()); + + // Set back to false + gEncryptionKeyCleanupDeferred.store(false); + ASSERT_FALSE(gEncryptionKeyCleanupDeferred.load()); + + // Restore original value + gEncryptionKeyCleanupDeferred.store(originalValue); +} + } // namespace } // namespace mongo diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_util_test.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_util_test.cpp index e4c9a322d38f9..dcfbe902cb433 100644 --- a/src/mongo/db/storage/wiredtiger/wiredtiger_util_test.cpp +++ b/src/mongo/db/storage/wiredtiger/wiredtiger_util_test.cpp @@ -1293,5 +1293,99 @@ TEST(SimpleWiredTigerUtilTest, SpillCacheSize) { WiredTigerUtil::getSpillCacheSizeMB(1024 * 8, 5, 101, 100), DBException, 10698700); } +// ============================================================================= +// Tests for WiredTigerUtil::getEncryptionKeyId() +// ============================================================================= + 
+TEST(WiredTigerUtilGetEncryptionKeyIdTest, EmptyConfigReturnsNone) { + auto result = WiredTigerUtil::getEncryptionKeyId(""); + ASSERT_FALSE(result.has_value()); +} + +TEST(WiredTigerUtilGetEncryptionKeyIdTest, ConfigWithoutEncryptionReturnsNone) { + auto result = WiredTigerUtil::getEncryptionKeyId( + "access_pattern_hint=none,allocation_size=4KB,app_metadata=,block_allocation=best"); + ASSERT_FALSE(result.has_value()); +} + +TEST(WiredTigerUtilGetEncryptionKeyIdTest, EncryptionWithoutKeyIdReturnsNone) { + auto result = WiredTigerUtil::getEncryptionKeyId("encryption=(name=percona)"); + ASSERT_FALSE(result.has_value()); +} + +TEST(WiredTigerUtilGetEncryptionKeyIdTest, QuotedKeyIdExtracted) { + auto result = WiredTigerUtil::getEncryptionKeyId("encryption=(name=percona,keyid=\"testdb\")"); + ASSERT_TRUE(result.has_value()); + ASSERT_EQ(*result, "testdb"); +} + +TEST(WiredTigerUtilGetEncryptionKeyIdTest, UnquotedKeyIdExtracted) { + auto result = WiredTigerUtil::getEncryptionKeyId("encryption=(name=percona,keyid=testdb)"); + ASSERT_TRUE(result.has_value()); + ASSERT_EQ(*result, "testdb"); +} + +TEST(WiredTigerUtilGetEncryptionKeyIdTest, DefaultKeyExtracted) { + auto result = + WiredTigerUtil::getEncryptionKeyId("encryption=(name=percona,keyid=\"/default\")"); + ASSERT_TRUE(result.has_value()); + ASSERT_EQ(*result, "/default"); +} + +TEST(WiredTigerUtilGetEncryptionKeyIdTest, EmptyKeyIdExtracted) { + auto result = WiredTigerUtil::getEncryptionKeyId("encryption=(name=percona,keyid=\"\")"); + ASSERT_TRUE(result.has_value()); + ASSERT_EQ(*result, ""); +} + +TEST(WiredTigerUtilGetEncryptionKeyIdTest, ComplexConfigKeyIdExtracted) { + auto result = WiredTigerUtil::getEncryptionKeyId( + "access_pattern_hint=none,allocation_size=4KB,app_metadata=(formatVersion=12,infoObj={ " + "\"v\" : 2, \"key\" : { \"_id\" : 1 }, \"name\" : \"_id_\" })," + "block_allocation=best,block_compressor=snappy,encryption=(name=percona,keyid=\"mydb\")," + "format=btree,huffman_value=none"); + 
ASSERT_TRUE(result.has_value()); + ASSERT_EQ(*result, "mydb"); +} + +TEST(WiredTigerUtilGetEncryptionKeyIdTest, EncryptionNotStructReturnsNone) { + // If encryption= is followed by something other than '(', should return none + auto result = WiredTigerUtil::getEncryptionKeyId("encryption=none,other=value"); + ASSERT_FALSE(result.has_value()); +} + +TEST(WiredTigerUtilGetEncryptionKeyIdTest, KeyIdAtStartOfEncryptionStruct) { + auto result = WiredTigerUtil::getEncryptionKeyId("encryption=(keyid=\"firstdb\",name=percona)"); + ASSERT_TRUE(result.has_value()); + ASSERT_EQ(*result, "firstdb"); +} + +TEST(WiredTigerUtilGetEncryptionKeyIdTest, DatabaseNameWithSpecialChars) { + auto result = + WiredTigerUtil::getEncryptionKeyId("encryption=(name=percona,keyid=\"my-test_db.123\")"); + ASSERT_TRUE(result.has_value()); + ASSERT_EQ(*result, "my-test_db.123"); +} + +TEST(WiredTigerUtilGetEncryptionKeyIdTest, SystemDatabaseKey) { + auto result = WiredTigerUtil::getEncryptionKeyId("encryption=(name=percona,keyid=\"admin\")"); + ASSERT_TRUE(result.has_value()); + ASSERT_EQ(*result, "admin"); +} + +TEST(WiredTigerUtilGetEncryptionKeyIdTest, RealWorldCollectionConfig) { + // A realistic config string from a collection + auto result = WiredTigerUtil::getEncryptionKeyId( + "access_pattern_hint=none,allocation_size=4KB," + "app_metadata=(formatVersion=12,infoObj={ \"v\" : 2, \"key\" : { \"_id\" : 1 }, " + "\"name\" : \"_id_\" }),assert=(commit_timestamp=none,durable_timestamp=none," + "read_timestamp=none,write_timestamp=off),block_allocation=best," + "block_compressor=snappy,cache_resident=false,checksum=on,colgroups=," + "collator=,columns=,dictionary=0,encryption=(name=percona,keyid=\"production_db\")," + "exclusive=false,extractor=,format=btree"); + ASSERT_TRUE(result.has_value()); + ASSERT_EQ(*result, "production_db"); +} + } // namespace } // namespace mongo From d9e50d4859dfa44aa35e002f01cc4b41b7abb96d Mon Sep 17 00:00:00 2001 From: Pawel Lebioda Date: Wed, 1 Apr 2026 11:42:15 
+0200 Subject: [PATCH 3/3] PSMDB-1997: Add integration tests for deferred encryption key cleanup Add two test files and corresponding test suites for verifying deferred encryption key cleanup functionality: - deferred_key_cleanup_drop_database.js: Tests basic database create/drop operations with deferred cleanup enabled - deferred_key_cleanup_stress.js: Stress test with 16 parallel threads performing 200 iterations each of database create/drop Test suites run with both AES256-GCM and AES256-CBC cipher modes. The stress test uses db.getSiblingDB() instead of conn.getDB() to ensure the database connection works correctly when the worker function is serialized and executed in a parallel shell context. --- .../suites/tde_deferred_cleanup_cbc.yml | 24 +++ .../suites/tde_deferred_cleanup_gcm.yml | 24 +++ .../tde/deferred_key_cleanup_drop_database.js | 182 ++++++++++++++++ .../tde/deferred_key_cleanup_stress.js | 202 ++++++++++++++++++ 4 files changed, 432 insertions(+) create mode 100644 buildscripts/resmokeconfig/suites/tde_deferred_cleanup_cbc.yml create mode 100644 buildscripts/resmokeconfig/suites/tde_deferred_cleanup_gcm.yml create mode 100644 jstests/percona/tde/deferred_key_cleanup_drop_database.js create mode 100644 jstests/percona/tde/deferred_key_cleanup_stress.js diff --git a/buildscripts/resmokeconfig/suites/tde_deferred_cleanup_cbc.yml b/buildscripts/resmokeconfig/suites/tde_deferred_cleanup_cbc.yml new file mode 100644 index 0000000000000..442aa966c2f9f --- /dev/null +++ b/buildscripts/resmokeconfig/suites/tde_deferred_cleanup_cbc.yml @@ -0,0 +1,24 @@ +# Resmoke suite for testing deferred encryption key cleanup with AES256-CBC cipher mode. +# These tests stress test the encryptionKeyCleanupDeferred feature with aggressive +# cleanup intervals (1 second) while performing many dropDatabase operations. 
+ +config_variables: + - &keyFileGood jstests/percona/tde/ekf + - &cipherMode AES256-CBC + +test_kind: js_test + +selector: + roots: + - jstests/percona/tde/deferred_key_cleanup_drop_database.js + - jstests/percona/tde/deferred_key_cleanup_stress.js + +# Tests start their own mongod with encryption and deferred cleanup enabled. +executor: + config: + shell_options: + global_vars: + TestData: + keyFileGood: *keyFileGood + cipherMode: *cipherMode + nodb: "" diff --git a/buildscripts/resmokeconfig/suites/tde_deferred_cleanup_gcm.yml b/buildscripts/resmokeconfig/suites/tde_deferred_cleanup_gcm.yml new file mode 100644 index 0000000000000..d0ecfa3544927 --- /dev/null +++ b/buildscripts/resmokeconfig/suites/tde_deferred_cleanup_gcm.yml @@ -0,0 +1,24 @@ +# Resmoke suite for testing deferred encryption key cleanup with AES256-GCM cipher mode. +# These tests stress test the encryptionKeyCleanupDeferred feature with aggressive +# cleanup intervals (1 second) while performing many dropDatabase operations. + +config_variables: + - &keyFileGood jstests/percona/tde/ekf + - &cipherMode AES256-GCM + +test_kind: js_test + +selector: + roots: + - jstests/percona/tde/deferred_key_cleanup_drop_database.js + - jstests/percona/tde/deferred_key_cleanup_stress.js + +# Tests start their own mongod with encryption and deferred cleanup enabled. +executor: + config: + shell_options: + global_vars: + TestData: + keyFileGood: *keyFileGood + cipherMode: *cipherMode + nodb: "" diff --git a/jstests/percona/tde/deferred_key_cleanup_drop_database.js b/jstests/percona/tde/deferred_key_cleanup_drop_database.js new file mode 100644 index 0000000000000..81264dbd62138 --- /dev/null +++ b/jstests/percona/tde/deferred_key_cleanup_drop_database.js @@ -0,0 +1,182 @@ +/** + * Tests that deferred encryption key cleanup works correctly with dropDatabase. + * + * This test verifies that when encryptionKeyCleanupDeferred is enabled: + * 1. Databases can be created and dropped without errors + * 2. 
Encryption keys are eventually cleaned up by the background process + * 3. No data corruption occurs + * + * @tags: [ + * requires_wiredtiger, + * ] + */ +(function () { + "use strict"; + + const keyFile = TestData.keyFileGood || "jstests/percona/tde/ekf"; + const cipherMode = TestData.cipherMode || "AES256-GCM"; + const cleanupIntervalSecs = 1; + const numDatabases = 10; + const numCollections = 3; + const numDocsPerCollection = 100; + + jsTestLog("Starting deferred key cleanup drop database test with cipher mode: " + cipherMode); + + // Start mongod with encryption and deferred key cleanup enabled + const conn = MongoRunner.runMongod({ + enableEncryption: "", + encryptionKeyFile: keyFile, + encryptionCipherMode: cipherMode, + setParameter: { + encryptionKeyCleanupDeferred: true, + encryptionKeyCleanupIntervalSeconds: cleanupIntervalSecs, + }, + }); + assert.neq(null, conn, "mongod failed to start with encryption enabled"); + + const testDbPrefix = "deferred_cleanup_test_"; + + // Helper function to create a database with collections and data + function createDatabaseWithData(dbName) { + const db = conn.getDB(dbName); + for (let c = 0; c < numCollections; c++) { + const collName = "coll_" + c; + const coll = db[collName]; + + // Insert documents + let docs = []; + for (let d = 0; d < numDocsPerCollection; d++) { + docs.push({ + _id: d, + data: "test_data_" + d, + dbName: dbName, + collName: collName, + timestamp: new Date(), + }); + } + assert.commandWorked(coll.insertMany(docs)); + + // Verify insertion + assert.eq( + numDocsPerCollection, + coll.countDocuments({}), + "Document count mismatch in " + dbName + "." 
+ collName, + ); + } + return db; + } + + // Helper function to verify database integrity + function verifyDatabaseIntegrity(dbName) { + const db = conn.getDB(dbName); + for (let c = 0; c < numCollections; c++) { + const collName = "coll_" + c; + const coll = db[collName]; + + // Verify document count + const count = coll.countDocuments({}); + assert.eq(numDocsPerCollection, count, "Document count mismatch in " + dbName + "." + collName); + + // Verify data integrity by checking a few documents + for (let d = 0; d < Math.min(10, numDocsPerCollection); d++) { + const doc = coll.findOne({_id: d}); + assert.neq(null, doc, "Missing document _id=" + d + " in " + dbName + "." + collName); + assert.eq("test_data_" + d, doc.data, "Data mismatch for document _id=" + d); + assert.eq(dbName, doc.dbName, "dbName mismatch for document _id=" + d); + } + } + } + + jsTestLog("Phase 1: Create databases and verify data integrity"); + + // Create multiple databases + let createdDbs = []; + for (let i = 0; i < numDatabases; i++) { + const dbName = testDbPrefix + i; + jsTestLog("Creating database: " + dbName); + createDatabaseWithData(dbName); + createdDbs.push(dbName); + } + + // Verify all databases have correct data + for (const dbName of createdDbs) { + verifyDatabaseIntegrity(dbName); + } + jsTestLog("All " + numDatabases + " databases created and verified"); + + jsTestLog("Phase 2: Drop half of the databases"); + + // Drop half of the databases + let droppedDbs = []; + let remainingDbs = []; + for (let i = 0; i < numDatabases; i++) { + const dbName = testDbPrefix + i; + if (i % 2 === 0) { + jsTestLog("Dropping database: " + dbName); + const db = conn.getDB(dbName); + assert.commandWorked(db.dropDatabase()); + droppedDbs.push(dbName); + } else { + remainingDbs.push(dbName); + } + } + + jsTestLog("Phase 3: Verify remaining databases are intact"); + + // Verify remaining databases are still intact + for (const dbName of remainingDbs) { + verifyDatabaseIntegrity(dbName); + } + 
jsTestLog("All remaining databases verified after partial drop"); + + jsTestLog("Phase 4: Wait for deferred cleanup to run"); + + // Wait for deferred cleanup to run (at least 2 intervals) + sleep(cleanupIntervalSecs * 3 * 1000); + + jsTestLog("Phase 5: Verify remaining databases are still intact after cleanup"); + + // Verify remaining databases are still intact after cleanup ran + for (const dbName of remainingDbs) { + verifyDatabaseIntegrity(dbName); + } + + jsTestLog("Phase 6: Drop remaining databases"); + + // Drop remaining databases + for (const dbName of remainingDbs) { + jsTestLog("Dropping database: " + dbName); + const db = conn.getDB(dbName); + assert.commandWorked(db.dropDatabase()); + } + + jsTestLog("Phase 7: Wait for final cleanup"); + + // Wait for final cleanup + sleep(cleanupIntervalSecs * 3 * 1000); + + jsTestLog("Phase 8: Verify all test databases are gone"); + + // Verify all test databases are gone + const finalDbs = conn.getDB("admin").adminCommand({listDatabases: 1}).databases; + for (const dbInfo of finalDbs) { + assert(!dbInfo.name.startsWith(testDbPrefix), "Test database should have been dropped: " + dbInfo.name); + } + + jsTestLog("Phase 9: Create new databases to verify system is still functional"); + + // Create new databases to verify system is still functional + for (let i = 0; i < 3; i++) { + const dbName = testDbPrefix + "final_" + i; + createDatabaseWithData(dbName); + verifyDatabaseIntegrity(dbName); + + // Clean up + const db = conn.getDB(dbName); + assert.commandWorked(db.dropDatabase()); + } + + jsTestLog("Deferred key cleanup drop database test completed successfully"); + + MongoRunner.stopMongod(conn); +})(); diff --git a/jstests/percona/tde/deferred_key_cleanup_stress.js b/jstests/percona/tde/deferred_key_cleanup_stress.js new file mode 100644 index 0000000000000..ad548900ecea6 --- /dev/null +++ b/jstests/percona/tde/deferred_key_cleanup_stress.js @@ -0,0 +1,202 @@ +/** + * Stress test for deferred encryption key cleanup 
with aggressive cleanup intervals. + * + * This test runs 16 parallel threads, each performing 200 iterations of: + * - Creating a unique database + * - Creating collections and inserting documents + * - Verifying data integrity + * - Dropping the database + * + * The deferred key cleanup runs every 1 second, exercising the race condition + * handling between dropDatabase and encryption key cleanup. + * + * @tags: [ + * requires_wiredtiger, + * ] + */ +import {funWithArgs} from "jstests/libs/parallel_shell_helpers.js"; + +(function () { + "use strict"; + + const keyFile = TestData.keyFileGood || "jstests/percona/tde/ekf"; + const cipherMode = TestData.cipherMode || "AES256-GCM"; + const cleanupIntervalSecs = 1; + const numThreads = 16; + const iterationsPerThread = 200; + const numCollections = 2; + const numDocsPerCollection = 50; + + jsTestLog("Starting deferred key cleanup STRESS test"); + jsTestLog( + "Configuration: " + + numThreads + + " threads, " + + iterationsPerThread + + " iterations each, cipher mode: " + + cipherMode, + ); + + // Start mongod with encryption and aggressive deferred key cleanup + const conn = MongoRunner.runMongod({ + enableEncryption: "", + encryptionKeyFile: keyFile, + encryptionCipherMode: cipherMode, + setParameter: { + encryptionKeyCleanupDeferred: true, + encryptionKeyCleanupIntervalSeconds: cleanupIntervalSecs, + }, + }); + assert.neq(null, conn, "mongod failed to start with encryption enabled"); + + const testDbPrefix = "stress_test_"; + const adminDb = conn.getDB("admin"); + + // Worker function that runs in parallel shell + function workerThread(threadId, iterations, numColls, numDocs, dbPrefix) { + for (let iter = 0; iter < iterations; iter++) { + const dbName = dbPrefix + threadId + "_iter_" + iter; + const testDb = db.getSiblingDB(dbName); + + try { + // Create collections with data + for (let c = 0; c < numColls; c++) { + const collName = "coll_" + c; + const coll = testDb[collName]; + + // Insert documents + let docs = 
[]; + for (let d = 0; d < numDocs; d++) { + docs.push({ + _id: d, + threadId: threadId, + iteration: iter, + data: "stress_test_data_" + d + "_" + Math.random(), + timestamp: new Date(), + }); + } + const insertResult = coll.insertMany(docs); + assert.commandWorked(insertResult); + + // Verify document count + const count = coll.countDocuments({}); + assert.eq( + numDocs, + count, + "Thread " + + threadId + + " iter " + + iter + + ": Document count mismatch in " + + dbName + + "." + + collName + + " - expected " + + numDocs + + ", got " + + count, + ); + + // Spot check a few documents for data integrity + for (let d = 0; d < Math.min(5, numDocs); d++) { + const doc = coll.findOne({_id: d}); + assert.neq(null, doc, "Thread " + threadId + " iter " + iter + ": Missing document _id=" + d); + assert.eq( + threadId, + doc.threadId, + "Thread " + threadId + " iter " + iter + ": threadId mismatch for document _id=" + d, + ); + assert.eq( + iter, + doc.iteration, + "Thread " + threadId + " iter " + iter + ": iteration mismatch for document _id=" + d, + ); + } + } + + // Drop the database + const dropResult = testDb.dropDatabase(); + assert.commandWorked(dropResult); + + // Periodic progress logging + if ((iter + 1) % 50 === 0) { + print("Thread " + threadId + ": Completed " + (iter + 1) + "/" + iterations + " iterations"); + } + } catch (e) { + print("Thread " + threadId + " iter " + iter + " ERROR: " + tojson(e)); + throw e; + } + } + return {threadId: threadId, iterations: iterations, status: "success"}; + } + + jsTestLog("Starting " + numThreads + " parallel worker threads"); + + // Launch parallel shells + let threads = []; + for (let t = 0; t < numThreads; t++) { + const threadId = t; + const thread = startParallelShell( + funWithArgs( + workerThread, + threadId, + iterationsPerThread, + numCollections, + numDocsPerCollection, + testDbPrefix, + ), + conn.port, + ); + threads.push(thread); + } + + jsTestLog("All threads launched, waiting for completion..."); + + // 
Wait for all threads to complete + let allSucceeded = true; + for (let t = 0; t < threads.length; t++) { + try { + threads[t](); + jsTestLog("Thread " + t + " completed successfully"); + } catch (e) { + jsTestLog("Thread " + t + " FAILED: " + tojson(e)); + allSucceeded = false; + } + } + + assert(allSucceeded, "One or more worker threads failed"); + + jsTestLog("All threads completed. Waiting for final cleanup cycle..."); + + // Wait for a few cleanup cycles to ensure all orphaned keys are processed + sleep(cleanupIntervalSecs * 5 * 1000); + + jsTestLog("Verifying no test databases remain..."); + + // Verify no test databases remain + const finalDbs = adminDb.adminCommand({listDatabases: 1}).databases; + let testDbsRemaining = []; + for (const dbInfo of finalDbs) { + if (dbInfo.name.startsWith(testDbPrefix)) { + testDbsRemaining.push(dbInfo.name); + } + } + assert.eq(0, testDbsRemaining.length, "Test databases should have been dropped: " + tojson(testDbsRemaining)); + + jsTestLog("Final verification: Create and drop a few more databases"); + + // Final verification: system should still be fully functional + for (let i = 0; i < 5; i++) { + const dbName = testDbPrefix + "final_verify_" + i; + const db = conn.getDB(dbName); + + assert.commandWorked(db.test.insertOne({verify: true, index: i})); + assert.eq(1, db.test.countDocuments({})); + assert.commandWorked(db.dropDatabase()); + } + + jsTestLog("Stress test completed successfully!"); + jsTestLog("Total operations: " + numThreads * iterationsPerThread + " create/drop cycles"); + + MongoRunner.stopMongod(conn); +})();