From 3e2e1baae1abcd753ec91efc0254137f42c9caa4 Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Tue, 30 Sep 2025 16:57:44 +0800 Subject: [PATCH 01/71] lookahead --- redis.conf | 3 + src/aof.c | 12 + src/blocked.c | 3 +- src/cluster.c | 16 +- src/cluster.h | 1 + src/config.c | 1 + src/iothread.c | 3 +- src/memory_prefetch.c | 17 +- src/module.c | 18 +- src/multi.c | 70 ++-- src/networking.c | 503 +++++++++++++++++++------- src/replication.c | 4 +- src/script_lua.c | 3 +- src/server.c | 92 +++-- src/server.h | 70 +++- tests/unit/cluster/sharded-pubsub.tcl | 2 +- tests/unit/memefficiency.tcl | 5 + 17 files changed, 608 insertions(+), 215 deletions(-) diff --git a/redis.conf b/redis.conf index 5d2b27ffbae..df61aa98d84 100644 --- a/redis.conf +++ b/redis.conf @@ -2139,6 +2139,9 @@ client-output-buffer-limit pubsub 32mb 8mb 60 # # client-query-buffer-limit 1gb +# Defines how many commands in each client pipeline to decode and prefetch +# lookahead 16 + # In some scenarios client connections can hog up memory leading to OOM # errors or data eviction. To avoid this we can cap the accumulated memory # used by all client connections (all pubsub and normal clients). Once we diff --git a/src/aof.c b/src/aof.c index 8a9be94b61a..e0571290a5a 100644 --- a/src/aof.c +++ b/src/aof.c @@ -1640,12 +1640,24 @@ int loadSingleAppendOnlyFile(char *filename) { if (fakeClient->flags & CLIENT_MULTI && fakeClient->cmd->proc != execCommand) { + /* queueMultiCommand requires a pendingCommand, so we create a "fake" one here + * for it to consume */ + pendingCommand *pcmd = zmalloc(sizeof(pendingCommand)); + initPendingCommand(pcmd); + addPengingCommand(&fakeClient->pending_cmds, pcmd); + + pcmd->argc = argc; + pcmd->argv_len = argc; + pcmd->argv = argv; + pcmd->cmd = cmd; + /* Note: we don't have to attempt calling evalGetCommandFlags, * since this is AOF, the checks in processCommand are not made * anyway.*/ queueMultiCommand(fakeClient, cmd->flags); } else { cmd->proc(fakeClient); + fakeClient->all_argv_len_sum = 0; /* Otherwise no one cleans this up and we reach cleanup with it non-zero */ } /* The fake client should not have a reply */ diff --git a/src/blocked.c b/src/blocked.c index 8d15f9de3de..238a0aacc21 100644 --- a/src/blocked.c +++ b/src/blocked.c @@ -199,8 +199,7 @@ void unblockClient(client *c, int queue_for_reprocessing) { * call reqresAppendResponse here (for clients blocked on key, * unblockClientOnKey is called, which eventually calls processCommand, * which calls reqresAppendResponse) */ - reqresAppendResponse(c); - resetClient(c); + prepareForNextCommand(c); } /* Clear the flags, and put the client in the unblocked list so that diff --git a/src/cluster.c b/src/cluster.c index 2f861b5c2fa..225d149b92b 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -1113,7 +1113,8 @@ clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, in robj *firstkey = NULL; int multiple_keys = 0; multiState *ms, _ms; - multiCmd mc; + pendingCommand mc; + pendingCommand *mcp = &mc; int i, slot = 0, migrating_slot = 0, importing_slot = 0, missing_keys = 0, existing_keys = 0; int pubsubshard_included = 0; /* Flag to indicate if a pubsub shard cmd is included. */ @@ -1141,8 +1142,11 @@ clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, in * structure if the client is not in MULTI/EXEC state, this way * we have a single codepath below. */ ms = &_ms; - _ms.commands = &mc; + _ms.commands = &mcp; _ms.count = 1; + + /* Properly initialize the fake pendingCommand */ + initPendingCommand(&mc); mc.argv = argv; mc.argc = argc; mc.cmd = cmd; @@ -1156,9 +1160,11 @@ clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, in int margc, numkeys, j; keyReference *keyindex; - mcmd = ms->commands[i].cmd; - margc = ms->commands[i].argc; - margv = ms->commands[i].argv; + pendingCommand *pcmd = ms->commands[i]; + + mcmd = pcmd->cmd; + margc = pcmd->argc; + margv = pcmd->argv; /* Only valid for sharded pubsub as regular pubsub can operate on any node and bypasses this layer. */ if (!pubsubshard_included && diff --git a/src/cluster.h b/src/cluster.h index 18b5bb46558..e9f4df7553d 100644 --- a/src/cluster.h +++ b/src/cluster.h @@ -22,6 +22,7 @@ #define CLUSTER_SLOT_MASK_BITS 14 /* Number of bits used for slot id. */ #define CLUSTER_SLOTS (1<read_error && c->io_flags & CLIENT_IO_PENDING_COMMAND) { c->flags |= CLIENT_PENDING_COMMAND; + c->flags |= CLIENT_IN_PREFETCH; if (processPendingCommandAndInputBuffer(c) == C_ERR) { /* If the client is no longer valid, it must be freed safely. */ continue; } + c->flags &= ~CLIENT_IN_PREFETCH; } /* We may have pending replies if io thread may not finish writing @@ -729,7 +731,6 @@ void initThreadedIO(void) { exit(1); } - prefetchCommandsBatchInit(); /* Spawn and initialize the I/O threads. */ for (int i = 1; i < server.io_threads_num; i++) { diff --git a/src/memory_prefetch.c b/src/memory_prefetch.c index 8f3f77ef2d6..00d06222d69 100644 --- a/src/memory_prefetch.c +++ b/src/memory_prefetch.c @@ -382,18 +382,21 @@ int addCommandToBatch(client *c) { batch->clients[batch->client_count++] = c; - if (likely(c->iolookedcmd)) { - /* Get command's keys positions */ + pendingCommand *pcmd = c->pending_cmds.head; + while (pcmd != NULL) { + if (pcmd->parsing_incomplete || !pcmd->cmd || pcmd->flags) break; + getKeysResult result = GETKEYS_RESULT_INIT; - int num_keys = getKeysFromCommand(c->iolookedcmd, c->argv, c->argc, &result); - for (int i = 0; i < num_keys && batch->key_count < batch->max_prefetch_size; i++) { - batch->keys[batch->key_count] = c->argv[result.keys[i].pos]; + int numkeys = getKeysFromCommand(pcmd->cmd, pcmd->argv, pcmd->argc, &result); + for (int i = 0; i < numkeys && batch->key_count < batch->max_prefetch_size; i++) { + batch->keys[batch->key_count] = pcmd->argv[result.keys[i].pos]; batch->keys_dicts[batch->key_count] = - kvstoreGetDict(c->db->keys, c->slot > 0 ? c->slot : 0); + kvstoreGetDict(c->db->keys, pcmd->slot > 0 ? pcmd->slot : 0); batch->key_count++; } getKeysFreeResult(&result); - } + pcmd = pcmd->next; + } return C_OK; } diff --git a/src/module.c b/src/module.c index ab8cafb191a..1a6360f1a55 100644 --- a/src/module.c +++ b/src/module.c @@ -674,11 +674,12 @@ void moduleReleaseTempClient(client *c) { listEmpty(c->reply); c->reply_bytes = 0; c->duration = 0; - resetClient(c); + resetClient(c, -1); + serverAssert(c->all_argv_len_sum == 0); c->bufpos = 0; c->flags = CLIENT_MODULE; c->user = NULL; /* Root user */ - c->cmd = c->lastcmd = c->realcmd = c->iolookedcmd = NULL; + c->cmd = c->lastcmd = c->realcmd = NULL; if (c->bstate.async_rm_call_handle) { RedisModuleAsyncRMCallPromise *promise = c->bstate.async_rm_call_handle; promise->c = NULL; /* Remove the client from the promise so it will no longer be possible to abort it. */ @@ -11035,12 +11036,21 @@ void moduleCallCommandFilters(client *c) { } /* If the filter sets a new command, including command or subcommand, - * the command looked up in IO threads will be invalid. */ - c->iolookedcmd = NULL; + * the command looked up will be invalid. */ + c->lookedcmd = NULL; c->argv = filter.argv; c->argv_len = filter.argv_len; c->argc = filter.argc; + + /* Update pending command if it exists. */ + pendingCommand *pcmd = c->current_pending_cmd; + if (pcmd) { + pcmd->argv = filter.argv; + pcmd->argc = filter.argc; + pcmd->argv_len = filter.argv_len; + pcmd->cmd = NULL; + } } /* Return the number of arguments a filtered command has. The number of diff --git a/src/multi.c b/src/multi.c index 8c5ec6f99e2..7bd4f259336 100644 --- a/src/multi.c +++ b/src/multi.c @@ -19,27 +19,19 @@ void initClientMultiState(client *c) { c->mstate.cmd_inv_flags = 0; c->mstate.argv_len_sums = 0; c->mstate.alloc_count = 0; + c->mstate.executing_cmd = -1; } /* Release all the resources associated with MULTI/EXEC state */ void freeClientMultiState(client *c) { - int j; - - for (j = 0; j < c->mstate.count; j++) { - int i; - multiCmd *mc = c->mstate.commands+j; - - for (i = 0; i < mc->argc; i++) - decrRefCount(mc->argv[i]); - zfree(mc->argv); + for (int i = 0; i < c->mstate.count; i++) { + freePendingCommand(c, c->mstate.commands[i]); } zfree(c->mstate.commands); } /* Add a new command into the MULTI commands queue */ void queueMultiCommand(client *c, uint64_t cmd_flags) { - multiCmd *mc; - /* No sense to waste memory if the transaction is already aborted. * this is useful in case client sends these in a pipeline, or doesn't * bother to read previous responses and didn't notice the multi was already @@ -49,29 +41,35 @@ void queueMultiCommand(client *c, uint64_t cmd_flags) { if (c->mstate.count == 0) { /* If a client is using multi/exec, assuming it is used to execute at least * two commands. Hence, creating by default size of 2. */ - c->mstate.commands = zmalloc(sizeof(multiCmd)*2); + c->mstate.commands = zmalloc(sizeof(pendingCommand*)*2); c->mstate.alloc_count = 2; } if (c->mstate.count == c->mstate.alloc_count) { c->mstate.alloc_count = c->mstate.alloc_count < INT_MAX/2 ? c->mstate.alloc_count*2 : INT_MAX; - c->mstate.commands = zrealloc(c->mstate.commands, sizeof(multiCmd)*(c->mstate.alloc_count)); + c->mstate.commands = zrealloc(c->mstate.commands, sizeof(pendingCommand*)*(c->mstate.alloc_count)); } - mc = c->mstate.commands+c->mstate.count; - mc->cmd = c->cmd; - mc->argc = c->argc; - mc->argv = c->argv; - mc->argv_len = c->argv_len; + + /* Move the pending command into the multi-state. + * We leave the empty list node in 'pending_cmds' for freeClientPendingCommands to clean up + * later, but set the value to NULL to indicate it has been moved out and should not be freed. */ + pendingCommand *pcmd = popPendingCommandFromHead(&c->pending_cmds); + c->current_pending_cmd = NULL; + pendingCommand **mc = c->mstate.commands + c->mstate.count; + *mc = pcmd; c->mstate.count++; c->mstate.cmd_flags |= cmd_flags; c->mstate.cmd_inv_flags |= ~cmd_flags; - c->mstate.argv_len_sums += c->argv_len_sum + sizeof(robj*)*c->argc; + c->mstate.argv_len_sums += (*mc)->argv_len_sum; + c->all_argv_len_sum -= (*mc)->argv_len_sum; + + (*mc)->argv_len_sum = 0; /* This is no longer tracked through all_argv_len_sum, so we don't want */ + /* to subtract it from there later. */ - /* Reset the client's args since we copied them into the mstate and shouldn't - * reference them from c anymore. */ + /* Reset the client's args since we moved them into the mstate and shouldn't + * reference them from 'c' anymore. */ c->argv = NULL; c->argc = 0; - c->argv_len_sum = 0; c->argv_len = 0; } @@ -129,6 +127,7 @@ void execCommand(client *c) { int j; robj **orig_argv; int orig_argc, orig_argv_len; + size_t orig_all_argv_len_sum; struct redisCommand *orig_cmd; if (!(c->flags & CLIENT_MULTI)) { @@ -172,12 +171,19 @@ void execCommand(client *c) { orig_argv_len = c->argv_len; orig_argc = c->argc; orig_cmd = c->cmd; + + /* Multi-state commands aren't tracked through all_argv_len_sum, so we don't want anything done while executing them to affect that field. + * Otherwise, we get inconsistencies and all_argv_len_sum doesn't go back to exactly 0 when the client is finished */ + orig_all_argv_len_sum = c->all_argv_len_sum; + + c->all_argv_len_sum = c->mstate.argv_len_sums; + addReplyArrayLen(c,c->mstate.count); for (j = 0; j < c->mstate.count; j++) { - c->argc = c->mstate.commands[j].argc; - c->argv = c->mstate.commands[j].argv; - c->argv_len = c->mstate.commands[j].argv_len; - c->cmd = c->realcmd = c->mstate.commands[j].cmd; + c->argc = c->mstate.commands[j]->argc; + c->argv = c->mstate.commands[j]->argv; + c->argv_len = c->mstate.commands[j]->argv_len; + c->cmd = c->realcmd = c->mstate.commands[j]->cmd; /* ACL permissions are also checked at the time of execution in case * they were changed after the commands were queued. */ @@ -207,6 +213,7 @@ void execCommand(client *c) { "This command is no longer allowed for the " "following reason: %s", reason); } else { + c->mstate.executing_cmd = j; if (c->id == CLIENT_ID_AOF) call(c,CMD_CALL_NONE); else @@ -216,10 +223,10 @@ void execCommand(client *c) { } /* Commands may alter argc/argv, restore mstate. */ - c->mstate.commands[j].argc = c->argc; - c->mstate.commands[j].argv = c->argv; - c->mstate.commands[j].argv_len = c->argv_len; - c->mstate.commands[j].cmd = c->cmd; + c->mstate.commands[j]->argc = c->argc; + c->mstate.commands[j]->argv = c->argv; + c->mstate.commands[j]->argv_len = c->argv_len; + c->mstate.commands[j]->cmd = c->cmd; } // restore old DENY_BLOCKING value @@ -230,6 +237,7 @@ void execCommand(client *c) { c->argv_len = orig_argv_len; c->argc = orig_argc; c->cmd = c->realcmd = orig_cmd; + c->all_argv_len_sum = orig_all_argv_len_sum; discardTransaction(c); server.in_exec = 0; @@ -485,6 +493,6 @@ size_t multiStateMemOverhead(client *c) { /* Add watched keys overhead, Note: this doesn't take into account the watched keys themselves, because they aren't managed per-client. */ mem += listLength(c->watched_keys) * (sizeof(listNode) + sizeof(watchedKey)); /* Reserved memory for queued multi commands. */ - mem += c->mstate.alloc_count * sizeof(multiCmd); + mem += c->mstate.alloc_count * sizeof(pendingCommand); return mem; } diff --git a/src/networking.c b/src/networking.c index 9f4fec0d71b..a4a6752e4f5 100644 --- a/src/networking.c +++ b/src/networking.c @@ -19,6 +19,7 @@ #include "script.h" #include "fpconv_dtoa.h" #include "fmtargs.h" +#include "memory_prefetch.h" #include #include #include @@ -32,11 +33,14 @@ static inline int _clientHasPendingRepliesSlave(client *c); static inline int _clientHasPendingRepliesNonSlave(client *c); static inline int _writeToClientNonSlave(client *c, ssize_t *nwritten); static inline int _writeToClientSlave(client *c, ssize_t *nwritten); +static int consumePendingCommand(client *c); int ProcessingEventsWhileBlocked = 0; /* See processEventsWhileBlocked(). */ __thread sds thread_reusable_qb = NULL; __thread int thread_reusable_qb_used = 0; /* Avoid multiple clients using reusable query * buffer due to nested command execution. */ +/* COMMAND_QUEUE_MIN_CAPACITY no longer needed with linked list implementation */ + /* Return the size consumed from the allocator, for the specified SDS string, * including internal fragmentation. This function is used in order to compute * the client output buffer size. */ @@ -162,12 +166,15 @@ client *createClient(connection *conn) { c->argc = 0; c->argv = NULL; c->argv_len = 0; - c->argv_len_sum = 0; + c->all_argv_len_sum = 0; + c->pending_cmds.head = c->pending_cmds.tail = NULL; + c->pending_cmds.len = c->pending_cmds.ready_len = 0; + c->current_pending_cmd = NULL; c->original_argc = 0; c->original_argv = NULL; c->deferred_objects = NULL; c->deferred_objects_num = 0; - c->cmd = c->lastcmd = c->realcmd = c->iolookedcmd = NULL; + c->cmd = c->lastcmd = c->realcmd = c->lookedcmd = NULL; c->cur_script = NULL; c->multibulklen = 0; c->bulklen = -1; @@ -183,6 +190,7 @@ client *createClient(connection *conn) { c->replstate = REPL_STATE_NONE; c->repl_start_cmd_stream_on_ack = 0; c->reploff = 0; + c->reploff_next = 0; c->read_reploff = 0; c->repl_applied = 0; c->repl_ack_off = 0; @@ -1528,8 +1536,7 @@ static inline void freeClientArgvInternal(client *c, int free_argv) { } c->argc = 0; c->cmd = NULL; - c->iolookedcmd = NULL; - c->argv_len_sum = 0; + c->lookedcmd = NULL; if (free_argv) { c->argv_len = 0; zfree(c->argv); @@ -1541,6 +1548,18 @@ void freeClientArgv(client *c) { freeClientArgvInternal(c, 1); } +void freeClientPendingCommands(client *c, int num_pcmds_to_free) { + /* (-1) means free all pending commands */ + if (num_pcmds_to_free == -1) + num_pcmds_to_free = c->pending_cmds.len; + + while (num_pcmds_to_free--) { + pendingCommand *pcmd = popPendingCommandFromHead(&c->pending_cmds); + serverAssert(pcmd); + freePendingCommand(c, pcmd); + } +} + /* Close all the slaves connections. This is useful in chained replication * when we resync with our own master and want to force all our slaves to * resync with us as well. */ @@ -1640,6 +1659,12 @@ void unlinkClient(client *c) { c->flags &= ~CLIENT_UNBLOCKED; } + freeClientPendingCommands(c, -1); + c->argv_len = 0; + c->argv = NULL; + c->argc = 0; + c->cmd = NULL; + /* Clear the tracking status. */ if (c->flags & CLIENT_TRACKING) disableTracking(c); } @@ -1821,7 +1846,6 @@ void freeClient(client *c) { listRelease(c->reply); zfree(c->buf); freeReplicaReferencedReplBuffer(c); - freeClientArgv(c); freeClientOriginalArgv(c); freeClientDeferredObjects(c, 1); if (c->deferred_reply_errors) @@ -1838,9 +1862,15 @@ void freeClient(client *c) { /* Unlink the client: this will close the socket, remove the I/O * handlers, and remove references of the client from different - * places where active clients may be referenced. */ + * places where active clients may be referenced. + * This will also clean all remaining pending commands in the client, + * as they are no longer valid. + */ unlinkClient(c); + freeClientMultiState(c); + serverAssert(c->pending_cmds.len == 0); + /* Master/slave cleanup Case 1: * we lost the connection with a slave. */ if (c->flags & CLIENT_SLAVE) { @@ -1895,7 +1925,7 @@ void freeClient(client *c) { if (c->name) decrRefCount(c->name); if (c->lib_name) decrRefCount(c->lib_name); if (c->lib_ver) decrRefCount(c->lib_ver); - freeClientMultiState(c); + serverAssert(c->all_argv_len_sum == 0); sdsfree(c->peerid); sdsfree(c->sockname); sdsfree(c->slave_addr); @@ -2282,14 +2312,35 @@ int handleClientsWithPendingWrites(void) { return processed; } -static inline void resetClientInternal(client *c, int free_argv) { - redisCommandProc *prevcmd = c->cmd ? c->cmd->proc : NULL; - - freeClientArgvInternal(c, free_argv); - c->cur_script = NULL; +/* Prepare the client for the parsing of the next command. */ +void resetClientQbufState(client *c) { c->reqtype = 0; c->multibulklen = 0; c->bulklen = -1; +} + +static inline void resetClientInternal(client *c, int num_pcmds_to_free) { + redisCommandProc *prevcmd = c->cmd ? c->cmd->proc : NULL; + + /* We may get here with no pending commands but with an argv that needs freeing. + * An example is in the case of modules (RM_Call) */ + if (c->current_pending_cmd) { + freeClientPendingCommands(c, num_pcmds_to_free); + if (c->pending_cmds.len == 0) + serverAssert(c->all_argv_len_sum == 0); + c->current_pending_cmd = NULL; + } else if (c->argv) { + freeClientArgvInternal(c, 1 /* free_argv */); + /* If we're dealing with a client that doesn't create pendingCommand structs (e.g.: a Lua client), + * clear the all_argv_len_sum counter so we don't get to freeing the client with it non-zero. */ + c->all_argv_len_sum = 0; + } + + c->argc = 0; + c->cmd = NULL; + c->argv_len = 0; + c->argv = NULL; + c->cur_script = NULL; c->slot = -1; c->cluster_compatibility_check_slot = -2; c->flags &= ~CLIENT_EXECUTING_COMMAND; @@ -2329,8 +2380,8 @@ static inline void resetClientInternal(client *c, int free_argv) { } /* resetClient prepare the client to process the next command */ -void resetClient(client *c) { - resetClientInternal(c, 1); +void resetClient(client *c, int num_pcmds_to_free) { + resetClientInternal(c, num_pcmds_to_free); } /* This function is used when we want to re-enter the event loop but there @@ -2373,14 +2424,14 @@ void unprotectClient(client *c) { * have a well formed command. The function also returns C_ERR when there is * a protocol error: in such a case the client structure is setup to reply * with the error and close the connection. */ -int processInlineBuffer(client *c) { +int parseInlineBuffer(client *c, pendingCommand *pcmd) { char *newline; int argc, j, linefeed_chars = 1; sds *argv, aux; size_t querylen; /* Search for end of line */ - newline = strchr(c->querybuf+c->qb_pos,'\n'); + newline = memchr(c->querybuf+c->qb_pos,'\n',sdslen(c->querybuf) - c->qb_pos); /* Nothing to do without a \r\n */ if (newline == NULL) { @@ -2428,20 +2479,18 @@ int processInlineBuffer(client *c) { /* Setup argv array on client structure */ if (argc) { - /* Create new argv if space is insufficient. */ - if (unlikely(argc > c->argv_len)) { - zfree(c->argv); - c->argv = zmalloc(sizeof(robj*)*argc); - c->argv_len = argc; - } - c->argv_len_sum = 0; + zfree(pcmd->argv); + pcmd->argv = zmalloc(sizeof(robj*)*argc); + pcmd->argv_len = argc; + pcmd->argv_len_sum = 0; } /* Create redis objects for all arguments. */ - for (c->argc = 0, j = 0; j < argc; j++) { - c->argv[c->argc] = createObject(OBJ_STRING,argv[j]); - c->argc++; - c->argv_len_sum += sdslen(argv[j]); + for (pcmd->argc = 0, j = 0; j < argc; j++) { + pcmd->argv[pcmd->argc] = createObject(OBJ_STRING,argv[j]); + pcmd->argc++; + pcmd->argv_len_sum += sdslen(argv[j]); + c->all_argv_len_sum += sdslen(argv[j]); } zfree(argv); @@ -2458,7 +2507,7 @@ int processInlineBuffer(client *c) { * Command) SET key value * Inline) SET key value\r\n */ - c->net_input_bytes_curr_cmd = (c->argv_len_sum + (c->argc - 1) + 2); + pcmd->input_bytes = (pcmd->argv_len_sum + (pcmd->argc - 1) + 2); return C_OK; } @@ -2496,32 +2545,21 @@ static void setProtocolError(const char *errstr, client *c) { c->flags |= (CLIENT_CLOSE_AFTER_REPLY|CLIENT_PROTOCOL_ERROR); } -/* Process the query buffer for client 'c', setting up the client argument - * vector for command execution. Returns C_OK if after running the function - * the client has a well-formed ready to be processed command, otherwise - * C_ERR if there is still to read more buffer to get the full command. - * The function also returns C_ERR when there is a protocol error: in such a - * case the client structure is setup to reply with the error and close - * the connection. - * - * This function is called if processInputBuffer() detects that the next - * command is in RESP format, so the first byte in the command is found - * to be '*'. Otherwise for inline commands processInlineBuffer() is called. */ -int processMultibulkBuffer(client *c) { +static int parseMultibulk(client *c, pendingCommand *pcmd) { char *newline = NULL; int ok; long long ll; size_t querybuf_len = sdslen(c->querybuf); /* Cache sdslen */ if (c->multibulklen == 0) { - /* The client should have been reset */ - serverAssertWithInfo(c,NULL,c->argc == 0); + /* TODO: The client should have been reset */ + serverAssertWithInfo(c,NULL,pcmd->argc == 0); /* Multi bulk length cannot be read without a \r\n */ - newline = strchr(c->querybuf+c->qb_pos,'\r'); + newline = memchr(c->querybuf+c->qb_pos,'\r',sdslen(c->querybuf) - c->qb_pos); if (newline == NULL) { if (querybuf_len-c->qb_pos > PROTO_INLINE_MAX_SIZE) { - c->read_error = CLIENT_READ_TOO_BIG_MBULK_COUNT_STRING; + pcmd->flags = CLIENT_READ_TOO_BIG_MBULK_COUNT_STRING; } return C_ERR; } @@ -2536,10 +2574,10 @@ int processMultibulkBuffer(client *c) { size_t multibulklen_slen = newline - (c->querybuf + 1 + c->qb_pos); ok = string2ll(c->querybuf+1+c->qb_pos,newline-(c->querybuf+1+c->qb_pos),&ll); if (!ok || ll > INT_MAX) { - c->read_error = CLIENT_READ_INVALID_MULTIBUCK_LENGTH; + pcmd->flags = CLIENT_READ_INVALID_MULTIBUCK_LENGTH; return C_ERR; } else if (ll > 10 && authRequired(c)) { - c->read_error = CLIENT_READ_UNAUTH_MBUCK_COUNT; + pcmd->flags = CLIENT_READ_UNAUTH_MBUCK_COUNT; return C_ERR; } @@ -2548,19 +2586,12 @@ int processMultibulkBuffer(client *c) { if (ll <= 0) return C_OK; c->multibulklen = ll; + c->bulklen = -1; - /* Setup argv array on client structure. - * Create new argv in the following cases: - * 1) When the requested size is greater than the current size. - * 2) When the requested size is less than the current size, because - * we always allocate argv gradually with a maximum size of 1024, - * Therefore, if argv_len exceeds this limit, we always reallocate. */ - if (unlikely(c->multibulklen > c->argv_len || c->argv_len > 1024)) { - zfree(c->argv); - c->argv_len = min(c->multibulklen, 1024); - c->argv = zmalloc(sizeof(robj*)*c->argv_len); - } - c->argv_len_sum = 0; + zfree(pcmd->argv); + pcmd->argv_len = min(c->multibulklen, 1024); + pcmd->argv = zmalloc(sizeof(robj*)*(pcmd->argv_len)); + pcmd->argv_len_sum = 0; /* Per-slot network bytes-in calculation. * @@ -2593,17 +2624,17 @@ int processMultibulkBuffer(client *c) { * * The 1st component is calculated within the below line. * */ - c->net_input_bytes_curr_cmd += (multibulklen_slen + 3); + pcmd->input_bytes += (multibulklen_slen + 3); } serverAssertWithInfo(c,NULL,c->multibulklen > 0); while(c->multibulklen) { /* Read bulk length if unknown */ if (c->bulklen == -1) { - newline = strchr(c->querybuf+c->qb_pos,'\r'); + newline = memchr(c->querybuf+c->qb_pos,'\r',sdslen(c->querybuf) - c->qb_pos); if (newline == NULL) { if (querybuf_len-c->qb_pos > PROTO_INLINE_MAX_SIZE) { - c->read_error = CLIENT_READ_TOO_BIG_BUCK_COUNT_STRING; + pcmd->flags = CLIENT_READ_TOO_BIG_BUCK_COUNT_STRING; return C_ERR; } break; @@ -2614,7 +2645,7 @@ int processMultibulkBuffer(client *c) { break; if (c->querybuf[c->qb_pos] != '$') { - c->read_error = CLIENT_READ_EXPECTED_DOLLAR; + pcmd->flags = CLIENT_READ_EXPECTED_DOLLAR; return C_ERR; } @@ -2622,10 +2653,10 @@ int processMultibulkBuffer(client *c) { ok = string2ll(c->querybuf+c->qb_pos+1,newline-(c->querybuf+c->qb_pos+1),&ll); if (!ok || ll < 0 || (!(c->flags & CLIENT_MASTER) && ll > server.proto_max_bulk_len)) { - c->read_error = CLIENT_READ_INVALID_BUCK_LENGTH; + pcmd->flags = CLIENT_READ_INVALID_BUCK_LENGTH; return C_ERR; } else if (ll > 16384 && authRequired(c)) { - c->read_error = CLIENT_READ_UNAUTH_BUCK_LENGTH; + pcmd->flags = CLIENT_READ_UNAUTH_BUCK_LENGTH; return C_ERR; } @@ -2659,7 +2690,9 @@ int processMultibulkBuffer(client *c) { } c->bulklen = ll; /* Per-slot network bytes-in calculation, 2nd component. */ - c->net_input_bytes_curr_cmd += (bulklen_slen + 3); + pcmd->input_bytes += (bulklen_slen + 3); + } else { + serverAssert(pcmd->parsing_incomplete); } /* Read bulk argument */ @@ -2667,10 +2700,9 @@ int processMultibulkBuffer(client *c) { break; } else { /* Check if we have space in argv, grow if needed */ - if (c->argc >= c->argv_len) { - serverAssert(c->argv_len); /* Ensure argv is not freed while the client is in the mid of parsing command. */ - c->argv_len = min(c->argv_len < INT_MAX/2 ? c->argv_len*2 : INT_MAX, c->argc+c->multibulklen); - c->argv = zrealloc(c->argv, sizeof(robj*)*c->argv_len); + if (pcmd->argc >= pcmd->argv_len) { + pcmd->argv_len = min(pcmd->argv_len < INT_MAX/2 ? (pcmd->argv_len)*2 : INT_MAX, pcmd->argc+c->multibulklen); + pcmd->argv = zrealloc(pcmd->argv, sizeof(robj*)*(pcmd->argv_len)); } /* Optimization: if a non-master client's buffer contains JUST our bulk element @@ -2681,8 +2713,9 @@ int processMultibulkBuffer(client *c) { c->bulklen >= PROTO_MBULK_BIG_ARG && querybuf_len == (size_t)(c->bulklen+2)) { - c->argv[c->argc++] = createObject(OBJ_STRING,c->querybuf); - c->argv_len_sum += c->bulklen; + (pcmd->argv)[(pcmd->argc)++] = createObject(OBJ_STRING,c->querybuf); + pcmd->argv_len_sum += c->bulklen; + c->all_argv_len_sum += c->bulklen; sdsIncrLen(c->querybuf,-2); /* remove CRLF */ /* Assume that if we saw a fat argument we'll see another one likely... * But only if that fat argument is not too big compared to the memory limit. */ @@ -2694,9 +2727,10 @@ int processMultibulkBuffer(client *c) { sdsclear(c->querybuf); querybuf_len = sdslen(c->querybuf); /* Update cached length */ } else { - c->argv[c->argc++] = + (pcmd->argv)[(pcmd->argc)++] = createStringObject(c->querybuf+c->qb_pos,c->bulklen); - c->argv_len_sum += c->bulklen; + pcmd->argv_len_sum += c->bulklen; + c->all_argv_len_sum += c->bulklen; c->qb_pos += c->bulklen+2; } c->bulklen = -1; @@ -2707,12 +2741,25 @@ int processMultibulkBuffer(client *c) { /* We're done when c->multibulk == 0 */ if (c->multibulklen == 0) { /* Per-slot network bytes-in calculation, 3rd and 4th components. */ - c->net_input_bytes_curr_cmd += (c->argv_len_sum + (c->argc * 2)); + pcmd->input_bytes += (pcmd->argv_len_sum + (pcmd->argc * 2)); + pcmd->parsing_incomplete = 0; return C_OK; } /* Still not ready to process the command */ - return C_ERR; + pcmd->parsing_incomplete = 1; + return C_OK; +} + +/* Prepare the client for executing the next command: + * + * 1. Append the response, if necessary. + * 2. Reset the client. + * 3. Update the all_argv_len_sum counter and advance the pending_cmd cyclic buffer. + */ +void prepareForNextCommand(client *c) { + reqresAppendResponse(c); + resetClientInternal(c, 1); } /* Perform necessary tasks after a command was executed: @@ -2730,14 +2777,14 @@ void commandProcessed(client *c) { * since we have not applied the command. */ if (c->flags & CLIENT_BLOCKED) return; - reqresAppendResponse(c); clusterSlotStatsAddNetworkBytesInForUserClient(c); - resetClientInternal(c, 0); + prepareForNextCommand(c); long long prev_offset = c->reploff; if (c->flags & CLIENT_MASTER && !(c->flags & CLIENT_MULTI)) { /* Update the applied replication offset of our master. */ - c->reploff = c->read_reploff - sdslen(c->querybuf) + c->qb_pos; + serverAssert(c->reploff_next > 0); + c->reploff = c->reploff_next; } /* If the client is a master we need to compute the difference @@ -2810,7 +2857,7 @@ int processPendingCommandAndInputBuffer(client *c) { * Note: when a master client steps into this function, * it can always satisfy this condition, because its querybuf * contains data not applied. */ - if (c->querybuf && sdslen(c->querybuf) > 0) { + if ((c->querybuf && sdslen(c->querybuf) > 0) || c->pending_cmds.ready_len > 0) { return processInputBuffer(c); } return C_OK; @@ -2878,12 +2925,86 @@ void handleClientReadError(client *c) { sdsfree(bytes); break; } + case CLIENT_READ_COMMAND_NOT_FOUND: + case CLIENT_READ_BAD_ARITY: + /* These are command validation errors, not protocol errors. + * They are handled in processCommand() via commandCheckExistence() + * and commandCheckArity(), which generate appropriate error responses. + * No action needed here. */ + break; default: - serverPanic("Unknown client read error"); + serverPanic("Unknown client read error: %d", c->read_error); + break; + } +} + +void parseInputBuffer(client *c) { + /* We limit the lookahead for unauthenticated connections to 1. + * This is both to reduce memory overhead, and to prevent errors: AUTH can + * affect the handling of succeeding commands. Parsing of "large" + * unauthenticated multibulk commands is rejected, which would cause those + * commands to incorrectly return an error to the client. */ + const int lookahead = authRequired(c) ? 1 : server.lookahead; + + /* Parse up to lookahead commands */ + while (c->pending_cmds.ready_len < lookahead && c->querybuf && c->qb_pos < sdslen(c->querybuf)) { + /* Determine request type when unknown. */ + if (!c->reqtype) { + if (c->querybuf[c->qb_pos] == '*') { + c->reqtype = PROTO_REQ_MULTIBULK; + } else { + c->reqtype = PROTO_REQ_INLINE; + } + } + + pendingCommand *pcmd = NULL; + if (c->reqtype == PROTO_REQ_INLINE) { + pcmd = zmalloc(sizeof(pendingCommand)); + initPendingCommand(pcmd); + if (parseInlineBuffer(c, pcmd) == C_ERR && !pcmd->flags) { + /* If it fails but there are no errors, it means that it might just be + * that the desired content cannot be parsed. At this point, we exit and wait for the next time. */ + freePendingCommand(c, pcmd); + return; + } + } else if (c->reqtype == PROTO_REQ_MULTIBULK) { + int incomplete = c->pending_cmds.tail && c->pending_cmds.tail->parsing_incomplete; + if (unlikely(incomplete)) { + pcmd = popPendingCommandFromHead(&c->pending_cmds); + } else { + pcmd = zmalloc(sizeof(pendingCommand)); + initPendingCommand(pcmd); + } + + if (parseMultibulk(c, pcmd) == C_ERR && !pcmd->flags) { + /* If it fails but there are no errors, it means that it might just be + * that the desired content cannot be parsed. At this point, we exit and wait for the next time. */ + freePendingCommand(c, pcmd); + return; + } + } else { + serverPanic("Unknown request type"); + } + + addPengingCommand(&c->pending_cmds, pcmd); + if (unlikely(pcmd->flags || pcmd->parsing_incomplete)) break; + + if (!pcmd->parsing_incomplete) { + pcmd->reploff = c->read_reploff - sdslen(c->querybuf) + c->qb_pos; + preprocessCommand(c, pcmd); + resetClientQbufState(c); + } } } +/* Helper function to check if a read error is fatal (should stop processing) */ +static inline int isClientReadErrorFatal(int read_error) { + return read_error != 0 && + read_error != CLIENT_READ_COMMAND_NOT_FOUND && + read_error != CLIENT_READ_BAD_ARITY; +} + /* This function is called every time, in the client structure 'c', there is * more query buffer to process, because we read more data from the socket * or because a client was blocked and later reactivated, so there could be @@ -2891,7 +3012,8 @@ void handleClientReadError(client *c) { * return C_ERR in case the client was freed during the processing */ int processInputBuffer(client *c) { /* Keep processing while there is something in the input buffer */ - while(c->qb_pos < sdslen(c->querybuf)) { + while ((c->querybuf && c->qb_pos < sdslen(c->querybuf)) || + c->pending_cmds.ready_len > 0) { /* Immediately abort if the client is in the middle of something. */ if (c->flags & CLIENT_BLOCKED) break; @@ -2912,50 +3034,40 @@ int processInputBuffer(client *c) { * The same applies for clients we want to terminate ASAP. */ if (c->flags & (CLIENT_CLOSE_AFTER_REPLY|CLIENT_CLOSE_ASAP)) break; - /* Determine request type when unknown. */ - if (!c->reqtype) { - if (c->querybuf[c->qb_pos] == '*') { - c->reqtype = PROTO_REQ_MULTIBULK; - } else { - c->reqtype = PROTO_REQ_INLINE; + /* If commands are queued up, pop from the queue first */ + if (!consumePendingCommand(c)) { + parseInputBuffer(c); + if (consumePendingCommand(c) == 0) break; + + if (c->running_tid == IOTHREAD_MAIN_THREAD_ID && !(c->flags & CLIENT_IN_PREFETCH)) { + /* Prefetch the commands. */ + resetCommandsBatch(); + addCommandToBatch(c); + prefetchCommands(); } } - if (c->reqtype == PROTO_REQ_INLINE) { - if (processInlineBuffer(c) != C_OK) { - if (c->running_tid != IOTHREAD_MAIN_THREAD_ID && c->read_error) - enqueuePendingClientsToMainThread(c, 0); - break; - } - } else if (c->reqtype == PROTO_REQ_MULTIBULK) { - if (processMultibulkBuffer(c) != C_OK) { - if (c->running_tid != IOTHREAD_MAIN_THREAD_ID && c->read_error) - enqueuePendingClientsToMainThread(c, 0); - break; - } - } else { - serverPanic("Unknown request type"); + if (c->read_error && c->read_error != CLIENT_READ_COMMAND_NOT_FOUND && + c->read_error != CLIENT_READ_BAD_ARITY) { + break; + } + + if (c->running_tid != IOTHREAD_MAIN_THREAD_ID && c->read_error) { + enqueuePendingClientsToMainThread(c, 0); + break; } /* Multibulk processing could see a <= 0 length. */ - if (c->argc == 0) { - freeClientArgvInternal(c, 0); - c->reqtype = 0; - c->multibulklen = 0; - c->bulklen = -1; + if (!c->argc) { + /* A naked newline can be sent from masters as a keep-alive, or from slaves to refresh + * the last ACK time. In that case there's no command to actually execute. */ + prepareForNextCommand(c); } else { /* If we are in the context of an I/O thread, we can't really * execute the command here. All we can do is to flag the client * as one that needs to process the command. */ if (c->running_tid != IOTHREAD_MAIN_THREAD_ID) { c->io_flags |= CLIENT_IO_PENDING_COMMAND; - c->iolookedcmd = lookupCommand(c->argv, c->argc); - if (c->iolookedcmd && !commandCheckArity(c->iolookedcmd, c->argc, NULL)) { - /* The command was found, but the arity is invalid, reset it and let main - * thread handle. To avoid memory prefetching on an invalid command. */ - c->iolookedcmd = NULL; - } - c->slot = getSlotFromCommand(c->iolookedcmd, c->argv, c->argc); enqueuePendingClientsToMainThread(c, 0); break; } @@ -2985,6 +3097,7 @@ int processInputBuffer(client *c) { * so the repl_applied is not equal to qb_pos. */ if (c->repl_applied) { sdsrange(c->querybuf,c->repl_applied,-1); + serverAssert(c->qb_pos >= (size_t)c->repl_applied); c->qb_pos -= c->repl_applied; c->repl_applied = 0; } @@ -3272,7 +3385,7 @@ sds catClientInfoString(sds s, client *client) { " watch=%i", (int) listLength(client->watched_keys), " qbuf=%U", client->querybuf ? (unsigned long long) sdslen(client->querybuf) : 0, " qbuf-free=%U", client->querybuf ? (unsigned long long) sdsavail(client->querybuf) : 0, - " argv-mem=%U", (unsigned long long) client->argv_len_sum, + " argv-mem=%U", (unsigned long long) client->all_argv_len_sum, " multi-mem=%U", (unsigned long long) client->mstate.argv_len_sums, " rbs=%U", (unsigned long long) client->buf_usable_size, " rbp=%U", (unsigned long long) client->buf_peak, @@ -4181,14 +4294,51 @@ void rewriteClientCommandVector(client *c, int argc, ...) { void replaceClientCommandVector(client *c, int argc, robj **argv) { int j; retainOriginalCommandVector(c); + + /* We don't need to just fix the client argv, we also need to fix the pending command (same argv), + * But sometimes we reach here not from a real client, but from a Lua 'scriptRunCtx'. This flow bypasses the + * pending-command system entirely and uses c->argv directly. In this case there's no pending commands + * to update, so we skip that code. */ + pendingCommand *pcmd = NULL; + int is_mstate = 0; + if (c->mstate.executing_cmd < 0) { + is_mstate = 0; + if (c->pending_cmds.ready_len > 0) { + pcmd = c->pending_cmds.head; + serverAssert(!pcmd->parsing_incomplete); + } + } else { + is_mstate = 1; + serverAssert(c->mstate.executing_cmd < c->mstate.count); + pcmd = c->mstate.commands[c->mstate.executing_cmd]; + } + + if (pcmd) { + serverAssert(pcmd->argv == c->argv); + pcmd->argv = argv; + pcmd->argc = argc; + } freeClientArgv(c); c->argv = argv; c->argc = c->argv_len = argc; - c->argv_len_sum = 0; - for (j = 0; j < c->argc; j++) - if (c->argv[j]) - c->argv_len_sum += getStringObjectLen(c->argv[j]); + + if (!is_mstate) { /* multi-state does not track all_argv_len_sum, see code in queueMultiCommand */ + size_t new_argv_len_sum = 0; + for (j = 0; j < c->argc; j++) + if (c->argv[j]) + new_argv_len_sum += getStringObjectLen(c->argv[j]); + + if (!pcmd) { + c->all_argv_len_sum = new_argv_len_sum; + } else { + c->all_argv_len_sum -= pcmd->argv_len_sum; + pcmd->argv_len_sum = new_argv_len_sum; + c->all_argv_len_sum += pcmd->argv_len_sum; + } + } c->cmd = lookupCommandOrOriginal(c->argv,c->argc); + if (pcmd) + pcmd->cmd = c->cmd; serverAssertWithInfo(c,NULL,c->cmd != NULL); } @@ -4209,6 +4359,13 @@ void rewriteClientCommandArgument(client *c, int i, robj *newval) { robj *oldval; retainOriginalCommandVector(c); + /* We don't need to just fix the client argv, we also need to fix the pending command (same argv), + * But sometimes we reach here not from a real client, but from a Lua 'scriptRunCtx'. This flow bypasses the + * pending-command system entirely and uses c->argv directly. In this case there's no pending commands + * to update, so we skip that code. */ + pendingCommand *pcmd = c->pending_cmds.head ? c->pending_cmds.head: NULL; + int update_pcmd = pcmd && pcmd->argv == c->argv; + /* We need to handle both extending beyond argc (just update it and * initialize the new element) or beyond argv_len (realloc is needed). */ @@ -4221,12 +4378,12 @@ void rewriteClientCommandArgument(client *c, int i, robj *newval) { c->argv[i] = NULL; } oldval = c->argv[i]; - if (oldval) c->argv_len_sum -= getStringObjectLen(oldval); + if (oldval) c->all_argv_len_sum -= getStringObjectLen(oldval); if (newval) { c->argv[i] = newval; incrRefCount(newval); - c->argv_len_sum += getStringObjectLen(newval); + c->all_argv_len_sum += getStringObjectLen(newval); } else { /* move the remaining arguments one step left */ for (int j = i+1; j < c->argc; j++) { @@ -4236,10 +4393,20 @@ void rewriteClientCommandArgument(client *c, int i, robj *newval) { } if (oldval) decrRefCount(oldval); + if (update_pcmd) { + pcmd->argv = c->argv; + pcmd->argc = c->argc; + pcmd->argv_len = c->argv_len; + if (oldval) pcmd->argv_len_sum -= getStringObjectLen(oldval); + if (newval) pcmd->argv_len_sum += getStringObjectLen(newval); + } + /* If this is the command name make sure to fix c->cmd. */ if (i == 0) { c->cmd = lookupCommandOrOriginal(c->argv,c->argc); serverAssertWithInfo(c,NULL,c->cmd != NULL); + if (update_pcmd) + pcmd->cmd = c->cmd; } } @@ -4281,7 +4448,7 @@ size_t getClientMemoryUsage(client *c, size_t *output_buffer_mem_usage) { /* For efficiency (less work keeping track of the argv memory), it doesn't include the used memory * i.e. unused sds space and internal fragmentation, just the string length. but this is enough to * spot problematic clients. */ - mem += c->argv_len_sum + sizeof(robj*)*c->argc; + mem += c->all_argv_len_sum + sizeof(robj*)*c->argc; mem += multiStateMemOverhead(c); /* Add memory overhead of pubsub channels and patterns. Note: this is just the overhead of the robj pointers @@ -4715,3 +4882,97 @@ void evictClients(void) { } } } + +void initPendingCommand(pendingCommand *pcmd) { + memset(pcmd, 0, sizeof(pendingCommand)); + pcmd->slot = INVALID_CLUSTER_SLOT; +} + +void freePendingCommand(client *c, pendingCommand *pcmd) { + if (!pcmd) + return; + + if (pcmd->argv) { + for (int j = 0; j < pcmd->argc; j++) + decrRefCount(pcmd->argv[j]); + + zfree(pcmd->argv); + serverAssert(c->all_argv_len_sum >= pcmd->argv_len_sum); /* assert this doesn't try to go negative */ + c->all_argv_len_sum -= pcmd->argv_len_sum; + } + + zfree(pcmd); +} + +/* Add a command to the tail of the pending command list. */ +void addPengingCommand(pendingCommandList *queue, pendingCommand *cmd) { + cmd->next = NULL; + cmd->prev = queue->tail; + + if (queue->tail) { + queue->tail->next = cmd; + } else { + /* Queue was empty */ + queue->head = cmd; + } + + queue->tail = cmd; + queue->len++; + if (!cmd->parsing_incomplete) queue->ready_len++; +} + +pendingCommand *popPendingCommandFromHead(pendingCommandList *list) { + pendingCommand *cmd = list->head; + if (!cmd) return NULL; /* List is empty */ + + list->head = cmd->next; + if (list->head) { + list->head->prev = NULL; + } else { + /* Queue was empty */ + list->tail = NULL; + } + + cmd->next = cmd->prev = NULL; + list->len--; + if (!cmd->parsing_incomplete) list->ready_len--; + return cmd; +} + +pendingCommand *popPendingCommandFromTail(pendingCommandList *list) { + pendingCommand *cmd = list->tail; + if (!cmd) return NULL; /* List is empty */ + + list->tail = cmd->prev; + if (list->tail) { + list->tail->next = NULL; + } else { + /* Queue became empty */ + list->head = NULL; + } + + cmd->next = cmd->prev = NULL; + list->len--; + if (!cmd->parsing_incomplete) list->ready_len--; + return cmd; +} + +/* Consumes the first ready command from the pending command list and sets it as + * the client's current command. The command remains in the list but is marked as + * current. Returns 1 on success, 0 if no ready command is available (empty list + * or head command is still parsing). */ +static int consumePendingCommand(client *c) { + pendingCommand *curcmd = c->pending_cmds.head; + if (!curcmd || curcmd->parsing_incomplete) return 0; + + c->argc = curcmd->argc; + c->argv = curcmd->argv; + c->argv_len = curcmd->argv_len; + c->net_input_bytes_curr_cmd += curcmd->input_bytes; + c->reploff_next = curcmd->reploff; + c->slot = curcmd->slot; + c->lookedcmd = curcmd->cmd; + c->read_error = curcmd->flags; + c->current_pending_cmd = curcmd; + return 1; +} diff --git a/src/replication.c b/src/replication.c index 32921f19653..28bb27a2ddb 100644 --- a/src/replication.c +++ b/src/replication.c @@ -4199,12 +4199,14 @@ void replicationCacheMaster(client *c) { server.master->qb_pos = 0; server.master->repl_applied = 0; server.master->read_reploff = server.master->reploff; + server.master->reploff_next = 0; if (c->flags & CLIENT_MULTI) discardTransaction(c); listEmpty(c->reply); c->sentlen = 0; c->reply_bytes = 0; c->bufpos = 0; - resetClient(c); + resetClient(c, -1); + resetClientQbufState(c); /* Save the master. Server.master will be set to null later by * replicationHandleMasterDisconnection(). */ diff --git a/src/script_lua.c b/src/script_lua.c index 2e8220743c3..fc3cf6e9b4d 100644 --- a/src/script_lua.c +++ b/src/script_lua.c @@ -974,7 +974,8 @@ static int luaRedisGenericCommand(lua_State *lua, int raise_error) { c->argc = c->argv_len = 0; c->user = NULL; c->argv = NULL; - resetClient(c); + c->all_argv_len_sum = 0; + resetClient(c, 1); inuse--; if (raise_error) { diff --git a/src/server.c b/src/server.c index 350572111a0..61914e5244e 100644 --- a/src/server.c +++ b/src/server.c @@ -872,23 +872,6 @@ int clientsCronResizeQueryBuffer(client *c) { return 0; } -/* If the client has been idle for too long, free the client's arguments. */ -int clientsCronFreeArgvIfIdle(client *c) { - /* If the client is in the middle of parsing a command, or if argv is in use - * (e.g. parsed in the IO thread but not yet executed, or blocked), exit ASAP. */ - if (!c->argv || c->multibulklen || c->argc) return 0; - - /* Free argv if the client has been idle for more than 2 seconds or if argv - * size is too large. */ - time_t idletime = server.unixtime - c->lastinteraction; - if (idletime > 2 || c->argv_len > 128) { - c->argv_len = 0; - zfree(c->argv); - c->argv = NULL; - } - return 0; -} - /* The client output buffer can be adjusted to better fit the memory requirements. * * the logic is: @@ -960,7 +943,7 @@ int CurrentPeakMemUsageSlot = 0; int clientsCronTrackExpansiveClients(client *c) { size_t qb_size = c->querybuf ? sdsZmallocSize(c->querybuf) : 0; size_t argv_size = c->argv ? zmalloc_size(c->argv) : 0; - size_t in_usage = qb_size + c->argv_len_sum + argv_size; + size_t in_usage = qb_size + c->all_argv_len_sum + argv_size; size_t out_usage = getClientOutputBufferMemoryUsage(c); /* Track the biggest values observed so far in this slot. */ @@ -1112,7 +1095,6 @@ int clientsCronRunClient(client *c) { * terminated. */ if (clientsCronHandleTimeout(c,now)) return 1; if (clientsCronResizeQueryBuffer(c)) return 1; - if (clientsCronFreeArgvIfIdle(c)) return 1; if (clientsCronResizeOutputBuffer(c,now)) return 1; if (clientsCronTrackExpansiveClients(c)) return 1; @@ -2980,6 +2962,8 @@ void initServer(void) { if (server.maxmemory_clients != 0) initServerClientMemUsageBuckets(); + + prefetchCommandsBatchInit(); } void initListeners(void) { @@ -4061,6 +4045,52 @@ uint64_t getCommandFlags(client *c) { return cmd_flags; } +void preprocessCommand(client *c, pendingCommand *pcmd) { + pcmd->slot = INVALID_CLUSTER_SLOT; + if (pcmd->argc == 0) + return; + + /* Check if we can reuse the last command instead of looking it up. + * The last command is either the penultimate pending command (if it exists), or c->lastcmd. */ + struct redisCommand *last_cmd = c->pending_cmds.tail->prev ? c->pending_cmds.head->cmd : c->lastcmd; + + if (isCommandReusable(last_cmd, pcmd->argv[0])) + pcmd->cmd = last_cmd; + else + pcmd->cmd = lookupCommand(pcmd->argv, pcmd->argc); + + if (!pcmd->cmd) { + pcmd->flags = CLIENT_READ_COMMAND_NOT_FOUND; + return; + } + + if ((pcmd->cmd->arity > 0 && pcmd->cmd->arity != pcmd->argc) || + (pcmd->argc < -pcmd->cmd->arity)) + { + pcmd->flags = CLIENT_READ_BAD_ARITY; + return; + } + + if (server.cluster_enabled) { + getKeysResult result = (getKeysResult)GETKEYS_RESULT_INIT; + int numkeys = getKeysFromCommand(pcmd->cmd, pcmd->argv, pcmd->argc, &result); + for (int i = 0; i < numkeys; i++) { + robj *thiskey = pcmd->argv[result.keys[i].pos]; + int thisslot = (int)keyHashSlot((char*)thiskey->ptr, sdslen(thiskey->ptr)); + if (pcmd->slot == INVALID_CLUSTER_SLOT) { + pcmd->slot = thisslot; + } else if (pcmd->slot != thisslot) { + serverLog(LL_NOTICE, "preprocessCommand: CROSS SLOT ERROR"); + /* Invalidate the slot to indicate that there is a cross-slot error */ + pcmd->slot = INVALID_CLUSTER_SLOT; + /* Cross slot error. */ + break; + } + } + getKeysFreeResult(&result); + } +} + /* If this function gets called we already read a whole * command, arguments are in the client argv/argc fields. * processCommand() execute the command or prepare the @@ -4104,11 +4134,17 @@ int processCommand(client *c) { * we do not have to repeat the same checks */ if (!client_reprocessing_command) { /* check if we can reuse the last command instead of looking up if we already have that info */ - struct redisCommand *cmd = NULL; - if (isCommandReusable(c->lastcmd, c->argv[0])) - cmd = c->lastcmd; - else - cmd = c->iolookedcmd ? c->iolookedcmd : lookupCommand(c->argv, c->argc); + struct redisCommand *cmd = c->lookedcmd; + + /* The command may have been modified by modules (e.g., in CommandFilters callbacks), + * so we need to look it up again. */ + if (!cmd) { + if (isCommandReusable(c->lastcmd, c->argv[0])) + cmd = c->lastcmd; + else + cmd = lookupCommand(c->argv, c->argc); + } + if (!cmd) { /* Handle possible security attacks. */ if (!strcasecmp(c->argv[0]->ptr,"host:") || !strcasecmp(c->argv[0]->ptr,"post")) { @@ -4441,9 +4477,9 @@ int areCommandKeysInSameSlot(client *c, int *hashslot) { /* If client is in multi-exec, we need to check the slot of all keys * in the transaction. */ for (int i = 0; i < (ms ? ms->count : 1); i++) { - struct redisCommand *cmd = ms ? ms->commands[i].cmd : c->cmd; - robj **argv = ms ? ms->commands[i].argv : c->argv; - int argc = ms ? ms->commands[i].argc : c->argc; + struct redisCommand *cmd = ms ? ms->commands[i]->cmd : c->cmd; + robj **argv = ms ? ms->commands[i]->argv : c->argv; + int argc = ms ? ms->commands[i]->argc : c->argc; getKeysResult result = GETKEYS_RESULT_INIT; int numkeys = getKeysFromCommand(cmd, argv, argc, &result); @@ -6965,7 +7001,7 @@ void dismissClientMemory(client *c) { dismissMemory(c->buf, c->buf_usable_size); if (c->querybuf) dismissSds(c->querybuf); /* Dismiss argv array only if we estimate it contains a big buffer. */ - if (c->argc && c->argv_len_sum/c->argc >= server.page_size) { + if (c->argc && c->all_argv_len_sum/c->argc >= server.page_size) { for (int i = 0; i < c->argc; i++) { dismissObject(c->argv[i], 0); } diff --git a/src/server.h b/src/server.h index a5cf8a73fe0..482ffe656dc 100644 --- a/src/server.h +++ b/src/server.h @@ -202,6 +202,9 @@ struct hdr_histogram; * in order to make sure of not over provisioning more than 128 fds. */ #define CONFIG_FDSET_INCR (CONFIG_MIN_RESERVED_FDS+96) +/* Default lookahead value */ +#define REDIS_DEFAULT_LOOKAHEAD 16 + /* OOM Score Adjustment classes. */ #define CONFIG_OOM_MASTER 0 #define CONFIG_OOM_REPLICA 1 @@ -431,6 +434,7 @@ extern int configOOMScoreAdjValuesDefaults[CONFIG_OOM_COUNT]; #define CLIENT_REEXECUTING_COMMAND (1ULL<<50) /* The client is re-executing the command. */ #define CLIENT_REPL_RDB_CHANNEL (1ULL<<51) /* Client which is used for rdb delivery as part of rdb channel replication */ #define CLIENT_INTERNAL (1ULL<<52) /* Internal client connection */ +#define CLIENT_IN_PREFETCH (1ULL<<53) /* The client is in the prefetching batch. */ /* Any flag that does not let optimize FLUSH SYNC to run it in bg as blocking client ASYNC */ #define CLIENT_AVOID_BLOCKING_ASYNC_FLUSH (CLIENT_DENY_BLOCKING|CLIENT_MULTI|CLIENT_LUA_DEBUG|CLIENT_LUA_DEBUG_SYNC|CLIENT_MODULE) @@ -461,6 +465,8 @@ extern int configOOMScoreAdjValuesDefaults[CONFIG_OOM_COUNT]; #define CLIENT_READ_CONN_DISCONNECTED 11 #define CLIENT_READ_CONN_CLOSED 12 #define CLIENT_READ_REACHED_MAX_QUERYBUF 13 +#define CLIENT_READ_COMMAND_NOT_FOUND 14 +#define CLIENT_READ_BAD_ARITY 15 /* Client block type (btype field in client structure) * if CLIENT_BLOCKED flag is set. */ @@ -1137,16 +1143,11 @@ typedef struct rdbLoadingCtx { functionsLibCtx* functions_lib_ctx; }rdbLoadingCtx; -/* Client MULTI/EXEC state */ -typedef struct multiCmd { - robj **argv; - int argv_len; - int argc; - struct redisCommand *cmd; -} multiCmd; - +typedef struct pendingCommand pendingCommand; typedef struct multiState { - multiCmd *commands; /* Array of MULTI commands */ + pendingCommand **commands; /* Array of pointers to MULTI commands */ + int executing_cmd; /* The index of the currently exeuted transaction + command (index in commands field) */ int count; /* Total number of MULTI commands */ int cmd_flags; /* The accumulated command flags OR-ed together. So if at least a command has a given flag, it @@ -1155,7 +1156,7 @@ typedef struct multiState { is possible to know if all the commands have a certain flag. */ size_t argv_len_sums; /* mem used by all commands arguments */ - int alloc_count; /* total number of multiCmd struct memory reserved. */ + int alloc_count; /* total number of pendingCommand struct memory reserved. */ } multiState; /* This structure holds the blocking operation state for a client. @@ -1204,6 +1205,14 @@ typedef struct readyList { robj *key; } readyList; +/* List of pending commands. */ +typedef struct pendingCommandList { + pendingCommand *head; + pendingCommand *tail; + int len; /* Number of commands in the list */ + int ready_len; /* Number of commands that are ready to be processed */ +} pendingCommandList; + /* This structure represents a Redis user. This is useful for ACLs, the * user is associated to the connection after the connection is authenticated. * If there is no associated user, the connection uses the default user. */ @@ -1343,11 +1352,13 @@ typedef struct client { int argv_len; /* Size of argv array (may be more than argc) */ int original_argc; /* Num of arguments of original command if arguments were rewritten. */ robj **original_argv; /* Arguments of original command if arguments were rewritten. */ - size_t argv_len_sum; /* Sum of lengths of objects in argv list. */ + size_t all_argv_len_sum; /* Sum of lengths of objects in all pendingCommand argv lists */ + pendingCommandList pending_cmds; /* List of parsed pending commands */ + pendingCommand *current_pending_cmd; robj **deferred_objects; /* Array of deferred objects to free. */ int deferred_objects_num; /* Number of deferred objects to free. */ struct redisCommand *cmd, *lastcmd; /* Last command executed. */ - struct redisCommand *iolookedcmd; /* Command looked up in IO threads. */ + struct redisCommand *lookedcmd; /* Command looked up in lookahead. */ struct redisCommand *realcmd; /* The original command that was executed by the client, Used to update error stats in case the c->cmd was modified during the command invocation (like on GEOADD for example). */ @@ -1383,6 +1394,7 @@ typedef struct client { sds replpreamble; /* Replication DB preamble. */ long long read_reploff; /* Read replication offset if this is a master. */ long long reploff; /* Applied replication offset if this is a master. */ + long long reploff_next; /* Next value to set for reploff when a command finishes executing */ long long repl_applied; /* Applied replication data count in querybuf, if this is a replica. */ long long repl_ack_off; /* Replication ack offset, if this is a slave. */ long long repl_aof_off; /* Replication AOF fsync ack offset, if this is a slave. */ @@ -1970,6 +1982,7 @@ struct redisServer { int active_defrag_cycle_max; /* maximal effort for defrag in CPU percentage */ unsigned long active_defrag_max_scan_fields; /* maximum number of fields of set/hash/zset/list to process from within the main dict scan */ size_t client_max_querybuf_len; /* Limit for client query buffer length */ + int lookahead; /* how many commands in each client pipeline to decode and prefetch */ int dbnum; /* Total number of configured DBs */ int supervised; /* 1 if supervised, 0 otherwise. */ int supervised_mode; /* See SUPERVISED_* */ @@ -2337,6 +2350,25 @@ typedef struct { } getKeysResult; #define GETKEYS_RESULT_INIT { 0, MAX_KEYS_BUFFER, {{0}}, NULL } +/* Parser state and parse result of a command from a client's input buffer. */ +struct pendingCommand { + int argc; /* Num of arguments of current command. */ + int argv_len; /* Size of argv array (may be more than argc) */ + robj **argv; /* Arguments of current command. */ + size_t argv_len_sum; /* Sum of lengths of objects in argv list. */ + unsigned long long input_bytes; + struct redisCommand *cmd; + // getKeysResult keys_result; + long long reploff; /* c->reploff should be set to this value when the command is processed */ + int slot; /* The slot the command is executing against. Set to INVALID_CLUSTER_SLOT if no slot is being used or if + the command has a cross slot error */ + uint8_t flags; + int parsing_incomplete; + + struct pendingCommand *next; + struct pendingCommand *prev; +}; + /* Key specs definitions. * * Brief: This is a scheme that tries to describe the location @@ -2797,6 +2829,13 @@ void moduleDefragEnd(void); void *moduleGetHandleByName(char *modulename); int moduleIsModuleCommand(void *module_handle, struct redisCommand *cmd); +/* pcmd */ +void initPendingCommand(pendingCommand *pcmd); +void freePendingCommand(client *c, pendingCommand *pcmd); +void addPengingCommand(pendingCommandList *queue, pendingCommand *cmd); +pendingCommand *popPendingCommandFromHead(pendingCommandList *queue); +pendingCommand *popPendingCommandFromTail(pendingCommandList *queue); + /* Utils */ long long ustime(void); mstime_t mstime(void); @@ -2822,9 +2861,11 @@ void deauthenticateAndCloseClient(client *c); void logInvalidUseAndFreeClientAsync(client *c, const char *fmt, ...); int beforeNextClient(client *c); void clearClientConnectionState(client *c); -void resetClient(client *c); +void resetClient(client *c, int num_pcmds_to_free); +void resetClientQbufState(client *c); void freeClientOriginalArgv(client *c); void freeClientArgv(client *c); +void freeClientPendingCommands(client *c, int num_pcmds_to_free); void tryDeferFreeClientObject(client *c, robj *o); void freeClientDeferredObjects(client *c, int free_array); void sendReplyToClient(connection *conn); @@ -3334,8 +3375,11 @@ void updatePeakMemory(size_t used_memory); size_t freeMemoryGetNotCountedMemory(void); int overMaxmemoryAfterAlloc(size_t moremem); uint64_t getCommandFlags(client *c); +void preprocessCommand(client *c, pendingCommand *pcmd); int processCommand(client *c); void commandProcessed(client *c); +void prepareForNextCommand(client *c); + int processPendingCommandAndInputBuffer(client *c); int processCommandAndResetClient(client *c); int areCommandKeysInSameSlot(client *c, int *hashslot); diff --git a/tests/unit/cluster/sharded-pubsub.tcl b/tests/unit/cluster/sharded-pubsub.tcl index 0347ac65351..57b550ab727 100644 --- a/tests/unit/cluster/sharded-pubsub.tcl +++ b/tests/unit/cluster/sharded-pubsub.tcl @@ -29,7 +29,7 @@ start_cluster 1 1 {tags {external:skip cluster}} { $primary MULTI $primary SPUBLISH ch1 "hello" $primary GET foo - catch {[$primary EXEC]} err + catch {$primary EXEC} err assert_match {CROSSSLOT*} $err } diff --git a/tests/unit/memefficiency.tcl b/tests/unit/memefficiency.tcl index 75bd96fbac1..5be6db0278d 100644 --- a/tests/unit/memefficiency.tcl +++ b/tests/unit/memefficiency.tcl @@ -76,6 +76,10 @@ run_solo {defrag} { } proc test_active_defrag {type} { + + # note: Disabling lookahead because it changes the number and order of allocations which interferes with defrag and causes tests to fail + r config set lookahead 1 + if {[string match {*jemalloc*} [s mem_allocator]] && [r debug mallctl arenas.page] <= 8192} { test "Active defrag main dictionary: $type" { r config set hz 100 @@ -777,6 +781,7 @@ run_solo {defrag} { if {$::verbose} { puts "frag $frag" } + puts 1111 assert {$frag >= $expected_frag} r config set latency-monitor-threshold 5 From 24816321ff63592c25301f8964412d892770c084 Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Tue, 30 Sep 2025 17:04:50 +0800 Subject: [PATCH 02/71] Fix complain --- src/networking.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/networking.c b/src/networking.c index a4a6752e4f5..38eef6b9dfc 100644 --- a/src/networking.c +++ b/src/networking.c @@ -3047,8 +3047,7 @@ int processInputBuffer(client *c) { } } - if (c->read_error && c->read_error != CLIENT_READ_COMMAND_NOT_FOUND && - c->read_error != CLIENT_READ_BAD_ARITY) { + if (isClientReadErrorFatal(c->read_error)) { break; } From 742cb79e275bcf74382e5cacd2d5e4c9bbfdff3e Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Wed, 1 Oct 2025 19:01:40 +0800 Subject: [PATCH 03/71] Revert code in processInputBuffer to avoid too many conflicts Co-authored-by: oranagra --- src/networking.c | 141 ++++++++++++++++++++++++----------------------- 1 file changed, 73 insertions(+), 68 deletions(-) diff --git a/src/networking.c b/src/networking.c index 38eef6b9dfc..8614f4ceb2a 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2938,65 +2938,6 @@ void handleClientReadError(client *c) { } } -void parseInputBuffer(client *c) { - /* We limit the lookahead for unauthenticated connections to 1. - * This is both to reduce memory overhead, and to prevent errors: AUTH can - * affect the handling of succeeding commands. Parsing of "large" - * unauthenticated multibulk commands is rejected, which would cause those - * commands to incorrectly return an error to the client. */ - const int lookahead = authRequired(c) ? 1 : server.lookahead; - - /* Parse up to lookahead commands */ - while (c->pending_cmds.ready_len < lookahead && c->querybuf && c->qb_pos < sdslen(c->querybuf)) { - /* Determine request type when unknown. */ - if (!c->reqtype) { - if (c->querybuf[c->qb_pos] == '*') { - c->reqtype = PROTO_REQ_MULTIBULK; - } else { - c->reqtype = PROTO_REQ_INLINE; - } - } - - pendingCommand *pcmd = NULL; - if (c->reqtype == PROTO_REQ_INLINE) { - pcmd = zmalloc(sizeof(pendingCommand)); - initPendingCommand(pcmd); - if (parseInlineBuffer(c, pcmd) == C_ERR && !pcmd->flags) { - /* If it fails but there are no errors, it means that it might just be - * that the desired content cannot be parsed. At this point, we exit and wait for the next time. */ - freePendingCommand(c, pcmd); - return; - } - } else if (c->reqtype == PROTO_REQ_MULTIBULK) { - int incomplete = c->pending_cmds.tail && c->pending_cmds.tail->parsing_incomplete; - if (unlikely(incomplete)) { - pcmd = popPendingCommandFromHead(&c->pending_cmds); - } else { - pcmd = zmalloc(sizeof(pendingCommand)); - initPendingCommand(pcmd); - } - - if (parseMultibulk(c, pcmd) == C_ERR && !pcmd->flags) { - /* If it fails but there are no errors, it means that it might just be - * that the desired content cannot be parsed. At this point, we exit and wait for the next time. */ - freePendingCommand(c, pcmd); - return; - } - } else { - serverPanic("Unknown request type"); - } - - addPengingCommand(&c->pending_cmds, pcmd); - if (unlikely(pcmd->flags || pcmd->parsing_incomplete)) - break; - - if (!pcmd->parsing_incomplete) { - pcmd->reploff = c->read_reploff - sdslen(c->querybuf) + c->qb_pos; - preprocessCommand(c, pcmd); - resetClientQbufState(c); - } - } -} /* Helper function to check if a read error is fatal (should stop processing) */ static inline int isClientReadErrorFatal(int read_error) { @@ -3034,19 +2975,83 @@ int processInputBuffer(client *c) { * The same applies for clients we want to terminate ASAP. */ if (c->flags & (CLIENT_CLOSE_AFTER_REPLY|CLIENT_CLOSE_ASAP)) break; - /* If commands are queued up, pop from the queue first */ - if (!consumePendingCommand(c)) { - parseInputBuffer(c); - if (consumePendingCommand(c) == 0) break; + /* We limit the lookahead for unauthenticated connections to 1. + * This is both to reduce memory overhead, and to prevent errors: AUTH can + * affect the handling of succeeding commands. Parsing of "large" + * unauthenticated multibulk commands is rejected, which would cause those + * commands to incorrectly return an error to the client. */ + const int lookahead = authRequired(c) ? 1 : server.lookahead; + + /* Determine if we need to parse more commands from the query buffer. + * Only parse when there are no ready commands waiting to be processed. */ + const int parse_more = !c->pending_cmds.ready_len; - if (c->running_tid == IOTHREAD_MAIN_THREAD_ID && !(c->flags & CLIENT_IN_PREFETCH)) { - /* Prefetch the commands. */ - resetCommandsBatch(); - addCommandToBatch(c); - prefetchCommands(); + /* Parse up to lookahead commands only if we don't have enough ready commands */ + while (parse_more && c->pending_cmds.ready_len < lookahead && + c->querybuf && c->qb_pos < sdslen(c->querybuf)) + { + /* Determine request type when unknown. */ + if (!c->reqtype) { + if (c->querybuf[c->qb_pos] == '*') { + c->reqtype = PROTO_REQ_MULTIBULK; + } else { + c->reqtype = PROTO_REQ_INLINE; + } + } + + pendingCommand *pcmd = NULL; + if (c->reqtype == PROTO_REQ_INLINE) { + pcmd = zmalloc(sizeof(pendingCommand)); + initPendingCommand(pcmd); + if (parseInlineBuffer(c, pcmd) == C_ERR && !pcmd->flags) { + /* If it fails but there are no errors, it means that it might just be + * that the desired content cannot be parsed. At this point, we exit and wait for the next time. */ + freePendingCommand(c, pcmd); + break; + } + } else if (c->reqtype == PROTO_REQ_MULTIBULK) { + int incomplete = c->pending_cmds.tail && c->pending_cmds.tail->parsing_incomplete; + if (unlikely(incomplete)) { + pcmd = popPendingCommandFromHead(&c->pending_cmds); + } else { + pcmd = zmalloc(sizeof(pendingCommand)); + initPendingCommand(pcmd); + } + + if (parseMultibulk(c, pcmd) == C_ERR && !pcmd->flags) { + /* If it fails but there are no errors, it means that it might just be + * that the desired content cannot be parsed. At this point, we exit and wait for the next time. */ + freePendingCommand(c, pcmd); + break; + } + } else { + serverPanic("Unknown request type"); } + + addPengingCommand(&c->pending_cmds, pcmd); + if (unlikely(pcmd->flags || pcmd->parsing_incomplete)) + break; + + if (!pcmd->parsing_incomplete) { + pcmd->reploff = c->read_reploff - sdslen(c->querybuf) + c->qb_pos; + preprocessCommand(c, pcmd); + resetClientQbufState(c); + } + } + + /* Try to consume the next ready command from the pending command list. */ + if (!consumePendingCommand(c)) break; + + /* Prefetch the command if we are in the main thread. If running in an IO thread, + * jprefetch will be deferred until the client is processed by the main thread. */ + if (c->running_tid == IOTHREAD_MAIN_THREAD_ID && !(c->flags & CLIENT_IN_PREFETCH)) { + /* Prefetch the commands. */ + resetCommandsBatch(); + addCommandToBatch(c); + prefetchCommands(); } + /* Check if the client has a fatal read error that requires stopping processing. */ if (isClientReadErrorFatal(c->read_error)) { break; } From e0ff33ad28ded4c76400b99aa5a914305edc3101 Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Wed, 1 Oct 2025 19:02:47 +0800 Subject: [PATCH 04/71] Rename CLIENT_IN_PREFETCH to CLIENT_IN_MEMORY_PREFETCH Co-authored-by: oranagra --- src/iothread.c | 4 ++-- src/networking.c | 4 ++-- src/server.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/iothread.c b/src/iothread.c index 8a65f11a5f2..9401da23122 100644 --- a/src/iothread.c +++ b/src/iothread.c @@ -467,12 +467,12 @@ int processClientsFromIOThread(IOThread *t) { /* Process the pending command and input buffer. */ if (!c->read_error && c->io_flags & CLIENT_IO_PENDING_COMMAND) { c->flags |= CLIENT_PENDING_COMMAND; - c->flags |= CLIENT_IN_PREFETCH; + c->flags |= CLIENT_IN_MEMORY_PREFETCH; if (processPendingCommandAndInputBuffer(c) == C_ERR) { /* If the client is no longer valid, it must be freed safely. */ continue; } - c->flags &= ~CLIENT_IN_PREFETCH; + c->flags &= ~CLIENT_IN_MEMORY_PREFETCH; } /* We may have pending replies if io thread may not finish writing diff --git a/src/networking.c b/src/networking.c index 8614f4ceb2a..bd3740a0a25 100644 --- a/src/networking.c +++ b/src/networking.c @@ -3043,8 +3043,8 @@ int processInputBuffer(client *c) { if (!consumePendingCommand(c)) break; /* Prefetch the command if we are in the main thread. If running in an IO thread, - * jprefetch will be deferred until the client is processed by the main thread. */ - if (c->running_tid == IOTHREAD_MAIN_THREAD_ID && !(c->flags & CLIENT_IN_PREFETCH)) { + * prefetch will be deferred until the client is processed by the main thread. */ + if (c->running_tid == IOTHREAD_MAIN_THREAD_ID && !(c->flags & CLIENT_IN_MEMORY_PREFETCH)) { /* Prefetch the commands. */ resetCommandsBatch(); addCommandToBatch(c); diff --git a/src/server.h b/src/server.h index 482ffe656dc..72ca8766e8a 100644 --- a/src/server.h +++ b/src/server.h @@ -434,7 +434,7 @@ extern int configOOMScoreAdjValuesDefaults[CONFIG_OOM_COUNT]; #define CLIENT_REEXECUTING_COMMAND (1ULL<<50) /* The client is re-executing the command. */ #define CLIENT_REPL_RDB_CHANNEL (1ULL<<51) /* Client which is used for rdb delivery as part of rdb channel replication */ #define CLIENT_INTERNAL (1ULL<<52) /* Internal client connection */ -#define CLIENT_IN_PREFETCH (1ULL<<53) /* The client is in the prefetching batch. */ +#define CLIENT_IN_MEMORY_PREFETCH (1ULL<<53) /* The client is in the prefetching batch to avoid nested prefetch. */ /* Any flag that does not let optimize FLUSH SYNC to run it in bg as blocking client ASYNC */ #define CLIENT_AVOID_BLOCKING_ASYNC_FLUSH (CLIENT_DENY_BLOCKING|CLIENT_MULTI|CLIENT_LUA_DEBUG|CLIENT_LUA_DEBUG_SYNC|CLIENT_MODULE) From 77d7c5b1e6d1bb2b6e780c7820760104d19dbb15 Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Wed, 1 Oct 2025 21:37:47 +0800 Subject: [PATCH 05/71] no longer use flags instead of read_error for pendingCommand Co-authored-by: oranagra --- src/memory_prefetch.c | 3 ++- src/networking.c | 57 +++++++++++++++++++++---------------------- src/server.c | 4 +-- src/server.h | 10 +++++--- 4 files changed, 39 insertions(+), 35 deletions(-) diff --git a/src/memory_prefetch.c b/src/memory_prefetch.c index 00d06222d69..fee24b43171 100644 --- a/src/memory_prefetch.c +++ b/src/memory_prefetch.c @@ -384,7 +384,8 @@ int addCommandToBatch(client *c) { pendingCommand *pcmd = c->pending_cmds.head; while (pcmd != NULL) { - if (pcmd->parsing_incomplete || !pcmd->cmd || pcmd->flags) break; + /* Skip commands that have not been preprocessed, or have errors. */ + if ((pcmd->flags & PENDING_CMD_FLAG_INCOMPLETE) || !pcmd->cmd || pcmd->read_error) break; getKeysResult result = GETKEYS_RESULT_INIT; int numkeys = getKeysFromCommand(pcmd->cmd, pcmd->argv, pcmd->argc, &result); diff --git a/src/networking.c b/src/networking.c index bd3740a0a25..b1a655bac06 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2436,7 +2436,7 @@ int parseInlineBuffer(client *c, pendingCommand *pcmd) { /* Nothing to do without a \r\n */ if (newline == NULL) { if (sdslen(c->querybuf)-c->qb_pos > PROTO_INLINE_MAX_SIZE) { - c->read_error = CLIENT_READ_TOO_BIG_INLINE_REQUEST; + pcmd->read_error = CLIENT_READ_TOO_BIG_INLINE_REQUEST; } return C_ERR; } @@ -2451,7 +2451,7 @@ int parseInlineBuffer(client *c, pendingCommand *pcmd) { argv = sdssplitargs(aux,&argc); sdsfree(aux); if (argv == NULL) { - c->read_error = CLIENT_READ_UNBALANCED_QUOTES; + pcmd->read_error = CLIENT_READ_UNBALANCED_QUOTES; return C_ERR; } @@ -2470,7 +2470,7 @@ int parseInlineBuffer(client *c, pendingCommand *pcmd) { * to keep the connection active. */ if (querylen != 0 && c->flags & CLIENT_MASTER) { sdsfreesplitres(argv,argc); - c->read_error = CLIENT_READ_MASTER_USING_INLINE_PROTOCAL; + pcmd->read_error = CLIENT_READ_MASTER_USING_INLINE_PROTOCAL; return C_ERR; } @@ -2559,7 +2559,7 @@ static int parseMultibulk(client *c, pendingCommand *pcmd) { newline = memchr(c->querybuf+c->qb_pos,'\r',sdslen(c->querybuf) - c->qb_pos); if (newline == NULL) { if (querybuf_len-c->qb_pos > PROTO_INLINE_MAX_SIZE) { - pcmd->flags = CLIENT_READ_TOO_BIG_MBULK_COUNT_STRING; + pcmd->read_error = CLIENT_READ_TOO_BIG_MBULK_COUNT_STRING; } return C_ERR; } @@ -2574,10 +2574,10 @@ static int parseMultibulk(client *c, pendingCommand *pcmd) { size_t multibulklen_slen = newline - (c->querybuf + 1 + c->qb_pos); ok = string2ll(c->querybuf+1+c->qb_pos,newline-(c->querybuf+1+c->qb_pos),&ll); if (!ok || ll > INT_MAX) { - pcmd->flags = CLIENT_READ_INVALID_MULTIBUCK_LENGTH; + pcmd->read_error = CLIENT_READ_INVALID_MULTIBUCK_LENGTH; return C_ERR; } else if (ll > 10 && authRequired(c)) { - pcmd->flags = CLIENT_READ_UNAUTH_MBUCK_COUNT; + pcmd->read_error = CLIENT_READ_UNAUTH_MBUCK_COUNT; return C_ERR; } @@ -2634,7 +2634,7 @@ static int parseMultibulk(client *c, pendingCommand *pcmd) { newline = memchr(c->querybuf+c->qb_pos,'\r',sdslen(c->querybuf) - c->qb_pos); if (newline == NULL) { if (querybuf_len-c->qb_pos > PROTO_INLINE_MAX_SIZE) { - pcmd->flags = CLIENT_READ_TOO_BIG_BUCK_COUNT_STRING; + pcmd->read_error = CLIENT_READ_TOO_BIG_BUCK_COUNT_STRING; return C_ERR; } break; @@ -2645,7 +2645,7 @@ static int parseMultibulk(client *c, pendingCommand *pcmd) { break; if (c->querybuf[c->qb_pos] != '$') { - pcmd->flags = CLIENT_READ_EXPECTED_DOLLAR; + pcmd->read_error = CLIENT_READ_EXPECTED_DOLLAR; return C_ERR; } @@ -2653,10 +2653,10 @@ static int parseMultibulk(client *c, pendingCommand *pcmd) { ok = string2ll(c->querybuf+c->qb_pos+1,newline-(c->querybuf+c->qb_pos+1),&ll); if (!ok || ll < 0 || (!(c->flags & CLIENT_MASTER) && ll > server.proto_max_bulk_len)) { - pcmd->flags = CLIENT_READ_INVALID_BUCK_LENGTH; + pcmd->read_error = CLIENT_READ_INVALID_BUCK_LENGTH; return C_ERR; } else if (ll > 16384 && authRequired(c)) { - pcmd->flags = CLIENT_READ_UNAUTH_BUCK_LENGTH; + pcmd->read_error = CLIENT_READ_UNAUTH_BUCK_LENGTH; return C_ERR; } @@ -2692,7 +2692,7 @@ static int parseMultibulk(client *c, pendingCommand *pcmd) { /* Per-slot network bytes-in calculation, 2nd component. */ pcmd->input_bytes += (bulklen_slen + 3); } else { - serverAssert(pcmd->parsing_incomplete); + serverAssert(pcmd->flags & PENDING_CMD_FLAG_INCOMPLETE); } /* Read bulk argument */ @@ -2742,12 +2742,12 @@ static int parseMultibulk(client *c, pendingCommand *pcmd) { if (c->multibulklen == 0) { /* Per-slot network bytes-in calculation, 3rd and 4th components. */ pcmd->input_bytes += (pcmd->argv_len_sum + (pcmd->argc * 2)); - pcmd->parsing_incomplete = 0; + pcmd->flags &= ~PENDING_CMD_FLAG_INCOMPLETE; return C_OK; } /* Still not ready to process the command */ - pcmd->parsing_incomplete = 1; + pcmd->flags |= PENDING_CMD_FLAG_INCOMPLETE; return C_OK; } @@ -3003,22 +3003,23 @@ int processInputBuffer(client *c) { if (c->reqtype == PROTO_REQ_INLINE) { pcmd = zmalloc(sizeof(pendingCommand)); initPendingCommand(pcmd); - if (parseInlineBuffer(c, pcmd) == C_ERR && !pcmd->flags) { + if (parseInlineBuffer(c, pcmd) == C_ERR && !pcmd->read_error) { /* If it fails but there are no errors, it means that it might just be * that the desired content cannot be parsed. At this point, we exit and wait for the next time. */ freePendingCommand(c, pcmd); break; } } else if (c->reqtype == PROTO_REQ_MULTIBULK) { - int incomplete = c->pending_cmds.tail && c->pending_cmds.tail->parsing_incomplete; + int incomplete = (c->pending_cmds.len != c->pending_cmds.ready_len); + // int incomplete = c->pending_cmds.tail && c->pending_cmds.tail->parsing_incomplete; if (unlikely(incomplete)) { - pcmd = popPendingCommandFromHead(&c->pending_cmds); + pcmd = popPendingCommandFromTail(&c->pending_cmds); } else { pcmd = zmalloc(sizeof(pendingCommand)); initPendingCommand(pcmd); } - if (parseMultibulk(c, pcmd) == C_ERR && !pcmd->flags) { + if (parseMultibulk(c, pcmd) == C_ERR && !pcmd->read_error) { /* If it fails but there are no errors, it means that it might just be * that the desired content cannot be parsed. At this point, we exit and wait for the next time. */ freePendingCommand(c, pcmd); @@ -3029,14 +3030,12 @@ int processInputBuffer(client *c) { } addPengingCommand(&c->pending_cmds, pcmd); - if (unlikely(pcmd->flags || pcmd->parsing_incomplete)) + if (unlikely(pcmd->read_error || (pcmd->flags & PENDING_CMD_FLAG_INCOMPLETE))) break; - if (!pcmd->parsing_incomplete) { - pcmd->reploff = c->read_reploff - sdslen(c->querybuf) + c->qb_pos; - preprocessCommand(c, pcmd); - resetClientQbufState(c); - } + pcmd->reploff = c->read_reploff - sdslen(c->querybuf) + c->qb_pos; + preprocessCommand(c, pcmd); + resetClientQbufState(c); } /* Try to consume the next ready command from the pending command list. */ @@ -4309,7 +4308,7 @@ void replaceClientCommandVector(client *c, int argc, robj **argv) { is_mstate = 0; if (c->pending_cmds.ready_len > 0) { pcmd = c->pending_cmds.head; - serverAssert(!pcmd->parsing_incomplete); + serverAssert(!(pcmd->flags & PENDING_CMD_FLAG_INCOMPLETE)); } } else { is_mstate = 1; @@ -4922,7 +4921,7 @@ void addPengingCommand(pendingCommandList *queue, pendingCommand *cmd) { queue->tail = cmd; queue->len++; - if (!cmd->parsing_incomplete) queue->ready_len++; + if (!(cmd->flags & PENDING_CMD_FLAG_INCOMPLETE)) queue->ready_len++; } pendingCommand *popPendingCommandFromHead(pendingCommandList *list) { @@ -4939,7 +4938,7 @@ pendingCommand *popPendingCommandFromHead(pendingCommandList *list) { cmd->next = cmd->prev = NULL; list->len--; - if (!cmd->parsing_incomplete) list->ready_len--; + if (!(cmd->flags & PENDING_CMD_FLAG_INCOMPLETE)) list->ready_len--; return cmd; } @@ -4957,7 +4956,7 @@ pendingCommand *popPendingCommandFromTail(pendingCommandList *list) { cmd->next = cmd->prev = NULL; list->len--; - if (!cmd->parsing_incomplete) list->ready_len--; + if (!(cmd->flags & PENDING_CMD_FLAG_INCOMPLETE)) list->ready_len--; return cmd; } @@ -4967,7 +4966,7 @@ pendingCommand *popPendingCommandFromTail(pendingCommandList *list) { * or head command is still parsing). */ static int consumePendingCommand(client *c) { pendingCommand *curcmd = c->pending_cmds.head; - if (!curcmd || curcmd->parsing_incomplete) return 0; + if (!curcmd || (curcmd->flags & PENDING_CMD_FLAG_INCOMPLETE)) return 0; c->argc = curcmd->argc; c->argv = curcmd->argv; @@ -4976,7 +4975,7 @@ static int consumePendingCommand(client *c) { c->reploff_next = curcmd->reploff; c->slot = curcmd->slot; c->lookedcmd = curcmd->cmd; - c->read_error = curcmd->flags; + c->read_error = curcmd->read_error; c->current_pending_cmd = curcmd; return 1; } diff --git a/src/server.c b/src/server.c index 61914e5244e..a215feeefa4 100644 --- a/src/server.c +++ b/src/server.c @@ -4060,14 +4060,14 @@ void preprocessCommand(client *c, pendingCommand *pcmd) { pcmd->cmd = lookupCommand(pcmd->argv, pcmd->argc); if (!pcmd->cmd) { - pcmd->flags = CLIENT_READ_COMMAND_NOT_FOUND; + pcmd->read_error = CLIENT_READ_COMMAND_NOT_FOUND; return; } if ((pcmd->cmd->arity > 0 && pcmd->cmd->arity != pcmd->argc) || (pcmd->argc < -pcmd->cmd->arity)) { - pcmd->flags = CLIENT_READ_BAD_ARITY; + pcmd->read_error = CLIENT_READ_BAD_ARITY; return; } diff --git a/src/server.h b/src/server.h index 72ca8766e8a..4355c444dd1 100644 --- a/src/server.h +++ b/src/server.h @@ -2350,6 +2350,11 @@ typedef struct { } getKeysResult; #define GETKEYS_RESULT_INIT { 0, MAX_KEYS_BUFFER, {{0}}, NULL } +/* pendingCommand flags */ +enum { + PENDING_CMD_FLAG_INCOMPLETE = 1 << 0, /* Command parsing is incomplete, still waiting for more data */ +}; + /* Parser state and parse result of a command from a client's input buffer. */ struct pendingCommand { int argc; /* Num of arguments of current command. */ @@ -2358,12 +2363,11 @@ struct pendingCommand { size_t argv_len_sum; /* Sum of lengths of objects in argv list. */ unsigned long long input_bytes; struct redisCommand *cmd; - // getKeysResult keys_result; long long reploff; /* c->reploff should be set to this value when the command is processed */ + int flags; int slot; /* The slot the command is executing against. Set to INVALID_CLUSTER_SLOT if no slot is being used or if the command has a cross slot error */ - uint8_t flags; - int parsing_incomplete; + uint8_t read_error; struct pendingCommand *next; struct pendingCommand *prev; From 4de9dd97a62d3c166be34c09aab16d06a7b88fc4 Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Wed, 1 Oct 2025 21:40:30 +0800 Subject: [PATCH 06/71] Update comment Co-authored-by: oranagra --- src/networking.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/networking.c b/src/networking.c index b1a655bac06..9a59807d9a6 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2552,7 +2552,7 @@ static int parseMultibulk(client *c, pendingCommand *pcmd) { size_t querybuf_len = sdslen(c->querybuf); /* Cache sdslen */ if (c->multibulklen == 0) { - /* TODO: The client should have been reset */ + /* The pending command should have been reset */ serverAssertWithInfo(c,NULL,pcmd->argc == 0); /* Multi bulk length cannot be read without a \r\n */ From 20948954a97ee03ada3e334015e8ce215a9e5d9e Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Wed, 1 Oct 2025 21:41:53 +0800 Subject: [PATCH 07/71] Remove unused code Co-authored-by: oranagra --- src/networking.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/networking.c b/src/networking.c index 9a59807d9a6..ffd31de3e61 100644 --- a/src/networking.c +++ b/src/networking.c @@ -39,8 +39,6 @@ __thread sds thread_reusable_qb = NULL; __thread int thread_reusable_qb_used = 0; /* Avoid multiple clients using reusable query * buffer due to nested command execution. */ -/* COMMAND_QUEUE_MIN_CAPACITY no longer needed with linked list implementation */ - /* Return the size consumed from the allocator, for the specified SDS string, * including internal fragmentation. This function is used in order to compute * the client output buffer size. */ From 122228776950da1bbf8e5f085019d46d29399c1e Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Wed, 1 Oct 2025 21:43:21 +0800 Subject: [PATCH 08/71] Remove unused code --- src/networking.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/networking.c b/src/networking.c index ffd31de3e61..d49141b9a60 100644 --- a/src/networking.c +++ b/src/networking.c @@ -3009,7 +3009,6 @@ int processInputBuffer(client *c) { } } else if (c->reqtype == PROTO_REQ_MULTIBULK) { int incomplete = (c->pending_cmds.len != c->pending_cmds.ready_len); - // int incomplete = c->pending_cmds.tail && c->pending_cmds.tail->parsing_incomplete; if (unlikely(incomplete)) { pcmd = popPendingCommandFromTail(&c->pending_cmds); } else { From 682b40a8370f8b4799b6292be26e5b2f7662038a Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Wed, 1 Oct 2025 21:44:56 +0800 Subject: [PATCH 09/71] Rename methods --- src/networking.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/networking.c b/src/networking.c index d49141b9a60..9b717654bb9 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2422,7 +2422,7 @@ void unprotectClient(client *c) { * have a well formed command. The function also returns C_ERR when there is * a protocol error: in such a case the client structure is setup to reply * with the error and close the connection. */ -int parseInlineBuffer(client *c, pendingCommand *pcmd) { +int processInlineBuffer(client *c, pendingCommand *pcmd) { char *newline; int argc, j, linefeed_chars = 1; sds *argv, aux; @@ -2543,7 +2543,7 @@ static void setProtocolError(const char *errstr, client *c) { c->flags |= (CLIENT_CLOSE_AFTER_REPLY|CLIENT_PROTOCOL_ERROR); } -static int parseMultibulk(client *c, pendingCommand *pcmd) { +static int processMultibulkBuffer(client *c, pendingCommand *pcmd) { char *newline = NULL; int ok; long long ll; @@ -3001,7 +3001,7 @@ int processInputBuffer(client *c) { if (c->reqtype == PROTO_REQ_INLINE) { pcmd = zmalloc(sizeof(pendingCommand)); initPendingCommand(pcmd); - if (parseInlineBuffer(c, pcmd) == C_ERR && !pcmd->read_error) { + if (processInlineBuffer(c, pcmd) == C_ERR && !pcmd->read_error) { /* If it fails but there are no errors, it means that it might just be * that the desired content cannot be parsed. At this point, we exit and wait for the next time. */ freePendingCommand(c, pcmd); @@ -3016,7 +3016,7 @@ int processInputBuffer(client *c) { initPendingCommand(pcmd); } - if (parseMultibulk(c, pcmd) == C_ERR && !pcmd->read_error) { + if (processMultibulkBuffer(c, pcmd) == C_ERR && !pcmd->read_error) { /* If it fails but there are no errors, it means that it might just be * that the desired content cannot be parsed. At this point, we exit and wait for the next time. */ freePendingCommand(c, pcmd); From bcfcf93c2e3734b0c0f1972f2dbcab320efaca66 Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Wed, 1 Oct 2025 21:46:15 +0800 Subject: [PATCH 10/71] Refine --- src/iothread.c | 1 - src/networking.c | 11 +++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/iothread.c b/src/iothread.c index 9401da23122..8b54a36c630 100644 --- a/src/iothread.c +++ b/src/iothread.c @@ -731,7 +731,6 @@ void initThreadedIO(void) { exit(1); } - /* Spawn and initialize the I/O threads. */ for (int i = 1; i < server.io_threads_num; i++) { IOThread *t = &IOThreads[i]; diff --git a/src/networking.c b/src/networking.c index 9b717654bb9..5be255afcc2 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2543,6 +2543,17 @@ static void setProtocolError(const char *errstr, client *c) { c->flags |= (CLIENT_CLOSE_AFTER_REPLY|CLIENT_PROTOCOL_ERROR); } +/* Process the query buffer for client 'c', setting up the client argument + * vector for command execution. Returns C_OK if after running the function + * the client has a well-formed ready to be processed command, otherwise + * C_ERR if there is still to read more buffer to get the full command. + * The function also returns C_ERR when there is a protocol error: in such a + * case the client structure is setup to reply with the error and close + * the connection. + * + * This function is called if processInputBuffer() detects that the next + * command is in RESP format, so the first byte in the command is found + * to be '*'. Otherwise for inline commands processInlineBuffer() is called. */ static int processMultibulkBuffer(client *c, pendingCommand *pcmd) { char *newline = NULL; int ok; From 13dd132533c36250e8bf7d37a667eac6562253bf Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Thu, 2 Oct 2025 12:56:40 +0800 Subject: [PATCH 11/71] Fix the issue that read_error wasn't handled correctly with io thread --- src/iothread.c | 30 +++++++++++++++--------------- src/networking.c | 10 ++++------ src/server.h | 1 + 3 files changed, 20 insertions(+), 21 deletions(-) diff --git a/src/iothread.c b/src/iothread.c index 8b54a36c630..c94831d2410 100644 --- a/src/iothread.c +++ b/src/iothread.c @@ -465,7 +465,7 @@ int processClientsFromIOThread(IOThread *t) { } /* Process the pending command and input buffer. */ - if (!c->read_error && c->io_flags & CLIENT_IO_PENDING_COMMAND) { + if (!isClientReadErrorFatal(c->read_error) && c->io_flags & CLIENT_IO_PENDING_COMMAND) { c->flags |= CLIENT_PENDING_COMMAND; c->flags |= CLIENT_IN_MEMORY_PREFETCH; if (processPendingCommandAndInputBuffer(c) == C_ERR) { @@ -674,20 +674,20 @@ void IOThreadClientsCron(IOThread *t) { /* Process at least a few clients while we are at it, even if we need * to process less than CLIENTS_CRON_MIN_ITERATIONS to meet our contract * of processing each client once per second. */ - int iterations = listLength(t->clients) / CONFIG_DEFAULT_HZ; - if (iterations < CLIENTS_CRON_MIN_ITERATIONS) { - iterations = CLIENTS_CRON_MIN_ITERATIONS; - } - - listIter li; - listNode *ln; - listRewind(t->clients, &li); - while ((ln = listNext(&li)) && iterations--) { - client *c = listNodeValue(ln); - /* Mark the client as pending cron, main thread will process it. */ - c->io_flags |= CLIENT_IO_PENDING_CRON; - enqueuePendingClientsToMainThread(c, 0); - } +// int iterations = listLength(t->clients) / CONFIG_DEFAULT_HZ; +// if (iterations < CLIENTS_CRON_MIN_ITERATIONS) { +// iterations = CLIENTS_CRON_MIN_ITERATIONS; +// } +// +// listIter li; +// listNode *ln; +// listRewind(t->clients, &li); +// while ((ln = listNext(&li)) && iterations--) { +// client *c = listNodeValue(ln); +// /* Mark the client as pending cron, main thread will process it. */ +// c->io_flags |= CLIENT_IO_PENDING_CRON; +// enqueuePendingClientsToMainThread(c, 0); +// } } /* This is the IO thread timer interrupt, CONFIG_DEFAULT_HZ times per second. diff --git a/src/networking.c b/src/networking.c index 5be255afcc2..6cf48b63ed6 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2949,7 +2949,7 @@ void handleClientReadError(client *c) { /* Helper function to check if a read error is fatal (should stop processing) */ -static inline int isClientReadErrorFatal(int read_error) { +int isClientReadErrorFatal(int read_error) { return read_error != 0 && read_error != CLIENT_READ_COMMAND_NOT_FOUND && read_error != CLIENT_READ_BAD_ARITY; @@ -3060,11 +3060,9 @@ int processInputBuffer(client *c) { /* Check if the client has a fatal read error that requires stopping processing. */ if (isClientReadErrorFatal(c->read_error)) { - break; - } - - if (c->running_tid != IOTHREAD_MAIN_THREAD_ID && c->read_error) { - enqueuePendingClientsToMainThread(c, 0); + if (c->running_tid != IOTHREAD_MAIN_THREAD_ID) { + enqueuePendingClientsToMainThread(c, 0); + } break; } diff --git a/src/server.h b/src/server.h index 4355c444dd1..d0a3347ee6c 100644 --- a/src/server.h +++ b/src/server.h @@ -2879,6 +2879,7 @@ void setDeferredMapLen(client *c, void *node, long length); void setDeferredSetLen(client *c, void *node, long length); void setDeferredAttributeLen(client *c, void *node, long length); void setDeferredPushLen(client *c, void *node, long length); +int isClientReadErrorFatal(int read_error); int processInputBuffer(client *c); void acceptCommonHandler(connection *conn, int flags, char *ip); void readQueryFromClient(connection *conn); From 7bffad235f6152e2bcd77683232ac08c1502c4ad Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Thu, 2 Oct 2025 13:23:25 +0800 Subject: [PATCH 12/71] Remove consumePendingCommand() --- src/networking.c | 36 ++++++++++++++---------------------- 1 file changed, 14 insertions(+), 22 deletions(-) diff --git a/src/networking.c b/src/networking.c index 6cf48b63ed6..fbe38e2d58d 100644 --- a/src/networking.c +++ b/src/networking.c @@ -33,7 +33,6 @@ static inline int _clientHasPendingRepliesSlave(client *c); static inline int _clientHasPendingRepliesNonSlave(client *c); static inline int _writeToClientNonSlave(client *c, ssize_t *nwritten); static inline int _writeToClientSlave(client *c, ssize_t *nwritten); -static int consumePendingCommand(client *c); int ProcessingEventsWhileBlocked = 0; /* See processEventsWhileBlocked(). */ __thread sds thread_reusable_qb = NULL; __thread int thread_reusable_qb_used = 0; /* Avoid multiple clients using reusable query @@ -3047,7 +3046,20 @@ int processInputBuffer(client *c) { } /* Try to consume the next ready command from the pending command list. */ - if (!consumePendingCommand(c)) break; + if (!c->pending_cmds.ready_len) + break; + pendingCommand *curcmd = c->pending_cmds.head; + + /* We populate the old client fields so we don't have to modify all existing logic to work with pendingCommands */ + c->argc = curcmd->argc; + c->argv = curcmd->argv; + c->argv_len = curcmd->argv_len; + c->net_input_bytes_curr_cmd += curcmd->input_bytes; + c->reploff_next = curcmd->reploff; + c->slot = curcmd->slot; + c->lookedcmd = curcmd->cmd; + c->read_error = curcmd->read_error; + c->current_pending_cmd = curcmd; /* Prefetch the command if we are in the main thread. If running in an IO thread, * prefetch will be deferred until the client is processed by the main thread. */ @@ -4965,23 +4977,3 @@ pendingCommand *popPendingCommandFromTail(pendingCommandList *list) { if (!(cmd->flags & PENDING_CMD_FLAG_INCOMPLETE)) list->ready_len--; return cmd; } - -/* Consumes the first ready command from the pending command list and sets it as - * the client's current command. The command remains in the list but is marked as - * current. Returns 1 on success, 0 if no ready command is available (empty list - * or head command is still parsing). */ -static int consumePendingCommand(client *c) { - pendingCommand *curcmd = c->pending_cmds.head; - if (!curcmd || (curcmd->flags & PENDING_CMD_FLAG_INCOMPLETE)) return 0; - - c->argc = curcmd->argc; - c->argv = curcmd->argv; - c->argv_len = curcmd->argv_len; - c->net_input_bytes_curr_cmd += curcmd->input_bytes; - c->reploff_next = curcmd->reploff; - c->slot = curcmd->slot; - c->lookedcmd = curcmd->cmd; - c->read_error = curcmd->read_error; - c->current_pending_cmd = curcmd; - return 1; -} From 60be3f46f72df14a9981fabbce3d2a7b3195b82e Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Thu, 2 Oct 2025 13:51:12 +0800 Subject: [PATCH 13/71] Fix mistake comment out --- src/iothread.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/iothread.c b/src/iothread.c index c94831d2410..4b4751c3647 100644 --- a/src/iothread.c +++ b/src/iothread.c @@ -674,20 +674,20 @@ void IOThreadClientsCron(IOThread *t) { /* Process at least a few clients while we are at it, even if we need * to process less than CLIENTS_CRON_MIN_ITERATIONS to meet our contract * of processing each client once per second. */ -// int iterations = listLength(t->clients) / CONFIG_DEFAULT_HZ; -// if (iterations < CLIENTS_CRON_MIN_ITERATIONS) { -// iterations = CLIENTS_CRON_MIN_ITERATIONS; -// } -// -// listIter li; -// listNode *ln; -// listRewind(t->clients, &li); -// while ((ln = listNext(&li)) && iterations--) { -// client *c = listNodeValue(ln); -// /* Mark the client as pending cron, main thread will process it. */ -// c->io_flags |= CLIENT_IO_PENDING_CRON; -// enqueuePendingClientsToMainThread(c, 0); -// } + int iterations = listLength(t->clients) / CONFIG_DEFAULT_HZ; + if (iterations < CLIENTS_CRON_MIN_ITERATIONS) { + iterations = CLIENTS_CRON_MIN_ITERATIONS; + } + + listIter li; + listNode *ln; + listRewind(t->clients, &li); + while ((ln = listNext(&li)) && iterations--) { + client *c = listNodeValue(ln); + /* Mark the client as pending cron, main thread will process it. */ + c->io_flags |= CLIENT_IO_PENDING_CRON; + enqueuePendingClientsToMainThread(c, 0); + } } /* This is the IO thread timer interrupt, CONFIG_DEFAULT_HZ times per second. From 16d2682e1f2b54fead1bee8c82229318fbaedf9f Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Thu, 2 Oct 2025 17:35:43 +0800 Subject: [PATCH 14/71] Move the calucation of lookahead outside of if --- src/networking.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/networking.c b/src/networking.c index fbe38e2d58d..f4402308914 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2960,6 +2960,13 @@ int isClientReadErrorFatal(int read_error) { * pending query buffer, already representing a full command, to process. * return C_ERR in case the client was freed during the processing */ int processInputBuffer(client *c) { + /* We limit the lookahead for unauthenticated connections to 1. + * This is both to reduce memory overhead, and to prevent errors: AUTH can + * affect the handling of succeeding commands. Parsing of "large" + * unauthenticated multibulk commands is rejected, which would cause those + * commands to incorrectly return an error to the client. */ + const int lookahead = authRequired(c) ? 1 : server.lookahead; + /* Keep processing while there is something in the input buffer */ while ((c->querybuf && c->qb_pos < sdslen(c->querybuf)) || c->pending_cmds.ready_len > 0) { @@ -2983,13 +2990,6 @@ int processInputBuffer(client *c) { * The same applies for clients we want to terminate ASAP. */ if (c->flags & (CLIENT_CLOSE_AFTER_REPLY|CLIENT_CLOSE_ASAP)) break; - /* We limit the lookahead for unauthenticated connections to 1. - * This is both to reduce memory overhead, and to prevent errors: AUTH can - * affect the handling of succeeding commands. Parsing of "large" - * unauthenticated multibulk commands is rejected, which would cause those - * commands to incorrectly return an error to the client. */ - const int lookahead = authRequired(c) ? 1 : server.lookahead; - /* Determine if we need to parse more commands from the query buffer. * Only parse when there are no ready commands waiting to be processed. */ const int parse_more = !c->pending_cmds.ready_len; From f080aa60a81c92396edb2b3d31189c5326acf94d Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Fri, 3 Oct 2025 21:08:40 +0800 Subject: [PATCH 15/71] Avoid the client calling the shutdownCommand again after unblocking again --- src/blocked.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/blocked.c b/src/blocked.c index 238a0aacc21..c873093f0ae 100644 --- a/src/blocked.c +++ b/src/blocked.c @@ -200,6 +200,10 @@ void unblockClient(client *c, int queue_for_reprocessing) { * unblockClientOnKey is called, which eventually calls processCommand, * which calls reqresAppendResponse) */ prepareForNextCommand(c); + } else if (c->bstate.btype == BLOCKED_SHUTDOWN) { + /* Free the current pending command to prevent it from being executed again + * when the client is unblocked from shutdown state. */ + freeClientPendingCommands(c, 1); } /* Clear the flags, and put the client in the unblocked list so that From 1888ca1595adb111f807e583ecb2d088d8282433 Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Sat, 4 Oct 2025 14:29:30 +0800 Subject: [PATCH 16/71] Reset client for blocked shutdown client --- src/blocked.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/blocked.c b/src/blocked.c index c873093f0ae..f79bb28d639 100644 --- a/src/blocked.c +++ b/src/blocked.c @@ -204,6 +204,10 @@ void unblockClient(client *c, int queue_for_reprocessing) { /* Free the current pending command to prevent it from being executed again * when the client is unblocked from shutdown state. */ freeClientPendingCommands(c, 1); + c->argv = NULL; + c->argc = 0; + c->argv_len = 0; + c->current_pending_cmd = NULL; } /* Clear the flags, and put the client in the unblocked list so that From 61d639ad454a37df2391dec35044dd23fc634729 Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Sat, 4 Oct 2025 21:32:16 +0800 Subject: [PATCH 17/71] Fix race condition for DefaultUser->flags --- src/networking.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/networking.c b/src/networking.c index f4402308914..2e40879cc33 100644 --- a/src/networking.c +++ b/src/networking.c @@ -112,9 +112,14 @@ static void clientSetDefaultAuth(client *c) { int authRequired(client *c) { /* Check if the user is authenticated. This check is skipped in case - * the default user is flagged as "nopass" and is active. */ - int auth_required = (!(DefaultUser->flags & USER_FLAG_NOPASS) || - (DefaultUser->flags & USER_FLAG_DISABLED)) && + * the default user is flagged as "nopass" and is active. + * + * Note that reading DefaultUser flags atomically to minimize race condition. + * In the worst case, we might get a slightly stale value, but this won't + * cause security issues, just potentially one extra command parsing. */ + uint32_t default_flags = DefaultUser->flags; + int auth_required = (!(default_flags & USER_FLAG_NOPASS) || + (default_flags & USER_FLAG_DISABLED)) && !c->authenticated; return auth_required; } From 77c2b72af12825b44159ca2daf6e495c1adf9854 Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Sun, 5 Oct 2025 09:27:36 +0800 Subject: [PATCH 18/71] Make user->flags atomic --- src/acl.c | 21 ++++++++------------- src/networking.c | 9 +++------ src/server.h | 2 +- 3 files changed, 12 insertions(+), 20 deletions(-) diff --git a/src/acl.c b/src/acl.c index b9f81bcc836..639b7699f5b 100644 --- a/src/acl.c +++ b/src/acl.c @@ -421,8 +421,7 @@ user *ACLCreateUser(const char *name, size_t namelen) { if (raxFind(Users,(unsigned char*)name,namelen,NULL)) return NULL; user *u = zmalloc(sizeof(*u)); u->name = sdsnewlen(name,namelen); - u->flags = USER_FLAG_DISABLED; - u->flags |= USER_FLAG_SANITIZE_PAYLOAD; + atomicSet(u->flags, USER_FLAG_DISABLED | USER_FLAG_SANITIZE_PAYLOAD); u->passwords = listCreate(); u->acl_string = NULL; listSetMatchMethod(u->passwords,ACLListMatchSds); @@ -1289,22 +1288,18 @@ int ACLSetUser(user *u, const char *op, ssize_t oplen) { if (oplen == -1) oplen = strlen(op); if (oplen == 0) return C_OK; /* Empty string is a no-operation. */ if (!strcasecmp(op,"on")) { - u->flags |= USER_FLAG_ENABLED; - u->flags &= ~USER_FLAG_DISABLED; + atomicSet(u->flags, (u->flags | USER_FLAG_ENABLED) & ~USER_FLAG_DISABLED); } else if (!strcasecmp(op,"off")) { - u->flags |= USER_FLAG_DISABLED; - u->flags &= ~USER_FLAG_ENABLED; + atomicSet(u->flags, (u->flags | USER_FLAG_DISABLED) & ~USER_FLAG_ENABLED); } else if (!strcasecmp(op,"skip-sanitize-payload")) { - u->flags |= USER_FLAG_SANITIZE_PAYLOAD_SKIP; - u->flags &= ~USER_FLAG_SANITIZE_PAYLOAD; + atomicSet(u->flags, (u->flags | USER_FLAG_SANITIZE_PAYLOAD_SKIP) & ~USER_FLAG_SANITIZE_PAYLOAD); } else if (!strcasecmp(op,"sanitize-payload")) { - u->flags &= ~USER_FLAG_SANITIZE_PAYLOAD_SKIP; - u->flags |= USER_FLAG_SANITIZE_PAYLOAD; + atomicSet(u->flags, (u->flags | USER_FLAG_SANITIZE_PAYLOAD) & ~USER_FLAG_SANITIZE_PAYLOAD_SKIP); } else if (!strcasecmp(op,"nopass")) { - u->flags |= USER_FLAG_NOPASS; + atomicSet(u->flags, u->flags | USER_FLAG_NOPASS); listEmpty(u->passwords); } else if (!strcasecmp(op,"resetpass")) { - u->flags &= ~USER_FLAG_NOPASS; + atomicSet(u->flags, u->flags & ~USER_FLAG_NOPASS); listEmpty(u->passwords); } else if (op[0] == '>' || op[0] == '#') { sds newpass; @@ -1324,7 +1319,7 @@ int ACLSetUser(user *u, const char *op, ssize_t oplen) { listAddNodeTail(u->passwords,newpass); else sdsfree(newpass); - u->flags &= ~USER_FLAG_NOPASS; + atomicSet(u->flags, u->flags & ~USER_FLAG_NOPASS); } else if (op[0] == '<' || op[0] == '!') { sds delpass; if (op[0] == '<') { diff --git a/src/networking.c b/src/networking.c index 2e40879cc33..8dbd9ad2452 100644 --- a/src/networking.c +++ b/src/networking.c @@ -112,12 +112,9 @@ static void clientSetDefaultAuth(client *c) { int authRequired(client *c) { /* Check if the user is authenticated. This check is skipped in case - * the default user is flagged as "nopass" and is active. - * - * Note that reading DefaultUser flags atomically to minimize race condition. - * In the worst case, we might get a slightly stale value, but this won't - * cause security issues, just potentially one extra command parsing. */ - uint32_t default_flags = DefaultUser->flags; + * the default user is flagged as "nopass" and is active. */ + uint32_t default_flags; + atomicGet(DefaultUser->flags, default_flags); int auth_required = (!(default_flags & USER_FLAG_NOPASS) || (default_flags & USER_FLAG_DISABLED)) && !c->authenticated; diff --git a/src/server.h b/src/server.h index d0a3347ee6c..9c7042ea212 100644 --- a/src/server.h +++ b/src/server.h @@ -1243,7 +1243,7 @@ typedef struct pendingCommandList { typedef struct { sds name; /* The username as an SDS string. */ - uint32_t flags; /* See USER_FLAG_* */ + redisAtomic uint32_t flags; /* See USER_FLAG_* */ list *passwords; /* A list of SDS valid passwords for this user. */ list *selectors; /* A list of selectors this user validates commands against. This list will always contain at least From 59ed64016c8fe9d415ae0d684fa7446a5fa27f88 Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Sun, 5 Oct 2025 20:22:14 +0800 Subject: [PATCH 19/71] Revert strchr() --- src/networking.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/networking.c b/src/networking.c index 8dbd9ad2452..aa4a57b4c53 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2430,7 +2430,7 @@ int processInlineBuffer(client *c, pendingCommand *pcmd) { size_t querylen; /* Search for end of line */ - newline = memchr(c->querybuf+c->qb_pos,'\n',sdslen(c->querybuf) - c->qb_pos); + newline = strchr(c->querybuf+c->qb_pos,'\n'); /* Nothing to do without a \r\n */ if (newline == NULL) { @@ -2566,7 +2566,7 @@ static int processMultibulkBuffer(client *c, pendingCommand *pcmd) { serverAssertWithInfo(c,NULL,pcmd->argc == 0); /* Multi bulk length cannot be read without a \r\n */ - newline = memchr(c->querybuf+c->qb_pos,'\r',sdslen(c->querybuf) - c->qb_pos); + newline = strchr(c->querybuf+c->qb_pos,'\r'); if (newline == NULL) { if (querybuf_len-c->qb_pos > PROTO_INLINE_MAX_SIZE) { pcmd->read_error = CLIENT_READ_TOO_BIG_MBULK_COUNT_STRING; From ed08a678f2fb76ff29dd108e524c3318fb3fed20 Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Sun, 5 Oct 2025 21:28:32 +0800 Subject: [PATCH 20/71] Add update_slot_stats argument to update cluster slot stats for prepareForNextCommand() Co-authored-by: oranagra --- src/blocked.c | 2 +- src/networking.c | 10 ++++++---- src/server.h | 2 +- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/blocked.c b/src/blocked.c index f79bb28d639..006ce8877d1 100644 --- a/src/blocked.c +++ b/src/blocked.c @@ -199,7 +199,7 @@ void unblockClient(client *c, int queue_for_reprocessing) { * call reqresAppendResponse here (for clients blocked on key, * unblockClientOnKey is called, which eventually calls processCommand, * which calls reqresAppendResponse) */ - prepareForNextCommand(c); + prepareForNextCommand(c, 0); } else if (c->bstate.btype == BLOCKED_SHUTDOWN) { /* Free the current pending command to prevent it from being executed again * when the client is unblocked from shutdown state. */ diff --git a/src/networking.c b/src/networking.c index aa4a57b4c53..82af445f3b2 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2766,10 +2766,13 @@ static int processMultibulkBuffer(client *c, pendingCommand *pcmd) { * 1. Append the response, if necessary. * 2. Reset the client. * 3. Update the all_argv_len_sum counter and advance the pending_cmd cyclic buffer. + * 4. Update the cluster slot stats, if necessary. */ -void prepareForNextCommand(client *c) { +void prepareForNextCommand(client *c, int update_slot_stats) { reqresAppendResponse(c); resetClientInternal(c, 1); + if (update_slot_stats) + clusterSlotStatsAddNetworkBytesInForUserClient(c); } /* Perform necessary tasks after a command was executed: @@ -2787,8 +2790,7 @@ void commandProcessed(client *c) { * since we have not applied the command. */ if (c->flags & CLIENT_BLOCKED) return; - clusterSlotStatsAddNetworkBytesInForUserClient(c); - prepareForNextCommand(c); + prepareForNextCommand(c, 1); long long prev_offset = c->reploff; if (c->flags & CLIENT_MASTER && !(c->flags & CLIENT_MULTI)) { @@ -3084,7 +3086,7 @@ int processInputBuffer(client *c) { if (!c->argc) { /* A naked newline can be sent from masters as a keep-alive, or from slaves to refresh * the last ACK time. In that case there's no command to actually execute. */ - prepareForNextCommand(c); + prepareForNextCommand(c, 0); } else { /* If we are in the context of an I/O thread, we can't really * execute the command here. All we can do is to flag the client diff --git a/src/server.h b/src/server.h index 9c7042ea212..0ce11b66b29 100644 --- a/src/server.h +++ b/src/server.h @@ -3383,7 +3383,7 @@ uint64_t getCommandFlags(client *c); void preprocessCommand(client *c, pendingCommand *pcmd); int processCommand(client *c); void commandProcessed(client *c); -void prepareForNextCommand(client *c); +void prepareForNextCommand(client *c, int update_slot_stats); int processPendingCommandAndInputBuffer(client *c); int processCommandAndResetClient(client *c); From 4dd99d4c63554098b2a86a902dde74f1ca091c01 Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Sun, 5 Oct 2025 21:45:54 +0800 Subject: [PATCH 21/71] call clusterSlotStatsAddNetworkBytesInForUserClient() before resetClientInternal() --- src/networking.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/networking.c b/src/networking.c index 82af445f3b2..0a9061c1cc3 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2770,9 +2770,11 @@ static int processMultibulkBuffer(client *c, pendingCommand *pcmd) { */ void prepareForNextCommand(client *c, int update_slot_stats) { reqresAppendResponse(c); - resetClientInternal(c, 1); - if (update_slot_stats) + if (update_slot_stats) { + /* We should do this before reset client. */ clusterSlotStatsAddNetworkBytesInForUserClient(c); + } + resetClientInternal(c, 1); } /* Perform necessary tasks after a command was executed: From 8cfae3f8d372838ba099afe302686abfb5060efb Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Wed, 8 Oct 2025 08:47:06 +0800 Subject: [PATCH 22/71] use the slot from preprocess() in getNodeByQuery() --- src/cluster.c | 14 ++++++++++++-- src/server.c | 1 + src/server.h | 2 ++ 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/src/cluster.c b/src/cluster.c index 225d149b92b..5f8c337d9f1 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -1150,6 +1150,7 @@ clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, in mc.argv = argv; mc.argc = argc; mc.cmd = cmd; + mc.slot = hashslot ? *hashslot : INVALID_CLUSTER_SLOT; } /* Check that all the keys are in the same hash slot, and obtain this @@ -1178,9 +1179,18 @@ clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, in keyindex = result.keys; for (j = 0; j < numkeys; j++) { + /* The command has keys and was checked for cross-slot between its keys in preprocessCommand() */ + if (pcmd->read_error == CLIENT_READ_CROSS_SLOT) { + /* Error: multiple keys from different slots. */ + if (error_code) + *error_code = CLUSTER_REDIR_CROSS_SLOT; + return NULL; + } + robj *thiskey = margv[keyindex[j].pos]; - int thisslot = keyHashSlot((char*)thiskey->ptr, - sdslen(thiskey->ptr)); + int thisslot = pcmd->slot; + if (thisslot == INVALID_CLUSTER_SLOT) + thisslot = keyHashSlot((char*)thiskey->ptr, sdslen(thiskey->ptr)); if (firstkey == NULL) { /* This is the first key we see. Check what is the slot diff --git a/src/server.c b/src/server.c index a215feeefa4..7c0a92d2eb2 100644 --- a/src/server.c +++ b/src/server.c @@ -4084,6 +4084,7 @@ void preprocessCommand(client *c, pendingCommand *pcmd) { /* Invalidate the slot to indicate that there is a cross-slot error */ pcmd->slot = INVALID_CLUSTER_SLOT; /* Cross slot error. */ + pcmd->read_error = CLIENT_READ_CROSS_SLOT; break; } } diff --git a/src/server.h b/src/server.h index 0ce11b66b29..3bdeda9b4f2 100644 --- a/src/server.h +++ b/src/server.h @@ -467,6 +467,7 @@ extern int configOOMScoreAdjValuesDefaults[CONFIG_OOM_COUNT]; #define CLIENT_READ_REACHED_MAX_QUERYBUF 13 #define CLIENT_READ_COMMAND_NOT_FOUND 14 #define CLIENT_READ_BAD_ARITY 15 +#define CLIENT_READ_CROSS_SLOT 16 /* Client block type (btype field in client structure) * if CLIENT_BLOCKED flag is set. */ @@ -2363,6 +2364,7 @@ struct pendingCommand { size_t argv_len_sum; /* Sum of lengths of objects in argv list. */ unsigned long long input_bytes; struct redisCommand *cmd; + getKeysResult keys_result; long long reploff; /* c->reploff should be set to this value when the command is processed */ int flags; int slot; /* The slot the command is executing against. Set to INVALID_CLUSTER_SLOT if no slot is being used or if From 5c9460a7b1de11bb057dc409ea1d1de0ef8dffae Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Wed, 8 Oct 2025 09:29:14 +0800 Subject: [PATCH 23/71] add CLIENT_READ_CROSS_SLOT for isClientReadErrorFatal() --- src/networking.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/networking.c b/src/networking.c index 0a9061c1cc3..bf09ba7b2a4 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2957,7 +2957,8 @@ void handleClientReadError(client *c) { int isClientReadErrorFatal(int read_error) { return read_error != 0 && read_error != CLIENT_READ_COMMAND_NOT_FOUND && - read_error != CLIENT_READ_BAD_ARITY; + read_error != CLIENT_READ_BAD_ARITY && + read_error != CLIENT_READ_CROSS_SLOT; } /* This function is called every time, in the client structure 'c', there is From 02ede135eec705e9ed12afd9271f8e93259c9d2f Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Wed, 8 Oct 2025 09:49:39 +0800 Subject: [PATCH 24/71] add CLIENT_READ_CROSS_SLOT for isClientReadErrorFatal() --- src/networking.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/networking.c b/src/networking.c index bf09ba7b2a4..0c76a1e7c11 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2941,6 +2941,7 @@ void handleClientReadError(client *c) { } case CLIENT_READ_COMMAND_NOT_FOUND: case CLIENT_READ_BAD_ARITY: + case CLIENT_READ_CROSS_SLOT: /* These are command validation errors, not protocol errors. * They are handled in processCommand() via commandCheckExistence() * and commandCheckArity(), which generate appropriate error responses. From d2050482a8a0dcbdc97a91e147bd9910956e6bec Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Wed, 8 Oct 2025 09:57:33 +0800 Subject: [PATCH 25/71] simplify the using of isClientReadErrorFatal() --- src/iothread.c | 4 ++-- src/networking.c | 22 +++++++--------------- src/server.h | 2 +- 3 files changed, 10 insertions(+), 18 deletions(-) diff --git a/src/iothread.c b/src/iothread.c index 4b4751c3647..3d6c1b1c7e6 100644 --- a/src/iothread.c +++ b/src/iothread.c @@ -444,7 +444,7 @@ int processClientsFromIOThread(IOThread *t) { /* If a read error occurs, handle it in the main thread first, since we * want to print logs about client information before freeing. */ - if (c->read_error) handleClientReadError(c); + if (isClientReadErrorFatal(c)) handleClientReadError(c); /* The client is asked to close in IO thread. */ if (c->io_flags & CLIENT_IO_CLOSE_ASAP) { @@ -465,7 +465,7 @@ int processClientsFromIOThread(IOThread *t) { } /* Process the pending command and input buffer. */ - if (!isClientReadErrorFatal(c->read_error) && c->io_flags & CLIENT_IO_PENDING_COMMAND) { + if (!isClientReadErrorFatal(c) && c->io_flags & CLIENT_IO_PENDING_COMMAND) { c->flags |= CLIENT_PENDING_COMMAND; c->flags |= CLIENT_IN_MEMORY_PREFETCH; if (processPendingCommandAndInputBuffer(c) == C_ERR) { diff --git a/src/networking.c b/src/networking.c index 0c76a1e7c11..902925afe2a 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2939,14 +2939,6 @@ void handleClientReadError(client *c) { sdsfree(bytes); break; } - case CLIENT_READ_COMMAND_NOT_FOUND: - case CLIENT_READ_BAD_ARITY: - case CLIENT_READ_CROSS_SLOT: - /* These are command validation errors, not protocol errors. - * They are handled in processCommand() via commandCheckExistence() - * and commandCheckArity(), which generate appropriate error responses. - * No action needed here. */ - break; default: serverPanic("Unknown client read error: %d", c->read_error); break; @@ -2955,11 +2947,11 @@ void handleClientReadError(client *c) { /* Helper function to check if a read error is fatal (should stop processing) */ -int isClientReadErrorFatal(int read_error) { - return read_error != 0 && - read_error != CLIENT_READ_COMMAND_NOT_FOUND && - read_error != CLIENT_READ_BAD_ARITY && - read_error != CLIENT_READ_CROSS_SLOT; +int isClientReadErrorFatal(client *c) { + return c->read_error != 0 && + c->read_error != CLIENT_READ_COMMAND_NOT_FOUND && + c->read_error != CLIENT_READ_BAD_ARITY && + c->read_error != CLIENT_READ_CROSS_SLOT; } /* This function is called every time, in the client structure 'c', there is @@ -3079,7 +3071,7 @@ int processInputBuffer(client *c) { } /* Check if the client has a fatal read error that requires stopping processing. */ - if (isClientReadErrorFatal(c->read_error)) { + if (isClientReadErrorFatal(c)) { if (c->running_tid != IOTHREAD_MAIN_THREAD_ID) { enqueuePendingClientsToMainThread(c, 0); } @@ -3269,7 +3261,7 @@ void readQueryFromClient(connection *conn) { c = NULL; done: - if (c && c->read_error) { + if (c && isClientReadErrorFatal(c)) { if (c->running_tid == IOTHREAD_MAIN_THREAD_ID) { handleClientReadError(c); } diff --git a/src/server.h b/src/server.h index 3bdeda9b4f2..d6812260ed5 100644 --- a/src/server.h +++ b/src/server.h @@ -2881,7 +2881,7 @@ void setDeferredMapLen(client *c, void *node, long length); void setDeferredSetLen(client *c, void *node, long length); void setDeferredAttributeLen(client *c, void *node, long length); void setDeferredPushLen(client *c, void *node, long length); -int isClientReadErrorFatal(int read_error); +int isClientReadErrorFatal(client *c); int processInputBuffer(client *c); void acceptCommonHandler(connection *conn, int flags, char *ip); void readQueryFromClient(connection *conn); From 3911dc12a6a0c85eabd28cde274bd56720b6179a Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Thu, 9 Oct 2025 09:29:05 +0800 Subject: [PATCH 26/71] cache the key result int pending command --- src/cluster.c | 21 ++++++++++++++------- src/cluster.h | 2 +- src/module.c | 6 +++++- src/networking.c | 2 ++ src/script.c | 2 +- src/server.c | 18 +++++++++++++----- src/server.h | 1 + tests/unit/memefficiency.tcl | 1 - 8 files changed, 37 insertions(+), 16 deletions(-) diff --git a/src/cluster.c b/src/cluster.c index 5f8c337d9f1..82014d86f37 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -1107,7 +1107,7 @@ void clusterCommand(client *c) { * * CLUSTER_REDIR_DOWN_STATE and CLUSTER_REDIR_DOWN_RO_STATE if the cluster is * down but the user attempts to execute a command that addresses one or more keys. */ -clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, int argc, int *hashslot, uint64_t cmd_flags, int *error_code) { +clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, int argc, int *hashslot, getKeysResult *keys_result, uint64_t cmd_flags, int *error_code) { clusterNode *myself = getMyClusterNode(); clusterNode *n = NULL; robj *firstkey = NULL; @@ -1151,6 +1151,10 @@ clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, in mc.argc = argc; mc.cmd = cmd; mc.slot = hashslot ? *hashslot : INVALID_CLUSTER_SLOT; + if (keys_result) + mc.keys_result = *keys_result; + else + mc.flags |= PENDING_CMD_KEYRESULT_INVALID; } /* Check that all the keys are in the same hash slot, and obtain this @@ -1158,7 +1162,7 @@ clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, in for (i = 0; i < ms->count; i++) { struct redisCommand *mcmd; robj **margv; - int margc, numkeys, j; + int margc, j; keyReference *keyindex; pendingCommand *pcmd = ms->commands[i]; @@ -1175,10 +1179,13 @@ clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, in } getKeysResult result = GETKEYS_RESULT_INIT; - numkeys = getKeysFromCommand(mcmd,margv,margc,&result); + if (pcmd->flags & PENDING_CMD_KEYRESULT_INVALID) + getKeysFromCommand(mcmd,margv,margc,&result); + else + result = pcmd->keys_result; keyindex = result.keys; - for (j = 0; j < numkeys; j++) { + for (j = 0; j < result.numkeys; j++) { /* The command has keys and was checked for cross-slot between its keys in preprocessCommand() */ if (pcmd->read_error == CLIENT_READ_CROSS_SLOT) { /* Error: multiple keys from different slots. */ @@ -1204,7 +1211,7 @@ clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, in * not trapped earlier in processCommand(). Report the same * error to the client. */ if (n == NULL) { - getKeysFreeResult(&result); + if (!keys_result) getKeysFreeResult(&result); if (error_code) *error_code = CLUSTER_REDIR_DOWN_UNBOUND; return NULL; @@ -1227,7 +1234,7 @@ clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, in * the same key/channel as the first we saw. */ if (slot != thisslot) { /* Error: multiple keys from different slots. */ - getKeysFreeResult(&result); + if (!keys_result) getKeysFreeResult(&result); if (error_code) *error_code = CLUSTER_REDIR_CROSS_SLOT; return NULL; @@ -1252,7 +1259,7 @@ clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, in else existing_keys++; } } - getKeysFreeResult(&result); + if (!keys_result) getKeysFreeResult(&result); } /* No key at all in command? then we can serve the request diff --git a/src/cluster.h b/src/cluster.h index e9f4df7553d..eacf3eaedf6 100644 --- a/src/cluster.h +++ b/src/cluster.h @@ -149,7 +149,7 @@ unsigned int countKeysInSlot(unsigned int slot); int getSlotOrReply(client *c, robj *o); /* functions with shared implementations */ -clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, int argc, int *hashslot, uint64_t cmd_flags, int *error_code); +clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, int argc, int *hashslot, getKeysResult *result, uint64_t cmd_flags, int *error_code); int clusterRedirectBlockedClientIfNeeded(client *c); void clusterRedirectClient(client *c, clusterNode *n, int hashslot, int error_code); void migrateCloseTimedoutSockets(void); diff --git a/src/module.c b/src/module.c index 1a6360f1a55..fd09ef60030 100644 --- a/src/module.c +++ b/src/module.c @@ -6657,7 +6657,7 @@ RedisModuleCallReply *RM_Call(RedisModuleCtx *ctx, const char *cmdname, const ch c->flags &= ~(CLIENT_READONLY|CLIENT_ASKING); c->flags |= ctx->client->flags & (CLIENT_READONLY|CLIENT_ASKING); const uint64_t cmd_flags = getCommandFlags(c); - if (getNodeByQuery(c,c->cmd,c->argv,c->argc,NULL,cmd_flags,&error_code) != + if (getNodeByQuery(c,c->cmd,c->argv,c->argc,NULL,NULL,cmd_flags,&error_code) != getMyClusterNode()) { sds msg = NULL; @@ -11050,6 +11050,10 @@ void moduleCallCommandFilters(client *c) { pcmd->argc = filter.argc; pcmd->argv_len = filter.argv_len; pcmd->cmd = NULL; + pcmd->slot = INVALID_CLUSTER_SLOT; + pcmd->flags |= PENDING_CMD_KEYRESULT_INVALID; + getKeysFreeResult(&pcmd->keys_result); + pcmd->keys_result = (getKeysResult)GETKEYS_RESULT_INIT; } } diff --git a/src/networking.c b/src/networking.c index 902925afe2a..98833e5bd0f 100644 --- a/src/networking.c +++ b/src/networking.c @@ -4913,6 +4913,8 @@ void freePendingCommand(client *c, pendingCommand *pcmd) { if (!pcmd) return; + getKeysFreeResult(&pcmd->keys_result); + if (pcmd->argv) { for (int j = 0; j < pcmd->argc; j++) decrRefCount(pcmd->argv[j]); diff --git a/src/script.c b/src/script.c index dfbca951135..fbb63852d3d 100644 --- a/src/script.c +++ b/src/script.c @@ -486,7 +486,7 @@ static int scriptVerifyClusterState(scriptRunCtx *run_ctx, client *c, client *or c->flags |= original_c->flags & (CLIENT_READONLY | CLIENT_ASKING); const uint64_t cmd_flags = getCommandFlags(c); int hashslot = -1; - if (getNodeByQuery(c, c->cmd, c->argv, c->argc, &hashslot, cmd_flags, &error_code) != getMyClusterNode()) { + if (getNodeByQuery(c, c->cmd, c->argv, c->argc, &hashslot, NULL, cmd_flags, &error_code) != getMyClusterNode()) { if (error_code == CLUSTER_REDIR_DOWN_RO_STATE) { *err = sdsnew( "Script attempted to execute a write command while the " diff --git a/src/server.c b/src/server.c index 7c0a92d2eb2..fcae00be057 100644 --- a/src/server.c +++ b/src/server.c @@ -4071,11 +4071,17 @@ void preprocessCommand(client *c, pendingCommand *pcmd) { return; } + pcmd->keys_result = (getKeysResult)GETKEYS_RESULT_INIT; + int numkeys = getKeysFromCommand(pcmd->cmd, pcmd->argv, pcmd->argc, &pcmd->keys_result); + if (numkeys < 0) { + pcmd->flags |= PENDING_CMD_KEYRESULT_INVALID; + /* We skip the checks below since We expect the command to be rejected in this case */ + return; + } + if (server.cluster_enabled) { - getKeysResult result = (getKeysResult)GETKEYS_RESULT_INIT; - int numkeys = getKeysFromCommand(pcmd->cmd, pcmd->argv, pcmd->argc, &result); for (int i = 0; i < numkeys; i++) { - robj *thiskey = pcmd->argv[result.keys[i].pos]; + robj *thiskey = pcmd->argv[pcmd->keys_result.keys[i].pos]; int thisslot = (int)keyHashSlot((char*)thiskey->ptr, sdslen(thiskey->ptr)); if (pcmd->slot == INVALID_CLUSTER_SLOT) { pcmd->slot = thisslot; @@ -4088,7 +4094,6 @@ void preprocessCommand(client *c, pendingCommand *pcmd) { break; } } - getKeysFreeResult(&result); } } @@ -4242,8 +4247,11 @@ int processCommand(client *c) { c->cmd->proc != execCommand)) { int error_code; + pendingCommand *pcmd = c->pending_cmds.head; + getKeysResult *key_result = (pcmd->flags & PENDING_CMD_KEYRESULT_INVALID) ? NULL : &pcmd->keys_result; + // getKeysResult *key_result = NULL; clusterNode *n = getNodeByQuery(c,c->cmd,c->argv,c->argc, - &c->slot,cmd_flags,&error_code); + &c->slot,key_result,cmd_flags,&error_code); if (n == NULL || !clusterNodeIsMyself(n)) { if (c->cmd->proc == execCommand) { discardTransaction(c); diff --git a/src/server.h b/src/server.h index d6812260ed5..7bb8fe65177 100644 --- a/src/server.h +++ b/src/server.h @@ -2354,6 +2354,7 @@ typedef struct { /* pendingCommand flags */ enum { PENDING_CMD_FLAG_INCOMPLETE = 1 << 0, /* Command parsing is incomplete, still waiting for more data */ + PENDING_CMD_KEYRESULT_INVALID = 1 << 1, /* Key result is invalid, needs to be recomputed */ }; /* Parser state and parse result of a command from a client's input buffer. */ diff --git a/tests/unit/memefficiency.tcl b/tests/unit/memefficiency.tcl index 5be6db0278d..2a1a9e641e0 100644 --- a/tests/unit/memefficiency.tcl +++ b/tests/unit/memefficiency.tcl @@ -781,7 +781,6 @@ run_solo {defrag} { if {$::verbose} { puts "frag $frag" } - puts 1111 assert {$frag >= $expected_frag} r config set latency-monitor-threshold 5 From 7f8aabfbfb14c53a26d7d4a31e0fc177cb0962d3 Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Thu, 9 Oct 2025 09:39:52 +0800 Subject: [PATCH 27/71] Smoking test --- tests/integration/shutdown.tcl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/integration/shutdown.tcl b/tests/integration/shutdown.tcl index 4169d64b7eb..2c05caf10a3 100644 --- a/tests/integration/shutdown.tcl +++ b/tests/integration/shutdown.tcl @@ -177,6 +177,12 @@ test "Shutting down master waits for replica then fails" { catch { $rd2 read } e2 assert_match "*Errors trying to SHUTDOWN. Check logs*" $e1 assert_match "*Errors trying to SHUTDOWN. Check logs*" $e2 + + # Verify that after shutdown is cancelled, the client is properly + # reset and can handle other commands normally. + $rd1 PING + assert_equal "PONG" [$rd1 read] + $rd1 close $rd2 close From 2b01213f5f79cd57467b75c59acff7da37f88377 Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Thu, 9 Oct 2025 09:45:31 +0800 Subject: [PATCH 28/71] Fix client not being properly reset after shutdown cancellation --- src/blocked.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/src/blocked.c b/src/blocked.c index 8d15f9de3de..2a005be01f8 100644 --- a/src/blocked.c +++ b/src/blocked.c @@ -122,6 +122,18 @@ void processUnblockedClients(void) { listDelNode(server.unblocked_clients,ln); c->flags &= ~CLIENT_UNBLOCKED; + /* Reset the client for a new query, unless the client has pending command to process + * or in case a shutdown operation was canceled and we are still in the processCommand sequence */ + if (!(c->flags & CLIENT_PENDING_COMMAND)) { + freeClientOriginalArgv(c); + /* Clients that are not blocked on keys are not reprocessed so we must + * call reqresAppendResponse here (for clients blocked on key, + * unblockClientOnKey is called, which eventually calls processCommand, + * which calls reqresAppendResponse) */ + reqresAppendResponse(c); + resetClient(c); + } + if (c->flags & CLIENT_MODULE) { if (!(c->flags & CLIENT_BLOCKED)) { moduleCallCommandUnblockedHandler(c); @@ -191,17 +203,6 @@ void unblockClient(client *c, int queue_for_reprocessing) { serverPanic("Unknown btype in unblockClient()."); } - /* Reset the client for a new query, unless the client has pending command to process - * or in case a shutdown operation was canceled and we are still in the processCommand sequence */ - if (!(c->flags & CLIENT_PENDING_COMMAND) && c->bstate.btype != BLOCKED_SHUTDOWN) { - freeClientOriginalArgv(c); - /* Clients that are not blocked on keys are not reprocessed so we must - * call reqresAppendResponse here (for clients blocked on key, - * unblockClientOnKey is called, which eventually calls processCommand, - * which calls reqresAppendResponse) */ - reqresAppendResponse(c); - resetClient(c); - } /* Clear the flags, and put the client in the unblocked list so that * we'll process new commands in its query buffer ASAP. */ @@ -266,6 +267,7 @@ void replyToClientsBlockedOnShutdown(void) { while((ln = listNext(&li))) { client *c = listNodeValue(ln); if (c->flags & CLIENT_BLOCKED && c->bstate.btype == BLOCKED_SHUTDOWN) { + c->duration = 0; addReplyError(c, "Errors trying to SHUTDOWN. Check logs."); unblockClient(c, 1); } From c1ebab3a5642632e685d3868dc4ef6986fad23ee Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Thu, 9 Oct 2025 10:55:32 +0800 Subject: [PATCH 29/71] Cleanup --- src/server.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/server.c b/src/server.c index fcae00be057..9af6ada9b53 100644 --- a/src/server.c +++ b/src/server.c @@ -4249,7 +4249,6 @@ int processCommand(client *c) { int error_code; pendingCommand *pcmd = c->pending_cmds.head; getKeysResult *key_result = (pcmd->flags & PENDING_CMD_KEYRESULT_INVALID) ? NULL : &pcmd->keys_result; - // getKeysResult *key_result = NULL; clusterNode *n = getNodeByQuery(c,c->cmd,c->argv,c->argc, &c->slot,key_result,cmd_flags,&error_code); if (n == NULL || !clusterNodeIsMyself(n)) { From 4e6a35703a186f2e23ddb7720b71218afa88b584 Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Thu, 9 Oct 2025 12:08:54 +0800 Subject: [PATCH 30/71] Fix unnecessary memory prefetch when iothread is disabled --- src/networking.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/networking.c b/src/networking.c index 98833e5bd0f..3e0779ebad1 100644 --- a/src/networking.c +++ b/src/networking.c @@ -3061,9 +3061,12 @@ int processInputBuffer(client *c) { c->read_error = curcmd->read_error; c->current_pending_cmd = curcmd; - /* Prefetch the command if we are in the main thread. If running in an IO thread, - * prefetch will be deferred until the client is processed by the main thread. */ - if (c->running_tid == IOTHREAD_MAIN_THREAD_ID && !(c->flags & CLIENT_IN_MEMORY_PREFETCH)) { + /* Prefetch the command only when more commands have been parsed and we + * are in the main thread. If running in an IO thread, prefetch will be + * deferred until the client is processed by the main thread. */ + if (parse_more && c->running_tid == IOTHREAD_MAIN_THREAD_ID && + !(c->flags & CLIENT_IN_MEMORY_PREFETCH)) + { /* Prefetch the commands. */ resetCommandsBatch(); addCommandToBatch(c); From 025041526e69ff4dd865b4d64f48642e781b1891 Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Thu, 9 Oct 2025 21:03:10 +0800 Subject: [PATCH 31/71] Use cached getKeysResult for other places Co-authored-by: oranagra --- src/acl.c | 24 ++++++++++++++++-------- src/cluster.c | 12 ++++++------ src/memory_prefetch.c | 8 +++----- src/module.c | 8 +++++--- src/networking.c | 6 ++++++ src/script_lua.c | 2 +- src/server.c | 6 ++---- src/server.h | 7 ++++--- src/sort.c | 2 +- 9 files changed, 44 insertions(+), 31 deletions(-) diff --git a/src/acl.c b/src/acl.c index 639b7699f5b..a4687bad119 100644 --- a/src/acl.c +++ b/src/acl.c @@ -1788,7 +1788,7 @@ int ACLUserCheckKeyPerm(user *u, const char *key, int keylen, int flags) { * granted in addition to the access required by the command. Returns 1 * if the user has access or 0 otherwise. */ -int ACLUserCheckCmdWithUnrestrictedKeyAccess(user *u, struct redisCommand *cmd, robj **argv, int argc, int flags) { +int ACLUserCheckCmdWithUnrestrictedKeyAccess(user *u, struct redisCommand *cmd, robj **argv, int argc, getKeysResult *key_result, int flags) { listIter li; listNode *ln; int local_idxptr; @@ -1800,6 +1800,10 @@ int ACLUserCheckCmdWithUnrestrictedKeyAccess(user *u, struct redisCommand *cmd, * calls to prevent duplicate lookups. */ aclKeyResultCache cache; initACLKeyResultCache(&cache); + if (key_result) { + cache.keys = *key_result; + cache.keys_init = 1; + } /* Check each selector sequentially */ listRewind(u->selectors,&li); @@ -1807,11 +1811,11 @@ int ACLUserCheckCmdWithUnrestrictedKeyAccess(user *u, struct redisCommand *cmd, aclSelector *s = (aclSelector *) listNodeValue(ln); int acl_retval = ACLSelectorCheckCmd(s, cmd, argv, argc, &local_idxptr, &cache); if (acl_retval == ACL_OK && ACLSelectorHasUnrestrictedKeyAccess(s, flags)) { - cleanupACLKeyResultCache(&cache); + if (!key_result) cleanupACLKeyResultCache(&cache); return 1; } } - cleanupACLKeyResultCache(&cache); + if (!key_result) cleanupACLKeyResultCache(&cache); return 0; } @@ -1847,7 +1851,7 @@ int ACLUserCheckChannelPerm(user *u, sds channel, int is_pattern) { * If the command fails an ACL check, idxptr will be to set to the first argv entry that * causes the failure, either 0 if the command itself fails or the idx of the key/channel * that causes the failure */ -int ACLCheckAllUserCommandPerm(user *u, struct redisCommand *cmd, robj **argv, int argc, int *idxptr) { +int ACLCheckAllUserCommandPerm(user *u, struct redisCommand *cmd, robj **argv, int argc, getKeysResult *key_result, int *idxptr) { listIter li; listNode *ln; @@ -1864,6 +1868,10 @@ int ACLCheckAllUserCommandPerm(user *u, struct redisCommand *cmd, robj **argv, i * calls to prevent duplicate lookups. */ aclKeyResultCache cache; initACLKeyResultCache(&cache); + if (key_result) { + cache.keys = *key_result; + cache.keys_init = 1; + } /* Check each selector sequentially */ listRewind(u->selectors,&li); @@ -1871,7 +1879,7 @@ int ACLCheckAllUserCommandPerm(user *u, struct redisCommand *cmd, robj **argv, i aclSelector *s = (aclSelector *) listNodeValue(ln); int acl_retval = ACLSelectorCheckCmd(s, cmd, argv, argc, &local_idxptr, &cache); if (acl_retval == ACL_OK) { - cleanupACLKeyResultCache(&cache); + if (!key_result) cleanupACLKeyResultCache(&cache); return ACL_OK; } if (acl_retval > relevant_error || @@ -1883,13 +1891,13 @@ int ACLCheckAllUserCommandPerm(user *u, struct redisCommand *cmd, robj **argv, i } *idxptr = last_idx; - cleanupACLKeyResultCache(&cache); + if (!key_result) cleanupACLKeyResultCache(&cache); return relevant_error; } /* High level API for checking if a client can execute the queued up command */ int ACLCheckAllPerm(client *c, int *idxptr) { - return ACLCheckAllUserCommandPerm(c->user, c->cmd, c->argv, c->argc, idxptr); + return ACLCheckAllUserCommandPerm(c->user, c->cmd, c->argv, c->argc, NULL, idxptr); } /* If 'new' can access all channels 'original' could then return NULL; @@ -3139,7 +3147,7 @@ void aclCommand(client *c) { } int idx; - int result = ACLCheckAllUserCommandPerm(u, cmd, c->argv + 3, c->argc - 3, &idx); + int result = ACLCheckAllUserCommandPerm(u, cmd, c->argv + 3, c->argc - 3, NULL, &idx); if (result != ACL_OK) { sds err = getAclErrorMessage(result, u, cmd, c->argv[idx+3]->ptr, 1); addReplyBulkSds(c, err); diff --git a/src/cluster.c b/src/cluster.c index 82014d86f37..7843bfef82b 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -1151,10 +1151,10 @@ clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, in mc.argc = argc; mc.cmd = cmd; mc.slot = hashslot ? *hashslot : INVALID_CLUSTER_SLOT; - if (keys_result) + if (keys_result) { mc.keys_result = *keys_result; - else - mc.flags |= PENDING_CMD_KEYRESULT_INVALID; + mc.flags |= PENDING_CMD_KEYRESULT_VALID; + } } /* Check that all the keys are in the same hash slot, and obtain this @@ -1179,10 +1179,10 @@ clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, in } getKeysResult result = GETKEYS_RESULT_INIT; - if (pcmd->flags & PENDING_CMD_KEYRESULT_INVALID) - getKeysFromCommand(mcmd,margv,margc,&result); - else + if (likely(pcmd->flags & PENDING_CMD_KEYRESULT_VALID)) result = pcmd->keys_result; + else + getKeysFromCommand(mcmd,margv,margc,&result); keyindex = result.keys; for (j = 0; j < result.numkeys; j++) { diff --git a/src/memory_prefetch.c b/src/memory_prefetch.c index fee24b43171..3ba2c8a9924 100644 --- a/src/memory_prefetch.c +++ b/src/memory_prefetch.c @@ -387,15 +387,13 @@ int addCommandToBatch(client *c) { /* Skip commands that have not been preprocessed, or have errors. */ if ((pcmd->flags & PENDING_CMD_FLAG_INCOMPLETE) || !pcmd->cmd || pcmd->read_error) break; - getKeysResult result = GETKEYS_RESULT_INIT; - int numkeys = getKeysFromCommand(pcmd->cmd, pcmd->argv, pcmd->argc, &result); - for (int i = 0; i < numkeys && batch->key_count < batch->max_prefetch_size; i++) { - batch->keys[batch->key_count] = pcmd->argv[result.keys[i].pos]; + serverAssert(pcmd->flags & PENDING_CMD_KEYRESULT_VALID); + for (int i = 0; i < pcmd->keys_result.numkeys && batch->key_count < batch->max_prefetch_size; i++) { + batch->keys[batch->key_count] = pcmd->argv[pcmd->keys_result.keys[i].pos]; batch->keys_dicts[batch->key_count] = kvstoreGetDict(c->db->keys, pcmd->slot > 0 ? pcmd->slot : 0); batch->key_count++; } - getKeysFreeResult(&result); pcmd = pcmd->next; } diff --git a/src/module.c b/src/module.c index fd09ef60030..62b3709a95d 100644 --- a/src/module.c +++ b/src/module.c @@ -6632,7 +6632,7 @@ RedisModuleCallReply *RM_Call(RedisModuleCtx *ctx, const char *cmdname, const ch int acl_errpos; int acl_retval; - acl_retval = ACLCheckAllUserCommandPerm(user,c->cmd,c->argv,c->argc,&acl_errpos); + acl_retval = ACLCheckAllUserCommandPerm(user,c->cmd,c->argv,c->argc,NULL,&acl_errpos); if (acl_retval != ACL_OK) { sds object = (acl_retval == ACL_DENIED_CMD) ? sdsdup(c->cmd->fullname) : sdsdup(c->argv[acl_errpos]->ptr); addACLLogEntry(ctx->client, acl_retval, ACL_LOG_CTX_MODULE, -1, c->user->name, object); @@ -9901,7 +9901,7 @@ int RM_ACLCheckCommandPermissions(RedisModuleUser *user, RedisModuleString **arg return REDISMODULE_ERR; } - if (ACLCheckAllUserCommandPerm(user->user, cmd, argv, argc, &keyidxptr) != ACL_OK) { + if (ACLCheckAllUserCommandPerm(user->user, cmd, argv, argc, NULL, &keyidxptr) != ACL_OK) { errno = EACCES; return REDISMODULE_ERR; } @@ -11051,8 +11051,10 @@ void moduleCallCommandFilters(client *c) { pcmd->argv_len = filter.argv_len; pcmd->cmd = NULL; pcmd->slot = INVALID_CLUSTER_SLOT; - pcmd->flags |= PENDING_CMD_KEYRESULT_INVALID; + + /* Reset keys result */ getKeysFreeResult(&pcmd->keys_result); + pcmd->flags &= ~PENDING_CMD_KEYRESULT_VALID; pcmd->keys_result = (getKeysResult)GETKEYS_RESULT_INIT; } } diff --git a/src/networking.c b/src/networking.c index 3e0779ebad1..88213e9dea8 100644 --- a/src/networking.c +++ b/src/networking.c @@ -4982,3 +4982,9 @@ pendingCommand *popPendingCommandFromTail(pendingCommandList *list) { if (!(cmd->flags & PENDING_CMD_FLAG_INCOMPLETE)) list->ready_len--; return cmd; } + +getKeysResult *getClientCachedKeyResult(client *c) { + if (c->current_pending_cmd && c->current_pending_cmd->flags & PENDING_CMD_KEYRESULT_VALID) + return &c->current_pending_cmd->keys_result; + return NULL; +} diff --git a/src/script_lua.c b/src/script_lua.c index fc3cf6e9b4d..f742611c77a 100644 --- a/src/script_lua.c +++ b/src/script_lua.c @@ -1135,7 +1135,7 @@ static int luaRedisAclCheckCmdPermissionsCommand(lua_State *lua) { raise_error = 1; } else { int keyidxptr; - if (ACLCheckAllUserCommandPerm(rctx->original_client->user, cmd, argv, argc, &keyidxptr) != ACL_OK) { + if (ACLCheckAllUserCommandPerm(rctx->original_client->user, cmd, argv, argc, NULL, &keyidxptr) != ACL_OK) { lua_pushboolean(lua, 0); } else { lua_pushboolean(lua, 1); diff --git a/src/server.c b/src/server.c index 9af6ada9b53..b4be950d677 100644 --- a/src/server.c +++ b/src/server.c @@ -4074,10 +4074,10 @@ void preprocessCommand(client *c, pendingCommand *pcmd) { pcmd->keys_result = (getKeysResult)GETKEYS_RESULT_INIT; int numkeys = getKeysFromCommand(pcmd->cmd, pcmd->argv, pcmd->argc, &pcmd->keys_result); if (numkeys < 0) { - pcmd->flags |= PENDING_CMD_KEYRESULT_INVALID; /* We skip the checks below since We expect the command to be rejected in this case */ return; } + pcmd->flags |= PENDING_CMD_KEYRESULT_VALID; if (server.cluster_enabled) { for (int i = 0; i < numkeys; i++) { @@ -4247,10 +4247,8 @@ int processCommand(client *c) { c->cmd->proc != execCommand)) { int error_code; - pendingCommand *pcmd = c->pending_cmds.head; - getKeysResult *key_result = (pcmd->flags & PENDING_CMD_KEYRESULT_INVALID) ? NULL : &pcmd->keys_result; clusterNode *n = getNodeByQuery(c,c->cmd,c->argv,c->argc, - &c->slot,key_result,cmd_flags,&error_code); + &c->slot,getClientCachedKeyResult(c),cmd_flags,&error_code); if (n == NULL || !clusterNodeIsMyself(n)) { if (c->cmd->proc == execCommand) { discardTransaction(c); diff --git a/src/server.h b/src/server.h index 7bb8fe65177..ba013881662 100644 --- a/src/server.h +++ b/src/server.h @@ -2354,7 +2354,7 @@ typedef struct { /* pendingCommand flags */ enum { PENDING_CMD_FLAG_INCOMPLETE = 1 << 0, /* Command parsing is incomplete, still waiting for more data */ - PENDING_CMD_KEYRESULT_INVALID = 1 << 1, /* Key result is invalid, needs to be recomputed */ + PENDING_CMD_KEYRESULT_VALID = 1 << 1, /* Command's keys_result is valid and cached */ }; /* Parser state and parse result of a command from a client's input buffer. */ @@ -2977,6 +2977,7 @@ void unprotectClient(client *c); client *lookupClientByID(uint64_t id); int authRequired(client *c); void putClientInPendingWriteQueue(client *c); +getKeysResult *getClientCachedKeyResult(client *c); /* reply macros */ #define ADD_REPLY_BULK_CBUFFER_STRING_CONSTANT(c, str) addReplyBulkCBuffer(c, str, strlen(str)) @@ -3289,8 +3290,8 @@ void ACLClearCommandID(void); user *ACLGetUserByName(const char *name, size_t namelen); int ACLUserCheckKeyPerm(user *u, const char *key, int keylen, int flags); int ACLUserCheckChannelPerm(user *u, sds channel, int literal); -int ACLCheckAllUserCommandPerm(user *u, struct redisCommand *cmd, robj **argv, int argc, int *idxptr); -int ACLUserCheckCmdWithUnrestrictedKeyAccess(user *u, struct redisCommand *cmd, robj **argv, int argc, int flags); +int ACLCheckAllUserCommandPerm(user *u, struct redisCommand *cmd, robj **argv, int argc, getKeysResult *key_result, int *idxptr); +int ACLUserCheckCmdWithUnrestrictedKeyAccess(user *u, struct redisCommand *cmd, robj **argv, int argc, getKeysResult *key_result, int flags); int ACLCheckAllPerm(client *c, int *idxptr); int ACLSetUser(user *u, const char *op, ssize_t oplen); sds ACLStringSetUser(user *u, sds username, sds *argv, int argc); diff --git a/src/sort.c b/src/sort.c index 3d53eec8883..fef6f92737c 100644 --- a/src/sort.c +++ b/src/sort.c @@ -193,7 +193,7 @@ void sortCommandGeneric(client *c, int readonly) { listSetFreeMethod(operations,zfree); j = 2; /* options start at argv[2] */ - user_has_full_key_access = ACLUserCheckCmdWithUnrestrictedKeyAccess(c->user, c->cmd, c->argv, c->argc, CMD_KEY_ACCESS); + user_has_full_key_access = ACLUserCheckCmdWithUnrestrictedKeyAccess(c->user, c->cmd, c->argv, c->argc, getClientCachedKeyResult(c), CMD_KEY_ACCESS); /* The SORT command has an SQL-alike syntax, parse it */ while(j < c->argc) { From 47a20c45917576af9c66fd1e00fae6855fb59f5e Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Thu, 9 Oct 2025 21:10:56 +0800 Subject: [PATCH 32/71] Update outdated comment --- src/blocked.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/blocked.c b/src/blocked.c index 2a005be01f8..8ed0109922f 100644 --- a/src/blocked.c +++ b/src/blocked.c @@ -122,8 +122,7 @@ void processUnblockedClients(void) { listDelNode(server.unblocked_clients,ln); c->flags &= ~CLIENT_UNBLOCKED; - /* Reset the client for a new query, unless the client has pending command to process - * or in case a shutdown operation was canceled and we are still in the processCommand sequence */ + /* Reset the client for a new query, unless the client has pending command to process. */ if (!(c->flags & CLIENT_PENDING_COMMAND)) { freeClientOriginalArgv(c); /* Clients that are not blocked on keys are not reprocessed so we must From 1ad40059c0e4ab34ca1104bfe98469fd5040d707 Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Fri, 10 Oct 2025 10:33:32 +0800 Subject: [PATCH 33/71] Change the way to get getKeysResult in preprocess() --- src/acl.c | 12 ++++-------- src/cluster.c | 25 +++++++++++++++++++++++++ src/cluster.h | 1 + src/db.c | 32 ++++++++++++++++++++++++++++++++ src/server.c | 11 +++++++---- src/server.h | 3 ++- src/sort.c | 2 +- 7 files changed, 72 insertions(+), 14 deletions(-) diff --git a/src/acl.c b/src/acl.c index a4687bad119..6483cbe9f64 100644 --- a/src/acl.c +++ b/src/acl.c @@ -1788,7 +1788,7 @@ int ACLUserCheckKeyPerm(user *u, const char *key, int keylen, int flags) { * granted in addition to the access required by the command. Returns 1 * if the user has access or 0 otherwise. */ -int ACLUserCheckCmdWithUnrestrictedKeyAccess(user *u, struct redisCommand *cmd, robj **argv, int argc, getKeysResult *key_result, int flags) { +int ACLUserCheckCmdWithUnrestrictedKeyAccess(user *u, struct redisCommand *cmd, robj **argv, int argc, int flags) { listIter li; listNode *ln; int local_idxptr; @@ -1800,10 +1800,6 @@ int ACLUserCheckCmdWithUnrestrictedKeyAccess(user *u, struct redisCommand *cmd, * calls to prevent duplicate lookups. */ aclKeyResultCache cache; initACLKeyResultCache(&cache); - if (key_result) { - cache.keys = *key_result; - cache.keys_init = 1; - } /* Check each selector sequentially */ listRewind(u->selectors,&li); @@ -1811,11 +1807,11 @@ int ACLUserCheckCmdWithUnrestrictedKeyAccess(user *u, struct redisCommand *cmd, aclSelector *s = (aclSelector *) listNodeValue(ln); int acl_retval = ACLSelectorCheckCmd(s, cmd, argv, argc, &local_idxptr, &cache); if (acl_retval == ACL_OK && ACLSelectorHasUnrestrictedKeyAccess(s, flags)) { - if (!key_result) cleanupACLKeyResultCache(&cache); + cleanupACLKeyResultCache(&cache); return 1; } } - if (!key_result) cleanupACLKeyResultCache(&cache); + cleanupACLKeyResultCache(&cache); return 0; } @@ -1897,7 +1893,7 @@ int ACLCheckAllUserCommandPerm(user *u, struct redisCommand *cmd, robj **argv, i /* High level API for checking if a client can execute the queued up command */ int ACLCheckAllPerm(client *c, int *idxptr) { - return ACLCheckAllUserCommandPerm(c->user, c->cmd, c->argv, c->argc, NULL, idxptr); + return ACLCheckAllUserCommandPerm(c->user, c->cmd, c->argv, c->argc, getClientCachedKeyResult(c), idxptr); } /* If 'new' can access all channels 'original' could then return NULL; diff --git a/src/cluster.c b/src/cluster.c index 7843bfef82b..aaa89332af0 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -1075,6 +1075,31 @@ void clusterCommand(client *c) { } } +/* Extract slot number from keys in a keys_result structure and return to caller. + * Returns INVALID_CLUSTER_SLOT if keys belong to different slots (cross-slot error), + * or if there are no keys. + */ +int extractSlotFromKeysResult(robj **argv, getKeysResult *keys_result) { + if (keys_result->numkeys == 0) + return INVALID_CLUSTER_SLOT; + + if (!server.cluster_enabled) + return 0; + + int first_slot = INVALID_CLUSTER_SLOT; + for (int j = 0; j < keys_result->numkeys; j++) { + robj *this_key = argv[keys_result->keys[j].pos]; + int this_slot = (int)keyHashSlot((char*)this_key->ptr, sdslen(this_key->ptr)); + + if (first_slot == INVALID_CLUSTER_SLOT) + first_slot = this_slot; + else if (first_slot != this_slot) { + return INVALID_CLUSTER_SLOT; + } + } + return first_slot; +} + /* Return the pointer to the cluster node that is able to serve the command. * For the function to succeed the command should only target either: * diff --git a/src/cluster.h b/src/cluster.h index eacf3eaedf6..308d91329af 100644 --- a/src/cluster.h +++ b/src/cluster.h @@ -150,6 +150,7 @@ int getSlotOrReply(client *c, robj *o); /* functions with shared implementations */ clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, int argc, int *hashslot, getKeysResult *result, uint64_t cmd_flags, int *error_code); +int extractSlotFromKeysResult(robj **argv, getKeysResult *keys_result); int clusterRedirectBlockedClientIfNeeded(client *c); void clusterRedirectClient(client *c, clusterNode *n, int hashslot, int error_code); void migrateCloseTimedoutSockets(void); diff --git a/src/db.c b/src/db.c index a0009432698..d0061a75912 100644 --- a/src/db.c +++ b/src/db.c @@ -2971,6 +2971,38 @@ int getChannelsFromCommand(struct redisCommand *cmd, robj **argv, int argc, getK return 0; } +/* Extract keys/channels from a command and calculate the cluster slot. + * Returns the number of keys/channels extracted. + * The slot number is returned by reference into *slot. + * If is_incomplete is not NULL, it will be set for key extraction. + * + * This function handles both regular commands (keys) and sharded pubsub + * commands (channels), but excludes regular pubsub commands which don't + * have slots. + */ +int extractKeysAndSlot(struct redisCommand *cmd, robj **argv, int argc, + getKeysResult *result, int *slot) { + int num_keys = -1; + + if (!doesCommandHaveChannelsWithFlags(cmd, CMD_CHANNEL_PUBLISH | CMD_CHANNEL_SUBSCRIBE)) { + num_keys = getKeysFromCommandWithSpecs(cmd, argv, argc, GET_KEYSPEC_DEFAULT, result); + } else { + /* Only extract channels for commands that have key_specs (sharded pubsub). + * Regular pubsub commands (PUBLISH, SUBSCRIBE) don't have slots. */ + if (cmd->key_specs_num > 0) { + num_keys = getChannelsFromCommand(cmd, argv, argc, result); + } else { + num_keys = 0; + } + } + + *slot = INVALID_CLUSTER_SLOT; + if (num_keys >= 0) + *slot = extractSlotFromKeysResult(argv, result); + + return num_keys; +} + /* The base case is to use the keys position as given in the command table * (firstkey, lastkey, step). * This function works only on command with the legacy_range_key_spec, diff --git a/src/server.c b/src/server.c index b4be950d677..c74a9b8ab59 100644 --- a/src/server.c +++ b/src/server.c @@ -4072,17 +4072,20 @@ void preprocessCommand(client *c, pendingCommand *pcmd) { } pcmd->keys_result = (getKeysResult)GETKEYS_RESULT_INIT; - int numkeys = getKeysFromCommand(pcmd->cmd, pcmd->argv, pcmd->argc, &pcmd->keys_result); - if (numkeys < 0) { + int num_keys = extractKeysAndSlot(pcmd->cmd, pcmd->argv, pcmd->argc, + &pcmd->keys_result, &pcmd->slot); + if (num_keys < 0) { /* We skip the checks below since We expect the command to be rejected in this case */ return; } pcmd->flags |= PENDING_CMD_KEYRESULT_VALID; if (server.cluster_enabled) { - for (int i = 0; i < numkeys; i++) { - robj *thiskey = pcmd->argv[pcmd->keys_result.keys[i].pos]; + robj **margv = pcmd->argv; + for (int j = 0; j < pcmd->keys_result.numkeys; j++) { + robj *thiskey = margv[pcmd->keys_result.keys[j].pos]; int thisslot = (int)keyHashSlot((char*)thiskey->ptr, sdslen(thiskey->ptr)); + if (pcmd->slot == INVALID_CLUSTER_SLOT) { pcmd->slot = thisslot; } else if (pcmd->slot != thisslot) { diff --git a/src/server.h b/src/server.h index ba013881662..136e3d6a132 100644 --- a/src/server.h +++ b/src/server.h @@ -3291,7 +3291,7 @@ user *ACLGetUserByName(const char *name, size_t namelen); int ACLUserCheckKeyPerm(user *u, const char *key, int keylen, int flags); int ACLUserCheckChannelPerm(user *u, sds channel, int literal); int ACLCheckAllUserCommandPerm(user *u, struct redisCommand *cmd, robj **argv, int argc, getKeysResult *key_result, int *idxptr); -int ACLUserCheckCmdWithUnrestrictedKeyAccess(user *u, struct redisCommand *cmd, robj **argv, int argc, getKeysResult *key_result, int flags); +int ACLUserCheckCmdWithUnrestrictedKeyAccess(user *u, struct redisCommand *cmd, robj **argv, int argc, int flags); int ACLCheckAllPerm(client *c, int *idxptr); int ACLSetUser(user *u, const char *op, ssize_t oplen); sds ACLStringSetUser(user *u, sds username, sds *argv, int argc); @@ -3795,6 +3795,7 @@ int doesCommandHaveKeys(struct redisCommand *cmd); int getChannelsFromCommand(struct redisCommand *cmd, robj **argv, int argc, getKeysResult *result); int doesCommandHaveChannelsWithFlags(struct redisCommand *cmd, int flags); void getKeysFreeResult(getKeysResult *result); +int extractKeysAndSlot(struct redisCommand *cmd, robj **argv, int argc, getKeysResult *result, int *slot); int sintercardGetKeys(struct redisCommand *cmd,robj **argv, int argc, getKeysResult *result); int zunionInterDiffGetKeys(struct redisCommand *cmd,robj **argv, int argc, getKeysResult *result); int zunionInterDiffStoreGetKeys(struct redisCommand *cmd,robj **argv, int argc, getKeysResult *result); diff --git a/src/sort.c b/src/sort.c index fef6f92737c..3d53eec8883 100644 --- a/src/sort.c +++ b/src/sort.c @@ -193,7 +193,7 @@ void sortCommandGeneric(client *c, int readonly) { listSetFreeMethod(operations,zfree); j = 2; /* options start at argv[2] */ - user_has_full_key_access = ACLUserCheckCmdWithUnrestrictedKeyAccess(c->user, c->cmd, c->argv, c->argc, getClientCachedKeyResult(c), CMD_KEY_ACCESS); + user_has_full_key_access = ACLUserCheckCmdWithUnrestrictedKeyAccess(c->user, c->cmd, c->argv, c->argc, CMD_KEY_ACCESS); /* The SORT command has an SQL-alike syntax, parse it */ while(j < c->argc) { From c046a62b1590ec3180f8068490dfb03eac406fb8 Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Fri, 10 Oct 2025 12:37:23 +0800 Subject: [PATCH 34/71] delay the execution of preprocessCommand if possible --- src/networking.c | 16 ++++++++++++++-- src/server.h | 5 +++-- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/src/networking.c b/src/networking.c index 88213e9dea8..4336dec818e 100644 --- a/src/networking.c +++ b/src/networking.c @@ -3042,6 +3042,7 @@ int processInputBuffer(client *c) { pcmd->reploff = c->read_reploff - sdslen(c->querybuf) + c->qb_pos; preprocessCommand(c, pcmd); + pcmd->flags |= PENDING_CMD_FLAG_PREPROCESSED; resetClientQbufState(c); } @@ -4983,8 +4984,19 @@ pendingCommand *popPendingCommandFromTail(pendingCommandList *list) { return cmd; } +/* Get cached key result for current pending command */ getKeysResult *getClientCachedKeyResult(client *c) { - if (c->current_pending_cmd && c->current_pending_cmd->flags & PENDING_CMD_KEYRESULT_VALID) - return &c->current_pending_cmd->keys_result; + pendingCommand *pcmd = c->current_pending_cmd; + if (pcmd) { + /* Preprocess the command if needed */ + if (!(pcmd->flags & PENDING_CMD_FLAG_PREPROCESSED)) { + preprocessCommand(c, pcmd); + pcmd->flags |= PENDING_CMD_FLAG_PREPROCESSED; + } + + /* Return cached result if available */ + if (pcmd->flags & PENDING_CMD_KEYRESULT_VALID) + return &c->current_pending_cmd->keys_result; + } return NULL; } diff --git a/src/server.h b/src/server.h index 136e3d6a132..e8de7c42b7e 100644 --- a/src/server.h +++ b/src/server.h @@ -2353,8 +2353,9 @@ typedef struct { /* pendingCommand flags */ enum { - PENDING_CMD_FLAG_INCOMPLETE = 1 << 0, /* Command parsing is incomplete, still waiting for more data */ - PENDING_CMD_KEYRESULT_VALID = 1 << 1, /* Command's keys_result is valid and cached */ + PENDING_CMD_FLAG_INCOMPLETE = 1 << 0, /* Command parsing is incomplete, still waiting for more data */ + PENDING_CMD_FLAG_PREPROCESSED = 1 << 1, /* This command has passed pre-processing */ + PENDING_CMD_KEYRESULT_VALID = 1 << 2, /* Command's keys_result is valid and cached */ }; /* Parser state and parse result of a command from a client's input buffer. */ From a636e930bdd33bb428597b2dc524e1c8236f7057 Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Fri, 10 Oct 2025 12:47:57 +0800 Subject: [PATCH 35/71] Remove unnecessary changes --- tests/support/util.tcl | 34 +++++++++++++++++----- tests/unit/moduleapi/async_rm_call.tcl | 2 +- tests/unit/moduleapi/keyspace_events.tcl | 4 +-- tests/unit/moduleapi/postnotifications.tcl | 2 +- 4 files changed, 31 insertions(+), 11 deletions(-) diff --git a/tests/support/util.tcl b/tests/support/util.tcl index b8b917d2b22..e79b88f79d2 100644 --- a/tests/support/util.tcl +++ b/tests/support/util.tcl @@ -685,21 +685,41 @@ proc process_is_alive pid { } } -proc pause_process pid { +proc get_system_name {} { + return [string tolower [exec uname -s]] +} + +proc get_proc_state {pid} { + if {[get_system_name] eq {sunos}} { + return [exec ps -o s= -p $pid] + } else { + return [exec ps -o state= -p $pid] + } +} + +proc get_proc_job {pid} { + if {[get_system_name] eq {sunos}} { + return [exec ps -l -p $pid] + } else { + return [exec ps j $pid] + } +} + +proc pause_process {pid} { exec kill -SIGSTOP $pid wait_for_condition 50 100 { - [string match {*T*} [lindex [exec ps j $pid] 16]] + [string match "T*" [get_proc_state $pid]] } else { - puts [exec ps j $pid] + puts [get_proc_job $pid] fail "process didn't stop" } } -proc resume_process pid { +proc resume_process {pid} { wait_for_condition 50 1000 { - [string match "T*" [exec ps -o state= -p $pid]] + [string match "T*" [get_proc_state $pid]] } else { - puts [exec ps j $pid] + puts [get_proc_job $pid] fail "process was not stopped" } exec kill -SIGCONT $pid @@ -1208,7 +1228,7 @@ proc system_backtrace_supported {} { return 0 } - set system_name [string tolower [exec uname -s]] + set system_name [get_system_name] if {$system_name eq {darwin}} { return 1 } elseif {$system_name ne {linux}} { diff --git a/tests/unit/moduleapi/async_rm_call.tcl b/tests/unit/moduleapi/async_rm_call.tcl index 7e451cb2aa4..d645de48cee 100644 --- a/tests/unit/moduleapi/async_rm_call.tcl +++ b/tests/unit/moduleapi/async_rm_call.tcl @@ -140,7 +140,7 @@ start_server {tags {"modules external:skip"}} { $rd wait_and_do_rm_call blpop l 0 wait_for_blocked_clients_count 1 - start_server {tags {"external:skip"}} { + start_server {} { test "Connect a replica to the master instance" { r slaveof [srv -1 host] [srv -1 port] wait_for_condition 50 100 { diff --git a/tests/unit/moduleapi/keyspace_events.tcl b/tests/unit/moduleapi/keyspace_events.tcl index a9ce759b86d..5d62a71788d 100644 --- a/tests/unit/moduleapi/keyspace_events.tcl +++ b/tests/unit/moduleapi/keyspace_events.tcl @@ -1,7 +1,7 @@ set testmodule [file normalize tests/modules/keyspace_events.so] tags "modules external:skip" { - start_server [list overrides [list loadmodule "$testmodule"] tags {"external:skip"}] { + start_server [list overrides [list loadmodule "$testmodule"]] { # avoid using shared integers, to increase the chance of detection heap issues r config set maxmemory-policy allkeys-lru @@ -125,7 +125,7 @@ tags "modules external:skip" { } } - start_server {tags {"external:skip"}} { + start_server {} { test {OnLoad failure will handle un-registration} { catch {r module load $testmodule noload} r set x 1 diff --git a/tests/unit/moduleapi/postnotifications.tcl b/tests/unit/moduleapi/postnotifications.tcl index c887f3a3dfc..31a4669413a 100644 --- a/tests/unit/moduleapi/postnotifications.tcl +++ b/tests/unit/moduleapi/postnotifications.tcl @@ -175,7 +175,7 @@ tags "modules external:skip" { set testmodule2 [file normalize tests/modules/keyspace_events.so] tags "modules external:skip" { - start_server {tags {"external:skip"}} { + start_server {} { r module load $testmodule with_key_events r module load $testmodule2 test {Test write on post notification callback} { From 423cc20b57c5da9cfb822d389d43be89500e965c Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Fri, 10 Oct 2025 13:03:11 +0800 Subject: [PATCH 36/71] Set pcmd->flags to 0 after calling command filter --- src/module.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/module.c b/src/module.c index 62b3709a95d..560dac5e9d5 100644 --- a/src/module.c +++ b/src/module.c @@ -11051,10 +11051,10 @@ void moduleCallCommandFilters(client *c) { pcmd->argv_len = filter.argv_len; pcmd->cmd = NULL; pcmd->slot = INVALID_CLUSTER_SLOT; + pcmd->flags = 0; /* Reset keys result */ getKeysFreeResult(&pcmd->keys_result); - pcmd->flags &= ~PENDING_CMD_KEYRESULT_VALID; pcmd->keys_result = (getKeysResult)GETKEYS_RESULT_INIT; } } From 2f9983236596c6ae99efb5934a942d0b68d97f3f Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Fri, 10 Oct 2025 14:02:39 +0800 Subject: [PATCH 37/71] Update src/server.h Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/server.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/server.h b/src/server.h index a99eb9c24a4..203657c59f1 100644 --- a/src/server.h +++ b/src/server.h @@ -1148,7 +1148,7 @@ typedef struct rdbLoadingCtx { typedef struct pendingCommand pendingCommand; typedef struct multiState { pendingCommand **commands; /* Array of pointers to MULTI commands */ - int executing_cmd; /* The index of the currently exeuted transaction + int executing_cmd; /* The index of the currently executed transaction command (index in commands field) */ int count; /* Total number of MULTI commands */ int cmd_flags; /* The accumulated command flags OR-ed together. From 07e3ab5e138a30af08bf241b0a3ccc8377d470ca Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Fri, 10 Oct 2025 14:03:09 +0800 Subject: [PATCH 38/71] Update src/server.h Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/server.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/server.h b/src/server.h index 203657c59f1..b0ee9831cf9 100644 --- a/src/server.h +++ b/src/server.h @@ -2842,7 +2842,7 @@ int moduleIsModuleCommand(void *module_handle, struct redisCommand *cmd); /* pcmd */ void initPendingCommand(pendingCommand *pcmd); void freePendingCommand(client *c, pendingCommand *pcmd); -void addPengingCommand(pendingCommandList *queue, pendingCommand *cmd); +void addPendingCommand(pendingCommandList *queue, pendingCommand *cmd); pendingCommand *popPendingCommandFromHead(pendingCommandList *queue); pendingCommand *popPendingCommandFromTail(pendingCommandList *queue); From 3cc52a6165d9a5a6ece03e8f18d4dc8024cefc8b Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Fri, 10 Oct 2025 14:03:49 +0800 Subject: [PATCH 39/71] spell --- src/aof.c | 2 +- src/networking.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/aof.c b/src/aof.c index e0571290a5a..e9007c21e3e 100644 --- a/src/aof.c +++ b/src/aof.c @@ -1644,7 +1644,7 @@ int loadSingleAppendOnlyFile(char *filename) { * for it to consume */ pendingCommand *pcmd = zmalloc(sizeof(pendingCommand)); initPendingCommand(pcmd); - addPengingCommand(&fakeClient->pending_cmds, pcmd); + addPendingCommand(&fakeClient->pending_cmds, pcmd); pcmd->argc = argc; pcmd->argv_len = argc; diff --git a/src/networking.c b/src/networking.c index 4336dec818e..cf2ec0face9 100644 --- a/src/networking.c +++ b/src/networking.c @@ -3036,7 +3036,7 @@ int processInputBuffer(client *c) { serverPanic("Unknown request type"); } - addPengingCommand(&c->pending_cmds, pcmd); + addPendingCommand(&c->pending_cmds, pcmd); if (unlikely(pcmd->read_error || (pcmd->flags & PENDING_CMD_FLAG_INCOMPLETE))) break; @@ -4932,7 +4932,7 @@ void freePendingCommand(client *c, pendingCommand *pcmd) { } /* Add a command to the tail of the pending command list. */ -void addPengingCommand(pendingCommandList *queue, pendingCommand *cmd) { +void addPendingCommand(pendingCommandList *queue, pendingCommand *cmd) { cmd->next = NULL; cmd->prev = queue->tail; From c5defde4972308e8214d93e439c4ca806ede987a Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Sun, 12 Oct 2025 10:40:00 +0800 Subject: [PATCH 40/71] Skip CLIENT_UNBLOCKED client for processInputBuffer() --- src/networking.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/networking.c b/src/networking.c index cf2ec0face9..549b4f3920f 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2971,7 +2971,7 @@ int processInputBuffer(client *c) { while ((c->querybuf && c->qb_pos < sdslen(c->querybuf)) || c->pending_cmds.ready_len > 0) { /* Immediately abort if the client is in the middle of something. */ - if (c->flags & CLIENT_BLOCKED) break; + if (c->flags & CLIENT_BLOCKED || c->flags & CLIENT_UNBLOCKED) break; /* Don't process more buffers from clients that have already pending * commands to execute in c->argv. */ From 894938c50cdf792d7400c0f4bc9ee9775cb10207 Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Sun, 12 Oct 2025 15:23:53 +0800 Subject: [PATCH 41/71] Remove unused code --- src/server.c | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/src/server.c b/src/server.c index 13481ade119..d3b609c9f5d 100644 --- a/src/server.c +++ b/src/server.c @@ -4079,25 +4079,6 @@ void preprocessCommand(client *c, pendingCommand *pcmd) { return; } pcmd->flags |= PENDING_CMD_KEYRESULT_VALID; - - if (server.cluster_enabled) { - robj **margv = pcmd->argv; - for (int j = 0; j < pcmd->keys_result.numkeys; j++) { - robj *thiskey = margv[pcmd->keys_result.keys[j].pos]; - int thisslot = (int)keyHashSlot((char*)thiskey->ptr, sdslen(thiskey->ptr)); - - if (pcmd->slot == INVALID_CLUSTER_SLOT) { - pcmd->slot = thisslot; - } else if (pcmd->slot != thisslot) { - serverLog(LL_NOTICE, "preprocessCommand: CROSS SLOT ERROR"); - /* Invalidate the slot to indicate that there is a cross-slot error */ - pcmd->slot = INVALID_CLUSTER_SLOT; - /* Cross slot error. */ - pcmd->read_error = CLIENT_READ_CROSS_SLOT; - break; - } - } - } } /* If this function gets called we already read a whole From 56a71bc7b6c580a10354a70bf5d037cbeea18d19 Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Sun, 12 Oct 2025 19:17:12 +0800 Subject: [PATCH 42/71] Fix wrongly last_cmd in preprocessCommand() --- src/server.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/server.c b/src/server.c index d3b609c9f5d..edc143a7002 100644 --- a/src/server.c +++ b/src/server.c @@ -4050,9 +4050,9 @@ void preprocessCommand(client *c, pendingCommand *pcmd) { if (pcmd->argc == 0) return; - /* Check if we can reuse the last command instead of looking it up. - * The last command is either the penultimate pending command (if it exists), or c->lastcmd. */ - struct redisCommand *last_cmd = c->pending_cmds.tail->prev ? c->pending_cmds.head->cmd : c->lastcmd; + /* Check if we can reuse the previous command instead of looking it up. + * The previous command is either the penultimate pending command (if it exists), or c->lastcmd. */ + struct redisCommand *last_cmd = pcmd->prev ? pcmd->prev->cmd : c->lastcmd; if (isCommandReusable(last_cmd, pcmd->argv[0])) pcmd->cmd = last_cmd; From ce3eb458a882b3734af5ed443a50bd2213f9f212 Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Tue, 14 Oct 2025 17:23:27 +0800 Subject: [PATCH 43/71] Add pending command pool --- src/networking.c | 95 +++++++++++++++++++++++++++++++++++++++++------- src/server.h | 4 ++ 2 files changed, 85 insertions(+), 14 deletions(-) diff --git a/src/networking.c b/src/networking.c index 549b4f3920f..ec02697ebe4 100644 --- a/src/networking.c +++ b/src/networking.c @@ -33,6 +33,9 @@ static inline int _clientHasPendingRepliesSlave(client *c); static inline int _clientHasPendingRepliesNonSlave(client *c); static inline int _writeToClientNonSlave(client *c, ssize_t *nwritten); static inline int _writeToClientSlave(client *c, ssize_t *nwritten); +static pendingCommand *acquirePendingCommand(client *c); +static void reclaimPendingCommand(client *c, pendingCommand *pcmd); + int ProcessingEventsWhileBlocked = 0; /* See processEventsWhileBlocked(). */ __thread sds thread_reusable_qb = NULL; __thread int thread_reusable_qb_used = 0; /* Avoid multiple clients using reusable query @@ -168,6 +171,8 @@ client *createClient(connection *conn) { c->all_argv_len_sum = 0; c->pending_cmds.head = c->pending_cmds.tail = NULL; c->pending_cmds.len = c->pending_cmds.ready_len = 0; + memset(c->pending_cmds.pool, 0, sizeof(c->pending_cmds.pool)); + c->pending_cmds.pool_size = 0; c->current_pending_cmd = NULL; c->original_argc = 0; c->original_argv = NULL; @@ -1555,10 +1560,22 @@ void freeClientPendingCommands(client *c, int num_pcmds_to_free) { while (num_pcmds_to_free--) { pendingCommand *pcmd = popPendingCommandFromHead(&c->pending_cmds); serverAssert(pcmd); - freePendingCommand(c, pcmd); + reclaimPendingCommand(c, pcmd); } } +/* Discard all pending commands for a client and clear the command pool. */ +void discardClientPendingCommands(client *c) { + freeClientPendingCommands(c, -1); + + /* Free all commands in the pool */ + for (int i = 0; i < c->pending_cmds.pool_size; i++) { + freePendingCommand(c, c->pending_cmds.pool[i]); + c->pending_cmds.pool[i] = NULL; + } + c->pending_cmds.pool_size = 0; +} + /* Close all the slaves connections. This is useful in chained replication * when we resync with our own master and want to force all our slaves to * resync with us as well. */ @@ -1658,7 +1675,7 @@ void unlinkClient(client *c) { c->flags &= ~CLIENT_UNBLOCKED; } - freeClientPendingCommands(c, -1); + discardClientPendingCommands(c); c->argv_len = 0; c->argv = NULL; c->argc = 0; @@ -2478,10 +2495,13 @@ int processInlineBuffer(client *c, pendingCommand *pcmd) { /* Setup argv array on client structure */ if (argc) { - zfree(pcmd->argv); - pcmd->argv = zmalloc(sizeof(robj*)*argc); - pcmd->argv_len = argc; - pcmd->argv_len_sum = 0; + /* Create new argv if space is insufficient. */ + if (argc > pcmd->argv_len) { + zfree(pcmd->argv); + pcmd->argv = zmalloc(sizeof(robj*)*argc); + pcmd->argv_len = argc; + pcmd->argv_len_sum = 0; + } } /* Create redis objects for all arguments. */ @@ -2598,10 +2618,12 @@ static int processMultibulkBuffer(client *c, pendingCommand *pcmd) { c->multibulklen = ll; c->bulklen = -1; - zfree(pcmd->argv); - pcmd->argv_len = min(c->multibulklen, 1024); - pcmd->argv = zmalloc(sizeof(robj*)*(pcmd->argv_len)); - pcmd->argv_len_sum = 0; + if (c->multibulklen > pcmd->argv_len) { + zfree(pcmd->argv); + pcmd->argv_len = min(c->multibulklen, 1024); + pcmd->argv = zmalloc(sizeof(robj*)*(pcmd->argv_len)); + pcmd->argv_len_sum = 0; + } /* Per-slot network bytes-in calculation. * @@ -3009,8 +3031,7 @@ int processInputBuffer(client *c) { pendingCommand *pcmd = NULL; if (c->reqtype == PROTO_REQ_INLINE) { - pcmd = zmalloc(sizeof(pendingCommand)); - initPendingCommand(pcmd); + pcmd = acquirePendingCommand(c); if (processInlineBuffer(c, pcmd) == C_ERR && !pcmd->read_error) { /* If it fails but there are no errors, it means that it might just be * that the desired content cannot be parsed. At this point, we exit and wait for the next time. */ @@ -3022,8 +3043,7 @@ int processInputBuffer(client *c) { if (unlikely(incomplete)) { pcmd = popPendingCommandFromTail(&c->pending_cmds); } else { - pcmd = zmalloc(sizeof(pendingCommand)); - initPendingCommand(pcmd); + pcmd = acquirePendingCommand(c); } if (processMultibulkBuffer(c, pcmd) == C_ERR && !pcmd->read_error) { @@ -4342,6 +4362,7 @@ void replaceClientCommandVector(client *c, int argc, robj **argv) { serverAssert(pcmd->argv == c->argv); pcmd->argv = argv; pcmd->argc = argc; + pcmd->argv_len = argc; } freeClientArgv(c); c->argv = argv; @@ -4908,6 +4929,52 @@ void evictClients(void) { } } +static pendingCommand *acquirePendingCommand(client *c) { + pendingCommandList *list = &c->pending_cmds; + pendingCommand *pcmd = NULL; + + if (list->pool_size > 0) { + /* Get from pool */ + pcmd = list->pool[--list->pool_size]; + list->pool[list->pool_size] = NULL; + } else { + /* Pool is empty, allocate new */ + pcmd = zmalloc(sizeof(pendingCommand)); + initPendingCommand(pcmd); + } + return pcmd; +} + +static void reclaimPendingCommand(client *c, pendingCommand *cmd) { + pendingCommandList *list = &c->pending_cmds; + + /* If pool is not full and command argv is small, add to pool for reuse. + * We avoid pooling commands with large argv arrays to prevent excessive + * memory usage in the pool. */ + if (list->pool_size < PENDING_COMMAND_POOL_SIZE && cmd->argv_len < 16) { + for (int j = 0; j < cmd->argc; j++) + decrRefCount(cmd->argv[j]); + + getKeysFreeResult(&cmd->keys_result); + + serverAssert(c->all_argv_len_sum >= cmd->argv_len_sum); /* assert this doesn't try to go negative */ + c->all_argv_len_sum -= cmd->argv_len_sum; + + /* Reset the pending command while preserving the argv array for reuse. */ + robj **argv = cmd->argv; + int argv_len = cmd->argv_len; + memset(cmd, 0, sizeof(pendingCommand)); + cmd->argv = argv; + cmd->argv_len = argv_len; + cmd->slot = INVALID_CLUSTER_SLOT; + + list->pool[list->pool_size++] = cmd; + } else { + /* Pool is full, free this pending command. */ + freePendingCommand(c, cmd); + } +} + void initPendingCommand(pendingCommand *pcmd) { memset(pcmd, 0, sizeof(pendingCommand)); pcmd->slot = INVALID_CLUSTER_SLOT; diff --git a/src/server.h b/src/server.h index b0ee9831cf9..a01204c7105 100644 --- a/src/server.h +++ b/src/server.h @@ -1208,11 +1208,15 @@ typedef struct readyList { } readyList; /* List of pending commands. */ +#define PENDING_COMMAND_POOL_SIZE 16 typedef struct pendingCommandList { pendingCommand *head; pendingCommand *tail; int len; /* Number of commands in the list */ int ready_len; /* Number of commands that are ready to be processed */ + + pendingCommand *pool[PENDING_COMMAND_POOL_SIZE]; /* Pending command pool */ + int pool_size; /* Current number of objects in pool */ } pendingCommandList; /* This structure represents a Redis user. This is useful for ACLs, the From 199510b44f95128947b23c269f4257bcd68779c9 Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Tue, 14 Oct 2025 17:36:39 +0800 Subject: [PATCH 44/71] Free pending command pool if idle for 2 secs --- src/networking.c | 16 +++++++++------- src/server.c | 9 +++++++++ src/server.h | 1 + 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/src/networking.c b/src/networking.c index ec02697ebe4..23dd8cfaad5 100644 --- a/src/networking.c +++ b/src/networking.c @@ -1564,13 +1564,14 @@ void freeClientPendingCommands(client *c, int num_pcmds_to_free) { } } -/* Discard all pending commands for a client and clear the command pool. */ -void discardClientPendingCommands(client *c) { - freeClientPendingCommands(c, -1); - - /* Free all commands in the pool */ +/* Free all pooled pending commands and reset the pool */ +void freePendingCommandPool(client *c) { for (int i = 0; i < c->pending_cmds.pool_size; i++) { - freePendingCommand(c, c->pending_cmds.pool[i]); + pendingCommand *pcmd = c->pending_cmds.pool[i]; + /* Only need to free the reused argv array and pending command itself, + * other fields were already freed during reclaim process */ + zfree(pcmd->argv); + zfree(pcmd); c->pending_cmds.pool[i] = NULL; } c->pending_cmds.pool_size = 0; @@ -1675,7 +1676,8 @@ void unlinkClient(client *c) { c->flags &= ~CLIENT_UNBLOCKED; } - discardClientPendingCommands(c); + freeClientPendingCommands(c, -1); + freePendingCommandPool(c); c->argv_len = 0; c->argv = NULL; c->argc = 0; diff --git a/src/server.c b/src/server.c index edc143a7002..fd2e0dd4ff3 100644 --- a/src/server.c +++ b/src/server.c @@ -872,6 +872,15 @@ int clientsCronResizeQueryBuffer(client *c) { return 0; } +/* If the client has been idle for too long, free the client's pending command pool. */ +int clientsCronFreePendingCommandPoolIfIdle(client *c) { + /* Free pending command pool if the client has been idle for more than 2 seconds. */ + time_t idletime = server.unixtime - c->lastinteraction; + if (idletime > 2) + freePendingCommandPool(c); + return 0; +} + /* The client output buffer can be adjusted to better fit the memory requirements. * * the logic is: diff --git a/src/server.h b/src/server.h index a01204c7105..c70d0f81ba4 100644 --- a/src/server.h +++ b/src/server.h @@ -2880,6 +2880,7 @@ void resetClientQbufState(client *c); void freeClientOriginalArgv(client *c); void freeClientArgv(client *c); void freeClientPendingCommands(client *c, int num_pcmds_to_free); +void freePendingCommandPool(client *c); void tryDeferFreeClientObject(client *c, robj *o); void freeClientDeferredObjects(client *c, int free_array); void sendReplyToClient(connection *conn); From e3cf5563a80ce2784ef79cfb59a966519b98e7bd Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Wed, 15 Oct 2025 19:44:33 +0800 Subject: [PATCH 45/71] Change the client's pending command pool to global pending command pool --- src/networking.c | 90 ++++++++++++++++++++---------------------------- src/server.c | 13 +++---- src/server.h | 7 ++-- 3 files changed, 45 insertions(+), 65 deletions(-) diff --git a/src/networking.c b/src/networking.c index 23dd8cfaad5..69e567af6f9 100644 --- a/src/networking.c +++ b/src/networking.c @@ -33,7 +33,7 @@ static inline int _clientHasPendingRepliesSlave(client *c); static inline int _clientHasPendingRepliesNonSlave(client *c); static inline int _writeToClientNonSlave(client *c, ssize_t *nwritten); static inline int _writeToClientSlave(client *c, ssize_t *nwritten); -static pendingCommand *acquirePendingCommand(client *c); +static pendingCommand *acquirePendingCommand(void); static void reclaimPendingCommand(client *c, pendingCommand *pcmd); int ProcessingEventsWhileBlocked = 0; /* See processEventsWhileBlocked(). */ @@ -171,8 +171,6 @@ client *createClient(connection *conn) { c->all_argv_len_sum = 0; c->pending_cmds.head = c->pending_cmds.tail = NULL; c->pending_cmds.len = c->pending_cmds.ready_len = 0; - memset(c->pending_cmds.pool, 0, sizeof(c->pending_cmds.pool)); - c->pending_cmds.pool_size = 0; c->current_pending_cmd = NULL; c->original_argc = 0; c->original_argv = NULL; @@ -1564,19 +1562,6 @@ void freeClientPendingCommands(client *c, int num_pcmds_to_free) { } } -/* Free all pooled pending commands and reset the pool */ -void freePendingCommandPool(client *c) { - for (int i = 0; i < c->pending_cmds.pool_size; i++) { - pendingCommand *pcmd = c->pending_cmds.pool[i]; - /* Only need to free the reused argv array and pending command itself, - * other fields were already freed during reclaim process */ - zfree(pcmd->argv); - zfree(pcmd); - c->pending_cmds.pool[i] = NULL; - } - c->pending_cmds.pool_size = 0; -} - /* Close all the slaves connections. This is useful in chained replication * when we resync with our own master and want to force all our slaves to * resync with us as well. */ @@ -1677,7 +1662,6 @@ void unlinkClient(client *c) { } freeClientPendingCommands(c, -1); - freePendingCommandPool(c); c->argv_len = 0; c->argv = NULL; c->argc = 0; @@ -3033,7 +3017,7 @@ int processInputBuffer(client *c) { pendingCommand *pcmd = NULL; if (c->reqtype == PROTO_REQ_INLINE) { - pcmd = acquirePendingCommand(c); + pcmd = acquirePendingCommand(); if (processInlineBuffer(c, pcmd) == C_ERR && !pcmd->read_error) { /* If it fails but there are no errors, it means that it might just be * that the desired content cannot be parsed. At this point, we exit and wait for the next time. */ @@ -3045,7 +3029,7 @@ int processInputBuffer(client *c) { if (unlikely(incomplete)) { pcmd = popPendingCommandFromTail(&c->pending_cmds); } else { - pcmd = acquirePendingCommand(c); + pcmd = acquirePendingCommand(); } if (processMultibulkBuffer(c, pcmd) == C_ERR && !pcmd->read_error) { @@ -4931,50 +4915,52 @@ void evictClients(void) { } } -static pendingCommand *acquirePendingCommand(client *c) { - pendingCommandList *list = &c->pending_cmds; +static pendingCommand *acquirePendingCommand(void) { pendingCommand *pcmd = NULL; - if (list->pool_size > 0) { - /* Get from pool */ - pcmd = list->pool[--list->pool_size]; - list->pool[list->pool_size] = NULL; - } else { - /* Pool is empty, allocate new */ - pcmd = zmalloc(sizeof(pendingCommand)); - initPendingCommand(pcmd); + /* Use shared pool when IO threads are not active. */ + if (!server.io_threads_active && server.pending_cmd_pool_size > 0) { + pendingCommand *pcmd = server.pending_cmd_pool[--server.pending_cmd_pool_size]; + server.pending_cmd_pool[server.pending_cmd_pool_size] = NULL; + return pcmd; } + + /* Global pool is empty or IO threads are active, allocate new */ + pcmd = zmalloc(sizeof(pendingCommand)); + initPendingCommand(pcmd); return pcmd; } static void reclaimPendingCommand(client *c, pendingCommand *cmd) { - pendingCommandList *list = &c->pending_cmds; - - /* If pool is not full and command argv is small, add to pool for reuse. - * We avoid pooling commands with large argv arrays to prevent excessive - * memory usage in the pool. */ - if (list->pool_size < PENDING_COMMAND_POOL_SIZE && cmd->argv_len < 16) { - for (int j = 0; j < cmd->argc; j++) - decrRefCount(cmd->argv[j]); + /* Try shared pool first when IO threads are not active. */ + if (!server.io_threads_active) { + /* If global pool is not full and argv isn't too large, add to pool for reuse. */ + if (server.pending_cmd_pool_size < PENDING_COMMAND_POOL_SIZE && + cmd->argv_len < 64) + { + for (int j = 0; j < cmd->argc; j++) + decrRefCount(cmd->argv[j]); - getKeysFreeResult(&cmd->keys_result); + getKeysFreeResult(&cmd->keys_result); - serverAssert(c->all_argv_len_sum >= cmd->argv_len_sum); /* assert this doesn't try to go negative */ - c->all_argv_len_sum -= cmd->argv_len_sum; + serverAssert(c->all_argv_len_sum >= cmd->argv_len_sum); /* assert this doesn't try to go negative */ + c->all_argv_len_sum -= cmd->argv_len_sum; - /* Reset the pending command while preserving the argv array for reuse. */ - robj **argv = cmd->argv; - int argv_len = cmd->argv_len; - memset(cmd, 0, sizeof(pendingCommand)); - cmd->argv = argv; - cmd->argv_len = argv_len; - cmd->slot = INVALID_CLUSTER_SLOT; + /* Reset the pending command while preserving the argv array for reuse. */ + robj **argv = cmd->argv; + int argv_len = cmd->argv_len; + memset(cmd, 0, sizeof(pendingCommand)); + cmd->argv = argv; + cmd->argv_len = argv_len; + cmd->slot = INVALID_CLUSTER_SLOT; - list->pool[list->pool_size++] = cmd; - } else { - /* Pool is full, free this pending command. */ - freePendingCommand(c, cmd); + server.pending_cmd_pool[server.pending_cmd_pool_size++] = cmd; + return; /* Successfully added to shared pool for reuse */ + } } + + /* Global pool is full or IO threads are active, free this pending command. */ + freePendingCommand(c, cmd); } void initPendingCommand(pendingCommand *pcmd) { @@ -5061,7 +5047,7 @@ getKeysResult *getClientCachedKeyResult(client *c) { if (!(pcmd->flags & PENDING_CMD_FLAG_PREPROCESSED)) { preprocessCommand(c, pcmd); pcmd->flags |= PENDING_CMD_FLAG_PREPROCESSED; - } + } /* Return cached result if available */ if (pcmd->flags & PENDING_CMD_KEYRESULT_VALID) diff --git a/src/server.c b/src/server.c index fd2e0dd4ff3..553de713ff1 100644 --- a/src/server.c +++ b/src/server.c @@ -872,15 +872,6 @@ int clientsCronResizeQueryBuffer(client *c) { return 0; } -/* If the client has been idle for too long, free the client's pending command pool. */ -int clientsCronFreePendingCommandPoolIfIdle(client *c) { - /* Free pending command pool if the client has been idle for more than 2 seconds. */ - time_t idletime = server.unixtime - c->lastinteraction; - if (idletime > 2) - freePendingCommandPool(c); - return 0; -} - /* The client output buffer can be adjusted to better fit the memory requirements. * * the logic is: @@ -2924,6 +2915,10 @@ void initServer(void) { server.acl_info.user_auth_failures = 0; server.acl_info.invalid_channel_accesses = 0; + /* Initialize the shared pending command pool. */ + server.pending_cmd_pool_size = 0; + server.pending_cmd_pool = zmalloc(sizeof(pendingCommand*) * PENDING_COMMAND_POOL_SIZE); + /* Create the timer callback, this is our way to process many background * operations incrementally, like clients timeout, eviction of unaccessed * expired keys and so forth. */ diff --git a/src/server.h b/src/server.h index c70d0f81ba4..6e6068afc41 100644 --- a/src/server.h +++ b/src/server.h @@ -1214,9 +1214,6 @@ typedef struct pendingCommandList { pendingCommand *tail; int len; /* Number of commands in the list */ int ready_len; /* Number of commands that are ready to be processed */ - - pendingCommand *pool[PENDING_COMMAND_POOL_SIZE]; /* Pending command pool */ - int pool_size; /* Current number of objects in pool */ } pendingCommandList; /* This structure represents a Redis user. This is useful for ACLs, the @@ -1868,6 +1865,9 @@ struct redisServer { int io_threads_clients_num[IO_THREADS_MAX_NUM]; /* Number of clients assigned to each IO thread. */ int io_threads_do_reads; /* Read and parse from IO threads? */ int io_threads_active; /* Is IO threads currently active? */ + pendingCommand **pending_cmd_pool; /* Shared pool for reusing pendingCommand and argv, + * only when IO threads disabled */ + int pending_cmd_pool_size; /* Current number of reusable pendingCommand objects in shared pool */ int prefetch_batch_max_size;/* Maximum number of keys to prefetch in a single batch */ long long events_processed_while_blocked; /* processEventsWhileBlocked() */ int enable_protected_configs; /* Enable the modification of protected configs, see PROTECTED_ACTION_ALLOWED_* */ @@ -2880,7 +2880,6 @@ void resetClientQbufState(client *c); void freeClientOriginalArgv(client *c); void freeClientArgv(client *c); void freeClientPendingCommands(client *c, int num_pcmds_to_free); -void freePendingCommandPool(client *c); void tryDeferFreeClientObject(client *c, robj *o); void freeClientDeferredObjects(client *c, int free_array); void sendReplyToClient(connection *conn); From 4e99598e562dbc2d62457b46531c43ad99ffaaf7 Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Thu, 16 Oct 2025 10:24:43 +0800 Subject: [PATCH 46/71] Update comment --- src/networking.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/networking.c b/src/networking.c index 69e567af6f9..f8ba9d004e7 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2604,6 +2604,8 @@ static int processMultibulkBuffer(client *c, pendingCommand *pcmd) { c->multibulklen = ll; c->bulklen = -1; + /* Setup argv array on pending command structure. + * Reallocate argv array when the requested size is greater than current size. */ if (c->multibulklen > pcmd->argv_len) { zfree(pcmd->argv); pcmd->argv_len = min(c->multibulklen, 1024); @@ -4925,7 +4927,7 @@ static pendingCommand *acquirePendingCommand(void) { return pcmd; } - /* Global pool is empty or IO threads are active, allocate new */ + /* Shared pool is empty or IO threads are active, allocate new */ pcmd = zmalloc(sizeof(pendingCommand)); initPendingCommand(pcmd); return pcmd; @@ -4934,7 +4936,7 @@ static pendingCommand *acquirePendingCommand(void) { static void reclaimPendingCommand(client *c, pendingCommand *cmd) { /* Try shared pool first when IO threads are not active. */ if (!server.io_threads_active) { - /* If global pool is not full and argv isn't too large, add to pool for reuse. */ + /* If shared pool is not full and argv isn't too large, add to pool for reuse */ if (server.pending_cmd_pool_size < PENDING_COMMAND_POOL_SIZE && cmd->argv_len < 64) { @@ -4946,7 +4948,7 @@ static void reclaimPendingCommand(client *c, pendingCommand *cmd) { serverAssert(c->all_argv_len_sum >= cmd->argv_len_sum); /* assert this doesn't try to go negative */ c->all_argv_len_sum -= cmd->argv_len_sum; - /* Reset the pending command while preserving the argv array for reuse. */ + /* Reset the pending command while preserving the argv array for shared pool reuse */ robj **argv = cmd->argv; int argv_len = cmd->argv_len; memset(cmd, 0, sizeof(pendingCommand)); @@ -4959,7 +4961,7 @@ static void reclaimPendingCommand(client *c, pendingCommand *cmd) { } } - /* Global pool is full or IO threads are active, free this pending command. */ + /* Shared pool is full or IO threads are active, free this pending command */ freePendingCommand(c, cmd); } From efe3a393fd8fd7810ca7453d2872202be0f9834d Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Thu, 16 Oct 2025 10:54:33 +0800 Subject: [PATCH 47/71] Refine comment --- src/networking.c | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/src/networking.c b/src/networking.c index f8ba9d004e7..8449eace66c 100644 --- a/src/networking.c +++ b/src/networking.c @@ -4917,24 +4917,31 @@ void evictClients(void) { } } +/* Acquire a pending command from the shared pool or allocate a new one. + * Uses the shared pool when available (only when IO threads are inactive), + * otherwise allocates a new pending command structure. */ static pendingCommand *acquirePendingCommand(void) { - pendingCommand *pcmd = NULL; + /* Ensure pool is empty when IO threads are active to avoid race conditions */ + serverAssert(server.io_threads_active == 0 || server.pending_cmd_pool_size == 0); - /* Use shared pool when IO threads are not active. */ - if (!server.io_threads_active && server.pending_cmd_pool_size > 0) { - pendingCommand *pcmd = server.pending_cmd_pool[--server.pending_cmd_pool_size]; + pendingCommand *pcmd = NULL; + if (server.pending_cmd_pool_size > 0) { + /* Shared pool is available. */ + pcmd = server.pending_cmd_pool[--server.pending_cmd_pool_size]; server.pending_cmd_pool[server.pending_cmd_pool_size] = NULL; - return pcmd; + } else { + /* Shared pool is empty, allocate new pending command. */ + pcmd = zmalloc(sizeof(pendingCommand)); + initPendingCommand(pcmd); } - - /* Shared pool is empty or IO threads are active, allocate new */ - pcmd = zmalloc(sizeof(pendingCommand)); - initPendingCommand(pcmd); return pcmd; } +/* Reclaim a pending command by adding it to the shared pool for reuse or freeing it. + * The shared pool is only used when IO threads are inactive to avoid race conditions + * between multiple clients. Additionally, pool reuse provides minimal benefit in + * multi-threaded scenarios, so we only use it in single-threaded mode. */ static void reclaimPendingCommand(client *c, pendingCommand *cmd) { - /* Try shared pool first when IO threads are not active. */ if (!server.io_threads_active) { /* If shared pool is not full and argv isn't too large, add to pool for reuse */ if (server.pending_cmd_pool_size < PENDING_COMMAND_POOL_SIZE && @@ -5005,6 +5012,8 @@ void addPendingCommand(pendingCommandList *queue, pendingCommand *cmd) { if (!(cmd->flags & PENDING_CMD_FLAG_INCOMPLETE)) queue->ready_len++; } +/* Remove and return the first pending command from the list. + * Returns NULL if the list is empty. */ pendingCommand *popPendingCommandFromHead(pendingCommandList *list) { pendingCommand *cmd = list->head; if (!cmd) return NULL; /* List is empty */ @@ -5023,6 +5032,8 @@ pendingCommand *popPendingCommandFromHead(pendingCommandList *list) { return cmd; } +/* Remove and return the last pending command from the list. + * Returns NULL if the list is empty. */ pendingCommand *popPendingCommandFromTail(pendingCommandList *list) { pendingCommand *cmd = list->tail; if (!cmd) return NULL; /* List is empty */ From c547e8c326b1c1655c30349b69e275887c1d9272 Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Fri, 17 Oct 2025 15:48:00 +0800 Subject: [PATCH 48/71] Incr the max size of pendimg command pool, and shrink it regular --- src/networking.c | 66 +++++++++++++++++++++++++++++++++++++++--------- src/server.c | 29 +++++++++++++++++++-- src/server.h | 17 ++++++++++--- 3 files changed, 94 insertions(+), 18 deletions(-) diff --git a/src/networking.c b/src/networking.c index 8449eace66c..a9b1e33d855 100644 --- a/src/networking.c +++ b/src/networking.c @@ -35,6 +35,7 @@ static inline int _writeToClientNonSlave(client *c, ssize_t *nwritten); static inline int _writeToClientSlave(client *c, ssize_t *nwritten); static pendingCommand *acquirePendingCommand(void); static void reclaimPendingCommand(client *c, pendingCommand *pcmd); +static void expandPendingCommandPool(void); int ProcessingEventsWhileBlocked = 0; /* See processEventsWhileBlocked(). */ __thread sds thread_reusable_qb = NULL; @@ -4922,13 +4923,17 @@ void evictClients(void) { * otherwise allocates a new pending command structure. */ static pendingCommand *acquirePendingCommand(void) { /* Ensure pool is empty when IO threads are active to avoid race conditions */ - serverAssert(server.io_threads_active == 0 || server.pending_cmd_pool_size == 0); + serverAssert(server.io_threads_active == 0 || server.cmd_pool.size == 0); pendingCommand *pcmd = NULL; - if (server.pending_cmd_pool_size > 0) { + if (server.cmd_pool.size > 0) { /* Shared pool is available. */ - pcmd = server.pending_cmd_pool[--server.pending_cmd_pool_size]; - server.pending_cmd_pool[server.pending_cmd_pool_size] = NULL; + pcmd = server.cmd_pool.pool[--server.cmd_pool.size]; + server.cmd_pool.pool[server.cmd_pool.size] = NULL; + + /* Track minimum pool size for utilization calculation */ + if (server.cmd_pool.size < server.cmd_pool.min_size) + server.cmd_pool.min_size = server.cmd_pool.size; } else { /* Shared pool is empty, allocate new pending command. */ pcmd = zmalloc(sizeof(pendingCommand)); @@ -4942,11 +4947,17 @@ static pendingCommand *acquirePendingCommand(void) { * between multiple clients. Additionally, pool reuse provides minimal benefit in * multi-threaded scenarios, so we only use it in single-threaded mode. */ static void reclaimPendingCommand(client *c, pendingCommand *cmd) { - if (!server.io_threads_active) { - /* If shared pool is not full and argv isn't too large, add to pool for reuse */ - if (server.pending_cmd_pool_size < PENDING_COMMAND_POOL_SIZE && - cmd->argv_len < 64) + /* Try to add to shared pool for reuse if argv isn't too large */ + if (!server.io_threads_active && cmd->argv_len < 64) { + /* If pool is full but can be expanded, expand it first */ + if (server.cmd_pool.size >= server.cmd_pool.capacity && + server.cmd_pool.capacity < PENDING_COMMAND_POOL_MAX_SIZE) { + expandPendingCommandPool(); + } + + /* Add to pool if there's space available */ + if (server.cmd_pool.size < server.cmd_pool.capacity) { for (int j = 0; j < cmd->argc; j++) decrRefCount(cmd->argv[j]); @@ -4963,7 +4974,7 @@ static void reclaimPendingCommand(client *c, pendingCommand *cmd) { cmd->argv_len = argv_len; cmd->slot = INVALID_CLUSTER_SLOT; - server.pending_cmd_pool[server.pending_cmd_pool_size++] = cmd; + server.cmd_pool.pool[server.cmd_pool.size++] = cmd; return; /* Successfully added to shared pool for reuse */ } } @@ -4986,10 +4997,13 @@ void freePendingCommand(client *c, pendingCommand *pcmd) { if (pcmd->argv) { for (int j = 0; j < pcmd->argc; j++) decrRefCount(pcmd->argv[j]); - zfree(pcmd->argv); - serverAssert(c->all_argv_len_sum >= pcmd->argv_len_sum); /* assert this doesn't try to go negative */ - c->all_argv_len_sum -= pcmd->argv_len_sum; + + /* c may be NULL when called from reclaimPendingCommand */ + if (c) { + serverAssert(c->all_argv_len_sum >= pcmd->argv_len_sum); /* assert this doesn't try to go negative */ + c->all_argv_len_sum -= pcmd->argv_len_sum; + } } zfree(pcmd); @@ -5068,3 +5082,31 @@ getKeysResult *getClientCachedKeyResult(client *c) { } return NULL; } + +/* Expand the pending command pool capacity by doubling it, up to the maximum size */ +void expandPendingCommandPool(void) { + int new_capacity = server.cmd_pool.capacity * 2; + if (new_capacity > PENDING_COMMAND_POOL_MAX_SIZE) + new_capacity = PENDING_COMMAND_POOL_MAX_SIZE; + + server.cmd_pool.pool = zrealloc(server.cmd_pool.pool, sizeof(pendingCommand*) * new_capacity); + server.cmd_pool.capacity = new_capacity; +} + +void shrinkPendingCommandPool(void) { + /* Don't shrink if pool is too small or if it was used recently */ + if (server.cmd_pool.capacity <= PENDING_COMMAND_POOL_SIZE) return; + + /* Free commands until we have half the current size */ + int target_size = server.cmd_pool.size / 2; + while (server.cmd_pool.size > target_size) { + pendingCommand *cmd = server.cmd_pool.pool[--server.cmd_pool.size]; + if (cmd) { + freePendingCommand(NULL, cmd); + server.cmd_pool.pool[server.cmd_pool.size] = NULL; + } + } + + server.cmd_pool.capacity = target_size; + server.cmd_pool.pool = zrealloc(server.cmd_pool.pool, sizeof(pendingCommand*) * target_size); +} diff --git a/src/server.c b/src/server.c index 553de713ff1..1f34b51affe 100644 --- a/src/server.c +++ b/src/server.c @@ -1112,6 +1112,24 @@ int clientsCronRunClient(client *c) { return 0; } +/* Periodic maintenance for the pending command pool. + * This function should be called from serverCron to manage pool size based on utilization patterns. */ +void pendingCommandPoolCron(void) { + /* Only shrink pool when IO threads are not active */ + if (server.io_threads_active) return; + + /* Calculate utilization rate based on minimum pool size reached */ + if (server.cmd_pool.capacity > PENDING_COMMAND_POOL_SIZE) { + /* If utilization is below threshold, shrink the pool */ + double utilization_ratio = 1.0 - (double)server.cmd_pool.min_size / server.cmd_pool.capacity; + if (utilization_ratio < 0.5) + shrinkPendingCommandPool(); + } + + /* Reset tracking for next interval */ + server.cmd_pool.min_size = server.cmd_pool.size; /* Reset to current size */ +} + /* This function is called by serverCron() and is used in order to perform * operations on clients that are important to perform constantly. For instance * we use this function in order to disconnect clients after a timeout, including @@ -1630,6 +1648,11 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { migrateCloseTimedoutSockets(); } + /* Periodically shrink pending command reuse pool */ + run_with_period(2000) { + pendingCommandPoolCron(); + } + /* Resize tracking keys table if needed. This is also done at every * command execution, but we want to be sure that if the last command * executed changes the value via CONFIG SET, the server will perform @@ -2916,8 +2939,10 @@ void initServer(void) { server.acl_info.invalid_channel_accesses = 0; /* Initialize the shared pending command pool. */ - server.pending_cmd_pool_size = 0; - server.pending_cmd_pool = zmalloc(sizeof(pendingCommand*) * PENDING_COMMAND_POOL_SIZE); + server.cmd_pool.size = 0; + server.cmd_pool.capacity = PENDING_COMMAND_POOL_SIZE; + server.cmd_pool.pool = zmalloc(sizeof(pendingCommand*) * PENDING_COMMAND_POOL_SIZE); + server.cmd_pool.min_size = 0; /* Create the timer callback, this is our way to process many background * operations incrementally, like clients timeout, eviction of unaccessed diff --git a/src/server.h b/src/server.h index 6e6068afc41..0f60033c138 100644 --- a/src/server.h +++ b/src/server.h @@ -1208,7 +1208,6 @@ typedef struct readyList { } readyList; /* List of pending commands. */ -#define PENDING_COMMAND_POOL_SIZE 16 typedef struct pendingCommandList { pendingCommand *head; pendingCommand *tail; @@ -1216,6 +1215,16 @@ typedef struct pendingCommandList { int ready_len; /* Number of commands that are ready to be processed */ } pendingCommandList; +/* Pending command pool management structure */ +#define PENDING_COMMAND_POOL_SIZE 16 +#define PENDING_COMMAND_POOL_MAX_SIZE 1024 +typedef struct pendingCommandPool { + pendingCommand **pool; /* Pool array for reusing pendingCommand objects */ + int size; /* Current number of objects in pool */ + int capacity; /* Current capacity of the pool array */ + int min_size; /* Minimum size since last check (indicates peak usage) */ +} pendingCommandPool; + /* This structure represents a Redis user. This is useful for ACLs, the * user is associated to the connection after the connection is authenticated. * If there is no associated user, the connection uses the default user. */ @@ -1865,9 +1874,8 @@ struct redisServer { int io_threads_clients_num[IO_THREADS_MAX_NUM]; /* Number of clients assigned to each IO thread. */ int io_threads_do_reads; /* Read and parse from IO threads? */ int io_threads_active; /* Is IO threads currently active? */ - pendingCommand **pending_cmd_pool; /* Shared pool for reusing pendingCommand and argv, - * only when IO threads disabled */ - int pending_cmd_pool_size; /* Current number of reusable pendingCommand objects in shared pool */ + pendingCommandPool cmd_pool; /* Shared pool for reusing pendingCommand, + * only when IO threads disabled */ int prefetch_batch_max_size;/* Maximum number of keys to prefetch in a single batch */ long long events_processed_while_blocked; /* processEventsWhileBlocked() */ int enable_protected_configs; /* Enable the modification of protected configs, see PROTECTED_ACTION_ALLOWED_* */ @@ -2849,6 +2857,7 @@ void freePendingCommand(client *c, pendingCommand *pcmd); void addPendingCommand(pendingCommandList *queue, pendingCommand *cmd); pendingCommand *popPendingCommandFromHead(pendingCommandList *queue); pendingCommand *popPendingCommandFromTail(pendingCommandList *queue); +void shrinkPendingCommandPool(void); /* Utils */ long long ustime(void); From 1d8c292f349371f25655915759d6af5ac3b1b5c1 Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Mon, 20 Oct 2025 11:20:33 +0800 Subject: [PATCH 49/71] Simplify the code --- src/networking.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/src/networking.c b/src/networking.c index a9b1e33d855..50b3fe1eb98 100644 --- a/src/networking.c +++ b/src/networking.c @@ -35,7 +35,6 @@ static inline int _writeToClientNonSlave(client *c, ssize_t *nwritten); static inline int _writeToClientSlave(client *c, ssize_t *nwritten); static pendingCommand *acquirePendingCommand(void); static void reclaimPendingCommand(client *c, pendingCommand *pcmd); -static void expandPendingCommandPool(void); int ProcessingEventsWhileBlocked = 0; /* See processEventsWhileBlocked(). */ __thread sds thread_reusable_qb = NULL; @@ -4950,10 +4949,16 @@ static void reclaimPendingCommand(client *c, pendingCommand *cmd) { /* Try to add to shared pool for reuse if argv isn't too large */ if (!server.io_threads_active && cmd->argv_len < 64) { /* If pool is full but can be expanded, expand it first */ - if (server.cmd_pool.size >= server.cmd_pool.capacity && - server.cmd_pool.capacity < PENDING_COMMAND_POOL_MAX_SIZE) + if (unlikely(server.cmd_pool.size >= server.cmd_pool.capacity && + server.cmd_pool.capacity < PENDING_COMMAND_POOL_MAX_SIZE)) { - expandPendingCommandPool(); + /* Expand the pending command pool capacity by doubling it, up to the maximum size */ + int new_capacity = server.cmd_pool.capacity * 2; + if (new_capacity > PENDING_COMMAND_POOL_MAX_SIZE) + new_capacity = PENDING_COMMAND_POOL_MAX_SIZE; + + server.cmd_pool.pool = zrealloc(server.cmd_pool.pool, sizeof(pendingCommand*) * new_capacity); + server.cmd_pool.capacity = new_capacity; } /* Add to pool if there's space available */ @@ -5083,16 +5088,6 @@ getKeysResult *getClientCachedKeyResult(client *c) { return NULL; } -/* Expand the pending command pool capacity by doubling it, up to the maximum size */ -void expandPendingCommandPool(void) { - int new_capacity = server.cmd_pool.capacity * 2; - if (new_capacity > PENDING_COMMAND_POOL_MAX_SIZE) - new_capacity = PENDING_COMMAND_POOL_MAX_SIZE; - - server.cmd_pool.pool = zrealloc(server.cmd_pool.pool, sizeof(pendingCommand*) * new_capacity); - server.cmd_pool.capacity = new_capacity; -} - void shrinkPendingCommandPool(void) { /* Don't shrink if pool is too small or if it was used recently */ if (server.cmd_pool.capacity <= PENDING_COMMAND_POOL_SIZE) return; From 1cb020a5b3d4238aad93efe836cb80c458c61ed0 Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Mon, 20 Oct 2025 11:32:36 +0800 Subject: [PATCH 50/71] indentation --- src/server.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/server.h b/src/server.h index 0f60033c138..5d7283c10f9 100644 --- a/src/server.h +++ b/src/server.h @@ -1219,10 +1219,10 @@ typedef struct pendingCommandList { #define PENDING_COMMAND_POOL_SIZE 16 #define PENDING_COMMAND_POOL_MAX_SIZE 1024 typedef struct pendingCommandPool { - pendingCommand **pool; /* Pool array for reusing pendingCommand objects */ - int size; /* Current number of objects in pool */ - int capacity; /* Current capacity of the pool array */ - int min_size; /* Minimum size since last check (indicates peak usage) */ + pendingCommand **pool; /* Pool array for reusing pendingCommand objects */ + int size; /* Current number of objects in pool */ + int capacity; /* Current capacity of the pool array */ + int min_size; /* Minimum size since last check (indicates peak usage) */ } pendingCommandPool; /* This structure represents a Redis user. This is useful for ACLs, the From 45d3cd434b47f1b96d90a622b75783a1b912f642 Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Mon, 20 Oct 2025 15:11:09 +0800 Subject: [PATCH 51/71] code style --- src/memory_prefetch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/memory_prefetch.c b/src/memory_prefetch.c index 3ba2c8a9924..0eeb8567f10 100644 --- a/src/memory_prefetch.c +++ b/src/memory_prefetch.c @@ -395,7 +395,7 @@ int addCommandToBatch(client *c) { batch->key_count++; } pcmd = pcmd->next; - } + } return C_OK; } From 7ca5db773468da9e3f6e2b5528785f8f2cc10c95 Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Tue, 21 Oct 2025 09:54:57 +0800 Subject: [PATCH 52/71] Defensively prevent the cmd pool from shrinking too small Co-authored-by: moticless --- src/networking.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/networking.c b/src/networking.c index 50b3fe1eb98..aefd74d0756 100644 --- a/src/networking.c +++ b/src/networking.c @@ -5089,11 +5089,12 @@ getKeysResult *getClientCachedKeyResult(client *c) { } void shrinkPendingCommandPool(void) { - /* Don't shrink if pool is too small or if it was used recently */ + /* Don't shrink if pool is too small. */ if (server.cmd_pool.capacity <= PENDING_COMMAND_POOL_SIZE) return; - /* Free commands until we have half the current size */ - int target_size = server.cmd_pool.size / 2; + /* Free commands until we have half the current size, but not below minimum. */ + int target_size = max(server.cmd_pool.size / 2, PENDING_COMMAND_POOL_SIZE); + while (server.cmd_pool.size > target_size) { pendingCommand *cmd = server.cmd_pool.pool[--server.cmd_pool.size]; if (cmd) { From a0674a1b8ac22d18986de366a037dabe52be87f6 Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Tue, 21 Oct 2025 11:08:37 +0800 Subject: [PATCH 53/71] Apply suggestion from @ShooterIT Co-authored-by: Yuan Wang --- src/server.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/server.h b/src/server.h index 5d7283c10f9..1af7a64c04e 100644 --- a/src/server.h +++ b/src/server.h @@ -3404,7 +3404,6 @@ void preprocessCommand(client *c, pendingCommand *pcmd); int processCommand(client *c); void commandProcessed(client *c); void prepareForNextCommand(client *c, int update_slot_stats); - int processPendingCommandAndInputBuffer(client *c); int processCommandAndResetClient(client *c); int areCommandKeysInSameSlot(client *c, int *hashslot); From a5eee022d0de414261b72bf3d36c56b50a6d4da3 Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Tue, 21 Oct 2025 11:09:20 +0800 Subject: [PATCH 54/71] Apply suggestion from @ShooterIT Co-authored-by: Yuan Wang --- src/server.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/server.h b/src/server.h index 1af7a64c04e..91f54ccd009 100644 --- a/src/server.h +++ b/src/server.h @@ -2381,7 +2381,7 @@ struct pendingCommand { unsigned long long input_bytes; struct redisCommand *cmd; getKeysResult keys_result; - long long reploff; /* c->reploff should be set to this value when the command is processed */ + long long reploff; /* c->reploff should be set to this value when the command is processed */ int flags; int slot; /* The slot the command is executing against. Set to INVALID_CLUSTER_SLOT if no slot is being used or if the command has a cross slot error */ From b97101d63a063230d65b87d307672c385b8edbe0 Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Tue, 21 Oct 2025 11:09:52 +0800 Subject: [PATCH 55/71] Apply suggestion from @ShooterIT Co-authored-by: Yuan Wang --- src/server.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/server.h b/src/server.h index 91f54ccd009..4626991394d 100644 --- a/src/server.h +++ b/src/server.h @@ -2383,8 +2383,8 @@ struct pendingCommand { getKeysResult keys_result; long long reploff; /* c->reploff should be set to this value when the command is processed */ int flags; - int slot; /* The slot the command is executing against. Set to INVALID_CLUSTER_SLOT if no slot is being used or if - the command has a cross slot error */ + int slot; /* The slot the command is executing against. Set to INVALID_CLUSTER_SLOT + * if no slot is being used or if the command has a cross slot error */ uint8_t read_error; struct pendingCommand *next; From 8984656cbe91394795f62c8626b6fad986f2aae8 Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Tue, 21 Oct 2025 15:21:56 +0800 Subject: [PATCH 56/71] Avoid partial prefetch and incr client if success Co-authored-by: Yuan Wang --- src/iothread.c | 3 ++- src/memory_prefetch.c | 11 +++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/src/iothread.c b/src/iothread.c index 3d6c1b1c7e6..48f37802806 100644 --- a/src/iothread.c +++ b/src/iothread.c @@ -354,11 +354,12 @@ int prefetchIOThreadCommands(IOThread *t) { listIter li; listNode *ln; listRewind(mainThreadProcessingClients[t->id], &li); - while((ln = listNext(&li)) && clients++ < to_prefetch) { + while((ln = listNext(&li)) && clients < to_prefetch) { client *c = listNodeValue(ln); /* A single command may contain multiple keys. If the batch is full, * we stop adding clients to it. */ if (addCommandToBatch(c) == C_ERR) break; + clients++; } /* Prefetch the commands in the batch. */ diff --git a/src/memory_prefetch.c b/src/memory_prefetch.c index 0eeb8567f10..75c48558fbc 100644 --- a/src/memory_prefetch.c +++ b/src/memory_prefetch.c @@ -380,6 +380,17 @@ int addCommandToBatch(client *c) { return C_ERR; } + /* Avoid partial prefetching: if the batch already has keys and adding this + * client's ready commands would likely exceed the batch size limit, reject + * the entire client. This is a conservative estimate using command count as + * a proxy for key count to ensure all keys from a client are either fully + * prefetched together or not prefetched at all. */ + if (batch->key_count > 0 && + c->pending_cmds.ready_len + batch->key_count > batch->max_prefetch_size) + { + return C_ERR; + } + batch->clients[batch->client_count++] = c; pendingCommand *pcmd = c->pending_cmds.head; From 3f1c108c8c286a27712e5da739fe1b1101a23905 Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Tue, 21 Oct 2025 19:17:34 +0800 Subject: [PATCH 57/71] Fix the missing of handing the CLIENT_READ_CROSS_SLOT read error Co-authored-by: moticless --- src/cluster.c | 5 ++++- src/cluster.h | 3 ++- src/module.c | 2 +- src/script.c | 2 +- src/server.c | 7 ++++++- 5 files changed, 14 insertions(+), 5 deletions(-) diff --git a/src/cluster.c b/src/cluster.c index aaa89332af0..4a747efd704 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -1132,7 +1132,9 @@ int extractSlotFromKeysResult(robj **argv, getKeysResult *keys_result) { * * CLUSTER_REDIR_DOWN_STATE and CLUSTER_REDIR_DOWN_RO_STATE if the cluster is * down but the user attempts to execute a command that addresses one or more keys. */ -clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, int argc, int *hashslot, getKeysResult *keys_result, uint64_t cmd_flags, int *error_code) { +clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, int argc, int *hashslot, + getKeysResult *keys_result, uint8_t read_error, uint64_t cmd_flags, int *error_code) +{ clusterNode *myself = getMyClusterNode(); clusterNode *n = NULL; robj *firstkey = NULL; @@ -1176,6 +1178,7 @@ clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, in mc.argc = argc; mc.cmd = cmd; mc.slot = hashslot ? *hashslot : INVALID_CLUSTER_SLOT; + mc.read_error = read_error; if (keys_result) { mc.keys_result = *keys_result; mc.flags |= PENDING_CMD_KEYRESULT_VALID; diff --git a/src/cluster.h b/src/cluster.h index 888396c4aa7..b636e278de3 100644 --- a/src/cluster.h +++ b/src/cluster.h @@ -156,7 +156,8 @@ unsigned int countKeysInSlot(unsigned int slot); int getSlotOrReply(client *c, robj *o); /* functions with shared implementations */ -clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, int argc, int *hashslot, getKeysResult *result, uint64_t cmd_flags, int *error_code); +clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, int argc, int *hashslot, + getKeysResult *result, uint8_t read_error, uint64_t cmd_flags, int *error_code); int extractSlotFromKeysResult(robj **argv, getKeysResult *keys_result); int clusterRedirectBlockedClientIfNeeded(client *c); void clusterRedirectClient(client *c, clusterNode *n, int hashslot, int error_code); diff --git a/src/module.c b/src/module.c index 560dac5e9d5..51c5b787d4d 100644 --- a/src/module.c +++ b/src/module.c @@ -6657,7 +6657,7 @@ RedisModuleCallReply *RM_Call(RedisModuleCtx *ctx, const char *cmdname, const ch c->flags &= ~(CLIENT_READONLY|CLIENT_ASKING); c->flags |= ctx->client->flags & (CLIENT_READONLY|CLIENT_ASKING); const uint64_t cmd_flags = getCommandFlags(c); - if (getNodeByQuery(c,c->cmd,c->argv,c->argc,NULL,NULL,cmd_flags,&error_code) != + if (getNodeByQuery(c,c->cmd,c->argv,c->argc,NULL,NULL,0,cmd_flags,&error_code) != getMyClusterNode()) { sds msg = NULL; diff --git a/src/script.c b/src/script.c index fbb63852d3d..fb815241a84 100644 --- a/src/script.c +++ b/src/script.c @@ -486,7 +486,7 @@ static int scriptVerifyClusterState(scriptRunCtx *run_ctx, client *c, client *or c->flags |= original_c->flags & (CLIENT_READONLY | CLIENT_ASKING); const uint64_t cmd_flags = getCommandFlags(c); int hashslot = -1; - if (getNodeByQuery(c, c->cmd, c->argv, c->argc, &hashslot, NULL, cmd_flags, &error_code) != getMyClusterNode()) { + if (getNodeByQuery(c, c->cmd, c->argv, c->argc, &hashslot, NULL, 0, cmd_flags, &error_code) != getMyClusterNode()) { if (error_code == CLUSTER_REDIR_DOWN_RO_STATE) { *err = sdsnew( "Script attempted to execute a write command while the " diff --git a/src/server.c b/src/server.c index 1f34b51affe..21b34bf1691 100644 --- a/src/server.c +++ b/src/server.c @@ -4106,6 +4106,11 @@ void preprocessCommand(client *c, pendingCommand *pcmd) { if (num_keys < 0) { /* We skip the checks below since We expect the command to be rejected in this case */ return; + } else if (num_keys > 0) { + /* If the command has keys but the slot is invalid, it means + * there is a cross-slot case. */ + if (pcmd->slot == INVALID_CLUSTER_SLOT) + pcmd->read_error = CLIENT_READ_CROSS_SLOT; } pcmd->flags |= PENDING_CMD_KEYRESULT_VALID; } @@ -4261,7 +4266,7 @@ int processCommand(client *c) { { int error_code; clusterNode *n = getNodeByQuery(c,c->cmd,c->argv,c->argc, - &c->slot,getClientCachedKeyResult(c),cmd_flags,&error_code); + &c->slot,getClientCachedKeyResult(c),c->read_error,cmd_flags,&error_code); if (n == NULL || !clusterNodeIsMyself(n)) { if (c->cmd->proc == execCommand) { discardTransaction(c); From 0c8c77ce25eab1c91cea73cf83fecb9db86fd3a3 Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Tue, 21 Oct 2025 19:17:53 +0800 Subject: [PATCH 58/71] Update src/networking.c Co-authored-by: Yuan Wang --- src/networking.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/networking.c b/src/networking.c index aefd74d0756..09e7dbdbec2 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2979,7 +2979,8 @@ int processInputBuffer(client *c) { /* Keep processing while there is something in the input buffer */ while ((c->querybuf && c->qb_pos < sdslen(c->querybuf)) || - c->pending_cmds.ready_len > 0) { + c->pending_cmds.ready_len > 0) + { /* Immediately abort if the client is in the middle of something. */ if (c->flags & CLIENT_BLOCKED || c->flags & CLIENT_UNBLOCKED) break; From 6d3454395873a19fcfdb845ac0cf8545dea71c1f Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Wed, 22 Oct 2025 10:25:11 +0800 Subject: [PATCH 59/71] defer releasing pending command --- src/db.c | 9 +--- src/iothread.c | 2 +- src/networking.c | 120 ++++++++++++++++++++++++++++++++--------------- src/server.h | 4 +- 4 files changed, 85 insertions(+), 50 deletions(-) diff --git a/src/db.c b/src/db.c index c41bd696a9c..c6c9c8a5742 100644 --- a/src/db.c +++ b/src/db.c @@ -552,14 +552,7 @@ static void dbSetValue(redisDb *db, robj *key, robj **valref, dictEntryLink link } } - if (server.io_threads_num > 1 && old->encoding == OBJ_ENCODING_RAW) { - /* In multi-threaded mode, the OBJ_ENCODING_RAW string object usually is - * allocated in the IO thread, so we defer the free to the IO thread. - * Besides, we never free a string object in BIO threads, so, even with - * lazyfree-lazy-server-del enabled, a fallback to main thread freeing - * due to defer free failure doesn't go against the config intention. */ - tryDeferFreeClientObject(server.current_client, old); - } else if (server.lazyfree_lazy_server_del) { + if (server.lazyfree_lazy_server_del) { freeObjAsync(key, old, db->id); } else { decrRefCount(old); diff --git a/src/iothread.c b/src/iothread.c index 48f37802806..3ecc98bf98e 100644 --- a/src/iothread.c +++ b/src/iothread.c @@ -174,7 +174,7 @@ void assignClientToIOThread(client *c) { server.io_threads_clients_num[min_id]++; /* The client running in IO thread needs to have deferred objects array. */ - c->deferred_objects = zmalloc(sizeof(robj*) * CLIENT_MAX_DEFERRED_OBJECTS); + c->deferred_pending_cmds = zmalloc(sizeof(pendingCommand*) * CLIENT_MAX_DEFERRED_OBJECTS); /* Unbind connection of client from main thread event loop, disable read and * write, and then put it in the list, main thread will send these clients diff --git a/src/networking.c b/src/networking.c index 09e7dbdbec2..7e6a61f89de 100644 --- a/src/networking.c +++ b/src/networking.c @@ -174,7 +174,7 @@ client *createClient(connection *conn) { c->current_pending_cmd = NULL; c->original_argc = 0; c->original_argv = NULL; - c->deferred_objects = NULL; + c->deferred_pending_cmds = NULL; c->deferred_objects_num = 0; c->cmd = c->lastcmd = c->realcmd = c->lookedcmd = NULL; c->cur_script = NULL; @@ -1488,31 +1488,31 @@ void acceptCommonHandler(connection *conn, int flags, char *ip) { /* Attempt to defer freeing the object to the IO thread. We usually call this since * we know the object is allocated in the IO thread, to avoid memory arena contention, * and also reducing the load of the main thread. */ -void tryDeferFreeClientObject(client *c, robj *o) { - if (!c || c->tid == IOTHREAD_MAIN_THREAD_ID || o->refcount > 1) { - decrRefCount(o); +void tryDeferFreeClientPendingCommand(client *c, pendingCommand *pcmd) { + if (!c || c->tid == IOTHREAD_MAIN_THREAD_ID) { + freePendingCommand(c, pcmd); return; } /* Put the object in the deferred objects array. */ - if (c->deferred_objects && c->deferred_objects_num < CLIENT_MAX_DEFERRED_OBJECTS) { - c->deferred_objects[c->deferred_objects_num++] = o; + if (c->deferred_pending_cmds && c->deferred_objects_num < CLIENT_MAX_DEFERRED_OBJECTS) { + c->deferred_pending_cmds[c->deferred_objects_num++] = pcmd; } else { - decrRefCount(o); + freePendingCommand(c, pcmd); } } -/* Free the objects in the deferred_objects array. If free_array is true +/* Free the objects in the deferred_pending_cmds array. If free_array is true * then free the array itself as well. */ void freeClientDeferredObjects(client *c, int free_array) { for (int j = 0; j < c->deferred_objects_num; j++) { - decrRefCount(c->deferred_objects[j]); + freePendingCommand(c, c->deferred_pending_cmds[j]); } c->deferred_objects_num = 0; if (free_array) { - zfree(c->deferred_objects); - c->deferred_objects = NULL; + zfree(c->deferred_pending_cmds); + c->deferred_pending_cmds = NULL; } } @@ -1529,13 +1529,8 @@ void freeClientOriginalArgv(client *c) { static inline void freeClientArgvInternal(client *c, int free_argv) { int j; - if (c->tid == IOTHREAD_MAIN_THREAD_ID) { - for (j = 0; j < c->argc; j++) - decrRefCount(c->argv[j]); - } else { - for (j = 0; j < c->argc; j++) - tryDeferFreeClientObject(c, c->argv[j]); - } + for (j = 0; j < c->argc; j++) + decrRefCount(c->argv[j]); c->argc = 0; c->cmd = NULL; c->lookedcmd = NULL; @@ -4942,35 +4937,54 @@ static pendingCommand *acquirePendingCommand(void) { return pcmd; } +/* Try to expand the pending command pool capacity. + * Returns 1 if expansion succeeded or wasn't needed, 0 if expansion failed. */ +static int tryExpandPendingCommandPool(void) { + /* Check if expansion is needed */ + if (server.cmd_pool.size < server.cmd_pool.capacity) { + return 1; /* No expansion needed */ + } + + /* Check if we can expand further */ + if (server.cmd_pool.capacity >= PENDING_COMMAND_POOL_MAX_SIZE) { + return 0; /* Already at maximum capacity */ + } + + /* Expand the pending command pool capacity by doubling it, up to the maximum size */ + int new_capacity = server.cmd_pool.capacity * 2; + if (new_capacity > PENDING_COMMAND_POOL_MAX_SIZE) + new_capacity = PENDING_COMMAND_POOL_MAX_SIZE; + + server.cmd_pool.pool = zrealloc(server.cmd_pool.pool, sizeof(pendingCommand*) * new_capacity); + server.cmd_pool.capacity = new_capacity; + return 1; /* Expansion succeeded */ +} + /* Reclaim a pending command by adding it to the shared pool for reuse or freeing it. * The shared pool is only used when IO threads are inactive to avoid race conditions * between multiple clients. Additionally, pool reuse provides minimal benefit in * multi-threaded scenarios, so we only use it in single-threaded mode. */ static void reclaimPendingCommand(client *c, pendingCommand *cmd) { - /* Try to add to shared pool for reuse if argv isn't too large */ - if (!server.io_threads_active && cmd->argv_len < 64) { - /* If pool is full but can be expanded, expand it first */ - if (unlikely(server.cmd_pool.size >= server.cmd_pool.capacity && - server.cmd_pool.capacity < PENDING_COMMAND_POOL_MAX_SIZE)) - { - /* Expand the pending command pool capacity by doubling it, up to the maximum size */ - int new_capacity = server.cmd_pool.capacity * 2; - if (new_capacity > PENDING_COMMAND_POOL_MAX_SIZE) - new_capacity = PENDING_COMMAND_POOL_MAX_SIZE; - - server.cmd_pool.pool = zrealloc(server.cmd_pool.pool, sizeof(pendingCommand*) * new_capacity); - server.cmd_pool.capacity = new_capacity; - } + if (!server.io_threads_active) { + /* Try to add to shared pool for reuse if argv isn't too large */ + if (likely(cmd->argv_len < 64)) { + /* Check if pool needs expansion before attempting to add */ + if (!tryExpandPendingCommandPool()) { + /* Pool is at maximum capacity, can't expand further */ + goto free_command; + } - /* Add to pool if there's space available */ - if (server.cmd_pool.size < server.cmd_pool.capacity) { + /* Clean up command resources before adding to pool */ for (int j = 0; j < cmd->argc; j++) decrRefCount(cmd->argv[j]); getKeysFreeResult(&cmd->keys_result); - serverAssert(c->all_argv_len_sum >= cmd->argv_len_sum); /* assert this doesn't try to go negative */ - c->all_argv_len_sum -= cmd->argv_len_sum; + if (c) { + serverAssert(c->all_argv_len_sum >= cmd->argv_len_sum); /* assert this doesn't try to go negative */ + c->all_argv_len_sum -= cmd->argv_len_sum; + cmd->argv_len_sum = 0; + } /* Reset the pending command while preserving the argv array for shared pool reuse */ robj **argv = cmd->argv; @@ -4983,9 +4997,33 @@ static void reclaimPendingCommand(client *c, pendingCommand *cmd) { server.cmd_pool.pool[server.cmd_pool.size++] = cmd; return; /* Successfully added to shared pool for reuse */ } + } else { + /* IO threads are active, handle thread-specific cleanup */ + if (c && c->tid != IOTHREAD_MAIN_THREAD_ID) { + /* Partial cleanup for IO thread commands to avoid race issues. + * To avoid robj that may already be referenced elsewhere, we should + * decrease the reference count to release our reference to it. */ + for (int j = 0; j < cmd->argc; j++) { + robj *o = cmd->argv[j]; + if (o && o->refcount > 1) { + decrRefCount(o); + cmd->argv[j] = NULL; + } + } + + if (c) { + serverAssert(c->all_argv_len_sum >= cmd->argv_len_sum); /* assert this doesn't try to go negative */ + c->all_argv_len_sum -= cmd->argv_len_sum; + cmd->argv_len_sum = 0; + } + + tryDeferFreeClientPendingCommand(c, cmd); + return; + } } - /* Shared pool is full or IO threads are active, free this pending command */ +free_command: + /* Shared pool is full or command argv is too large, free this pending command */ freePendingCommand(c, cmd); } @@ -5001,8 +5039,12 @@ void freePendingCommand(client *c, pendingCommand *pcmd) { getKeysFreeResult(&pcmd->keys_result); if (pcmd->argv) { - for (int j = 0; j < pcmd->argc; j++) - decrRefCount(pcmd->argv[j]); + for (int j = 0; j < pcmd->argc; j++) { + robj *o = pcmd->argv[j]; + if (!o) continue; /* TODO */ + decrRefCount(o); + } + zfree(pcmd->argv); /* c may be NULL when called from reclaimPendingCommand */ diff --git a/src/server.h b/src/server.h index 4626991394d..283056adce4 100644 --- a/src/server.h +++ b/src/server.h @@ -1367,7 +1367,7 @@ typedef struct client { size_t all_argv_len_sum; /* Sum of lengths of objects in all pendingCommand argv lists */ pendingCommandList pending_cmds; /* List of parsed pending commands */ pendingCommand *current_pending_cmd; - robj **deferred_objects; /* Array of deferred objects to free. */ + pendingCommand **deferred_pending_cmds; /* Array of deferred pending commands to free. */ int deferred_objects_num; /* Number of deferred objects to free. */ struct redisCommand *cmd, *lastcmd; /* Last command executed. */ struct redisCommand *lookedcmd; /* Command looked up in lookahead. */ @@ -2889,7 +2889,7 @@ void resetClientQbufState(client *c); void freeClientOriginalArgv(client *c); void freeClientArgv(client *c); void freeClientPendingCommands(client *c, int num_pcmds_to_free); -void tryDeferFreeClientObject(client *c, robj *o); +void tryDeferFreeClientPendingCommand(client *c, pendingCommand *pcmd); void freeClientDeferredObjects(client *c, int free_array); void sendReplyToClient(connection *conn); void *addReplyDeferredLen(client *c); From e6f606c7f03f8d9913ae51d3725b6d5362bb8807 Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Wed, 22 Oct 2025 12:49:48 +0800 Subject: [PATCH 60/71] Refine --- src/cluster.c | 11 ++++++----- src/memory_prefetch.c | 2 +- src/networking.c | 2 +- src/server.c | 2 +- src/server.h | 2 +- 5 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/cluster.c b/src/cluster.c index 4a747efd704..20dad9e3725 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -1181,7 +1181,7 @@ clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, in mc.read_error = read_error; if (keys_result) { mc.keys_result = *keys_result; - mc.flags |= PENDING_CMD_KEYRESULT_VALID; + mc.flags |= PENDING_CMD_KEYS_RESULT_VALID; } } @@ -1206,8 +1206,9 @@ clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, in pubsubshard_included = 1; } + int use_cache_keys_result = pcmd->flags & PENDING_CMD_KEYS_RESULT_VALID; getKeysResult result = GETKEYS_RESULT_INIT; - if (likely(pcmd->flags & PENDING_CMD_KEYRESULT_VALID)) + if (use_cache_keys_result) result = pcmd->keys_result; else getKeysFromCommand(mcmd,margv,margc,&result); @@ -1239,7 +1240,7 @@ clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, in * not trapped earlier in processCommand(). Report the same * error to the client. */ if (n == NULL) { - if (!keys_result) getKeysFreeResult(&result); + if (!use_cache_keys_result) getKeysFreeResult(&result); if (error_code) *error_code = CLUSTER_REDIR_DOWN_UNBOUND; return NULL; @@ -1262,7 +1263,7 @@ clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, in * the same key/channel as the first we saw. */ if (slot != thisslot) { /* Error: multiple keys from different slots. */ - if (!keys_result) getKeysFreeResult(&result); + if (!use_cache_keys_result) getKeysFreeResult(&result); if (error_code) *error_code = CLUSTER_REDIR_CROSS_SLOT; return NULL; @@ -1287,7 +1288,7 @@ clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, in else existing_keys++; } } - if (!keys_result) getKeysFreeResult(&result); + if (!use_cache_keys_result) getKeysFreeResult(&result); } /* No key at all in command? then we can serve the request diff --git a/src/memory_prefetch.c b/src/memory_prefetch.c index 75c48558fbc..651312d5bc9 100644 --- a/src/memory_prefetch.c +++ b/src/memory_prefetch.c @@ -398,7 +398,7 @@ int addCommandToBatch(client *c) { /* Skip commands that have not been preprocessed, or have errors. */ if ((pcmd->flags & PENDING_CMD_FLAG_INCOMPLETE) || !pcmd->cmd || pcmd->read_error) break; - serverAssert(pcmd->flags & PENDING_CMD_KEYRESULT_VALID); + serverAssert(pcmd->flags & PENDING_CMD_KEYS_RESULT_VALID); for (int i = 0; i < pcmd->keys_result.numkeys && batch->key_count < batch->max_prefetch_size; i++) { batch->keys[batch->key_count] = pcmd->argv[pcmd->keys_result.keys[i].pos]; batch->keys_dicts[batch->key_count] = diff --git a/src/networking.c b/src/networking.c index 7e6a61f89de..c5bc8d8ecdb 100644 --- a/src/networking.c +++ b/src/networking.c @@ -5125,7 +5125,7 @@ getKeysResult *getClientCachedKeyResult(client *c) { } /* Return cached result if available */ - if (pcmd->flags & PENDING_CMD_KEYRESULT_VALID) + if (pcmd->flags & PENDING_CMD_KEYS_RESULT_VALID) return &c->current_pending_cmd->keys_result; } return NULL; diff --git a/src/server.c b/src/server.c index 21b34bf1691..ad9086f961e 100644 --- a/src/server.c +++ b/src/server.c @@ -4112,7 +4112,7 @@ void preprocessCommand(client *c, pendingCommand *pcmd) { if (pcmd->slot == INVALID_CLUSTER_SLOT) pcmd->read_error = CLIENT_READ_CROSS_SLOT; } - pcmd->flags |= PENDING_CMD_KEYRESULT_VALID; + pcmd->flags |= PENDING_CMD_KEYS_RESULT_VALID; } /* If this function gets called we already read a whole diff --git a/src/server.h b/src/server.h index 283056adce4..5de1dfdedbf 100644 --- a/src/server.h +++ b/src/server.h @@ -2369,7 +2369,7 @@ typedef struct { enum { PENDING_CMD_FLAG_INCOMPLETE = 1 << 0, /* Command parsing is incomplete, still waiting for more data */ PENDING_CMD_FLAG_PREPROCESSED = 1 << 1, /* This command has passed pre-processing */ - PENDING_CMD_KEYRESULT_VALID = 1 << 2, /* Command's keys_result is valid and cached */ + PENDING_CMD_KEYS_RESULT_VALID = 1 << 2, /* Command's keys_result is valid and cached */ }; /* Parser state and parse result of a command from a client's input buffer. */ From 8f7041e61a012576c340a9a3de2be923d7cfcfb5 Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Wed, 22 Oct 2025 13:03:13 +0800 Subject: [PATCH 61/71] Add comment --- src/cluster.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/cluster.c b/src/cluster.c index 20dad9e3725..17a85f3c1ff 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -1206,6 +1206,8 @@ clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, in pubsubshard_included = 1; } + /* If we have a cached keys result from preprocessCommand(), use it. + * Otherwise, extract keys result. */ int use_cache_keys_result = pcmd->flags & PENDING_CMD_KEYS_RESULT_VALID; getKeysResult result = GETKEYS_RESULT_INIT; if (use_cache_keys_result) From f7c8e38f4e41dd4164cbfe6d4d460bcf5f649ed5 Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Wed, 22 Oct 2025 14:00:46 +0800 Subject: [PATCH 62/71] Add test for CROSSSLOT Keys --- tests/unit/cluster/misc.tcl | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tests/unit/cluster/misc.tcl b/tests/unit/cluster/misc.tcl index cd66697c498..62bdcf7db4e 100644 --- a/tests/unit/cluster/misc.tcl +++ b/tests/unit/cluster/misc.tcl @@ -22,5 +22,15 @@ start_cluster 2 2 {tags {external:skip cluster}} { R 0 flushall assert_equal {OK} [R 0 CLUSTER flushslots] } -} + test "CROSSSLOT error for keys in different slots" { + # Test MSET with keys in different slots + assert_error {*CROSSSLOT Keys in request don't hash to the same slot*} {R 0 MSET foo bar baz qux} + + # Test DEL with keys in different slots + assert_error {*CROSSSLOT Keys in request don't hash to the same slot*} {R 0 DEL foo bar} + + # Test MGET with keys in different slots + assert_error {*CROSSSLOT Keys in request don't hash to the same slot*} {R 0 MGET foo bar} + } +} From a538c224084be45b665ee108d4914eab4745f506 Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Wed, 22 Oct 2025 15:04:05 +0800 Subject: [PATCH 63/71] Add test for the expansion and shrinking of pending command pool --- src/networking.c | 2 ++ tests/unit/networking.tcl | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/src/networking.c b/src/networking.c index c5bc8d8ecdb..354e81b014b 100644 --- a/src/networking.c +++ b/src/networking.c @@ -5146,6 +5146,8 @@ void shrinkPendingCommandPool(void) { } } + int old_capacity = server.cmd_pool.capacity; server.cmd_pool.capacity = target_size; server.cmd_pool.pool = zrealloc(server.cmd_pool.pool, sizeof(pendingCommand*) * target_size); + serverLog(LL_DEBUG, "Shrunk pending command pool: capacity %d->%d", old_capacity, server.cmd_pool.capacity); } diff --git a/tests/unit/networking.tcl b/tests/unit/networking.tcl index 4f63f4e012a..5f425a9c066 100644 --- a/tests/unit/networking.tcl +++ b/tests/unit/networking.tcl @@ -375,3 +375,38 @@ start_server {tags {"timeout external:skip"}} { assert_equal "PONG" [r ping] } } + +test {Pending command pool expansion and shrinking} { + start_server {overrides {loglevel debug}} { + set rd1 [redis_deferring_client] + set rd2 [redis_deferring_client] + + # Client1 sends 16 commands in pipeline, ans was blocked at the first command + set buf "" + append buf "blpop mylist 0\r\n" + for {set i 1} {$i < 16} {incr i} { + append buf "set key$i value$i\r\n" + } + $rd1 write $buf + $rd1 flush + wait_for_blocked_clients_count 1 + + # Client2 sends 1 command, this will trigger pending command pool expansion + # from 16 to 32 since A client has used up all 16 commands in the command pool. + $rd2 set bkey bvalue + assert_equal {OK} [$rd2 read] + + # Unblock client1, allowing it to return all pending commands back to the pool. + r lpush mylist unblock_value + assert_equal {mylist unblock_value} [$rd1 read] + for {set i 1} {$i < 16} {incr i} { + assert_equal {OK} [$rd1 read] + } + + # Wait for the pending command pool to shrink back to 16 due to low utilization. + wait_for_log_messages 0 {"*Shrunk pending command pool: capacity 32->16*"} 0 10 1000 + + $rd1 close + $rd2 close + } +} From b92cce8086ae85df3fa5a5435e8f16ca9b3b6252 Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Wed, 22 Oct 2025 15:11:12 +0800 Subject: [PATCH 64/71] Fix spell and test failure --- tests/unit/networking.tcl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/networking.tcl b/tests/unit/networking.tcl index 5f425a9c066..accd64fa697 100644 --- a/tests/unit/networking.tcl +++ b/tests/unit/networking.tcl @@ -377,11 +377,11 @@ start_server {tags {"timeout external:skip"}} { } test {Pending command pool expansion and shrinking} { - start_server {overrides {loglevel debug}} { + start_server {overrides {loglevel debug} tags {external:skip}} { set rd1 [redis_deferring_client] set rd2 [redis_deferring_client] - # Client1 sends 16 commands in pipeline, ans was blocked at the first command + # Client1 sends 16 commands in pipeline, and was blocked at the first command set buf "" append buf "blpop mylist 0\r\n" for {set i 1} {$i < 16} {incr i} { From 0779f1f8f51787b11f5a200b9695b1240e9e7be1 Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Wed, 22 Oct 2025 22:58:35 +0800 Subject: [PATCH 65/71] eliminate CLIENT_IN_MEMORY_PREFETCH --- src/iothread.c | 13 +++++++------ src/networking.c | 4 +--- src/server.h | 1 - 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/src/iothread.c b/src/iothread.c index 21854145b72..a2a0e51f9c9 100644 --- a/src/iothread.c +++ b/src/iothread.c @@ -424,10 +424,13 @@ int processClientsFromIOThread(IOThread *t) { listNode *node = NULL; while (listLength(mainThreadProcessingClients[t->id])) { - /* Prefetch the commands if no clients in the batch. */ - if (prefetch_clients <= 0) prefetch_clients = prefetchIOThreadCommands(t); - /* Reset the prefetching batch if we have processed all clients. */ - if (--prefetch_clients <= 0) resetCommandsBatch(); + if (prefetch_clients <= 0) { + /* Reset the prefetching batch if we have processed all clients. */ + resetCommandsBatch(); + /* Prefetch the commands if no clients in the batch. */ + prefetch_clients = prefetchIOThreadCommands(t); + } + prefetch_clients--; /* Each time we pop up only the first client to process to guarantee * reentrancy safety. */ @@ -469,12 +472,10 @@ int processClientsFromIOThread(IOThread *t) { /* Process the pending command and input buffer. */ if (!isClientReadErrorFatal(c) && c->io_flags & CLIENT_IO_PENDING_COMMAND) { c->flags |= CLIENT_PENDING_COMMAND; - c->flags |= CLIENT_IN_MEMORY_PREFETCH; if (processPendingCommandAndInputBuffer(c) == C_ERR) { /* If the client is no longer valid, it must be freed safely. */ continue; } - c->flags &= ~CLIENT_IN_MEMORY_PREFETCH; } /* We may have pending replies if io thread may not finish writing diff --git a/src/networking.c b/src/networking.c index 78ee72b4d5c..443fa7b7af1 100644 --- a/src/networking.c +++ b/src/networking.c @@ -3084,9 +3084,7 @@ int processInputBuffer(client *c) { /* Prefetch the command only when more commands have been parsed and we * are in the main thread. If running in an IO thread, prefetch will be * deferred until the client is processed by the main thread. */ - if (parse_more && c->running_tid == IOTHREAD_MAIN_THREAD_ID && - !(c->flags & CLIENT_IN_MEMORY_PREFETCH)) - { + if (parse_more && c->running_tid == IOTHREAD_MAIN_THREAD_ID) { /* Prefetch the commands. */ resetCommandsBatch(); addCommandToBatch(c); diff --git a/src/server.h b/src/server.h index c9b6dcac5a9..852cbd658db 100644 --- a/src/server.h +++ b/src/server.h @@ -436,7 +436,6 @@ extern int configOOMScoreAdjValuesDefaults[CONFIG_OOM_COUNT]; #define CLIENT_INTERNAL (1ULL<<52) /* Internal client connection */ #define CLIENT_ASM_MIGRATING (1ULL<<53) /* Client is migrating RDB/stream data during atomic slot migration. */ #define CLIENT_ASM_IMPORTING (1ULL<<54) /* Client is importing RDB/stream data during atomic slot migration. */ -#define CLIENT_IN_MEMORY_PREFETCH (1ULL<<53) /* The client is in the prefetching batch to avoid nested prefetch. */ /* Any flag that does not let optimize FLUSH SYNC to run it in bg as blocking client ASYNC */ #define CLIENT_AVOID_BLOCKING_ASYNC_FLUSH (CLIENT_DENY_BLOCKING|CLIENT_MULTI|CLIENT_LUA_DEBUG|CLIENT_LUA_DEBUG_SYNC|CLIENT_MODULE) From 1804bbc9e904a4e814d91a705a165052b0fd6be4 Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Wed, 22 Oct 2025 23:23:26 +0800 Subject: [PATCH 66/71] General deferred free object --- src/db.c | 9 +++++- src/iothread.c | 2 +- src/networking.c | 78 ++++++++++++++++++++++++++++-------------------- src/server.h | 14 +++++++-- 4 files changed, 66 insertions(+), 37 deletions(-) diff --git a/src/db.c b/src/db.c index 9feb35edbb3..f82729d3b3b 100644 --- a/src/db.c +++ b/src/db.c @@ -561,7 +561,14 @@ static void dbSetValue(redisDb *db, robj *key, robj **valref, dictEntryLink link } } - if (server.lazyfree_lazy_server_del) { + if (server.io_threads_num > 1 && old->encoding == OBJ_ENCODING_RAW) { + /* In multi-threaded mode, the OBJ_ENCODING_RAW string object usually is + * allocated in the IO thread, so we defer the free to the IO thread. + * Besides, we never free a string object in BIO threads, so, even with + * lazyfree-lazy-server-del enabled, a fallback to main thread freeing + * due to defer free failure doesn't go against the config intention. */ + tryDeferFreeClientObject(server.current_client, DEFERRED_OBJECT_TYPE_ROBJ, old); + } else if (server.lazyfree_lazy_server_del) { freeObjAsync(key, old, db->id); } else { decrRefCount(old); diff --git a/src/iothread.c b/src/iothread.c index a2a0e51f9c9..81014b3d0ca 100644 --- a/src/iothread.c +++ b/src/iothread.c @@ -175,7 +175,7 @@ void assignClientToIOThread(client *c) { server.io_threads_clients_num[min_id]++; /* The client running in IO thread needs to have deferred objects array. */ - c->deferred_pending_cmds = zmalloc(sizeof(pendingCommand*) * CLIENT_MAX_DEFERRED_OBJECTS); + c->deferred_objects = zmalloc(sizeof(deferredObject) * CLIENT_MAX_DEFERRED_OBJECTS); /* Unbind connection of client from main thread event loop, disable read and * write, and then put it in the list, main thread will send these clients diff --git a/src/networking.c b/src/networking.c index 443fa7b7af1..ff4dfb9c548 100644 --- a/src/networking.c +++ b/src/networking.c @@ -175,7 +175,7 @@ client *createClient(connection *conn) { c->current_pending_cmd = NULL; c->original_argc = 0; c->original_argv = NULL; - c->deferred_pending_cmds = NULL; + c->deferred_objects = NULL; c->deferred_objects_num = 0; c->cmd = c->lastcmd = c->realcmd = c->lookedcmd = NULL; c->cur_script = NULL; @@ -1493,20 +1493,31 @@ void acceptCommonHandler(connection *conn, int flags, char *ip) { } } +static void freeClientDeferredObject(client *c, int type, void *ptr) { + if (type == DEFERRED_OBJECT_TYPE_PENDING_COMMAND) { + freePendingCommand(c, ptr); + } else if (type == DEFERRED_OBJECT_TYPE_ROBJ) { + decrRefCount(ptr); + } else { + serverPanic("Unknown deferred object type"); + } +} + /* Attempt to defer freeing the object to the IO thread. We usually call this since * we know the object is allocated in the IO thread, to avoid memory arena contention, * and also reducing the load of the main thread. */ -void tryDeferFreeClientPendingCommand(client *c, pendingCommand *pcmd) { +void tryDeferFreeClientObject(client *c, int type, void *ptr) { if (!c || c->tid == IOTHREAD_MAIN_THREAD_ID) { - freePendingCommand(c, pcmd); + freeClientDeferredObject(c, type, ptr); return; } /* Put the object in the deferred objects array. */ - if (c->deferred_pending_cmds && c->deferred_objects_num < CLIENT_MAX_DEFERRED_OBJECTS) { - c->deferred_pending_cmds[c->deferred_objects_num++] = pcmd; + if (c->deferred_objects && c->deferred_objects_num < CLIENT_MAX_DEFERRED_OBJECTS) { + c->deferred_objects[c->deferred_objects_num++].type = type; + c->deferred_objects[c->deferred_objects_num++].ptr = ptr; } else { - freePendingCommand(c, pcmd); + freeClientDeferredObject(c, type, ptr); } } @@ -1514,13 +1525,14 @@ void tryDeferFreeClientPendingCommand(client *c, pendingCommand *pcmd) { * then free the array itself as well. */ void freeClientDeferredObjects(client *c, int free_array) { for (int j = 0; j < c->deferred_objects_num; j++) { - freePendingCommand(c, c->deferred_pending_cmds[j]); + deferredObject *obj = &c->deferred_objects[j]; + freeClientDeferredObject(c, obj->type, obj->ptr); } c->deferred_objects_num = 0; if (free_array) { - zfree(c->deferred_pending_cmds); - c->deferred_pending_cmds = NULL; + zfree(c->deferred_objects); + c->deferred_objects = NULL; } } @@ -5005,10 +5017,10 @@ static int tryExpandPendingCommandPool(void) { * The shared pool is only used when IO threads are inactive to avoid race conditions * between multiple clients. Additionally, pool reuse provides minimal benefit in * multi-threaded scenarios, so we only use it in single-threaded mode. */ -static void reclaimPendingCommand(client *c, pendingCommand *cmd) { +static void reclaimPendingCommand(client *c, pendingCommand *pcmd) { if (!server.io_threads_active) { /* Try to add to shared pool for reuse if argv isn't too large */ - if (likely(cmd->argv_len < 64)) { + if (likely(pcmd->argv_len < 64)) { /* Check if pool needs expansion before attempting to add */ if (!tryExpandPendingCommandPool()) { /* Pool is at maximum capacity, can't expand further */ @@ -5016,26 +5028,26 @@ static void reclaimPendingCommand(client *c, pendingCommand *cmd) { } /* Clean up command resources before adding to pool */ - for (int j = 0; j < cmd->argc; j++) - decrRefCount(cmd->argv[j]); + for (int j = 0; j < pcmd->argc; j++) + decrRefCount(pcmd->argv[j]); - getKeysFreeResult(&cmd->keys_result); + getKeysFreeResult(&pcmd->keys_result); if (c) { - serverAssert(c->all_argv_len_sum >= cmd->argv_len_sum); /* assert this doesn't try to go negative */ - c->all_argv_len_sum -= cmd->argv_len_sum; - cmd->argv_len_sum = 0; + serverAssert(c->all_argv_len_sum >= pcmd->argv_len_sum); /* assert this doesn't try to go negative */ + c->all_argv_len_sum -= pcmd->argv_len_sum; + pcmd->argv_len_sum = 0; } /* Reset the pending command while preserving the argv array for shared pool reuse */ - robj **argv = cmd->argv; - int argv_len = cmd->argv_len; - memset(cmd, 0, sizeof(pendingCommand)); - cmd->argv = argv; - cmd->argv_len = argv_len; - cmd->slot = INVALID_CLUSTER_SLOT; - - server.cmd_pool.pool[server.cmd_pool.size++] = cmd; + robj **argv = pcmd->argv; + int argv_len = pcmd->argv_len; + memset(pcmd, 0, sizeof(pendingCommand)); + pcmd->argv = argv; + pcmd->argv_len = argv_len; + pcmd->slot = INVALID_CLUSTER_SLOT; + + server.cmd_pool.pool[server.cmd_pool.size++] = pcmd; return; /* Successfully added to shared pool for reuse */ } } else { @@ -5044,28 +5056,28 @@ static void reclaimPendingCommand(client *c, pendingCommand *cmd) { /* Partial cleanup for IO thread commands to avoid race issues. * To avoid robj that may already be referenced elsewhere, we should * decrease the reference count to release our reference to it. */ - for (int j = 0; j < cmd->argc; j++) { - robj *o = cmd->argv[j]; + for (int j = 0; j < pcmd->argc; j++) { + robj *o = pcmd->argv[j]; if (o && o->refcount > 1) { decrRefCount(o); - cmd->argv[j] = NULL; + pcmd->argv[j] = NULL; } } if (c) { - serverAssert(c->all_argv_len_sum >= cmd->argv_len_sum); /* assert this doesn't try to go negative */ - c->all_argv_len_sum -= cmd->argv_len_sum; - cmd->argv_len_sum = 0; + serverAssert(c->all_argv_len_sum >= pcmd->argv_len_sum); /* assert this doesn't try to go negative */ + c->all_argv_len_sum -= pcmd->argv_len_sum; + pcmd->argv_len_sum = 0; } - tryDeferFreeClientPendingCommand(c, cmd); + tryDeferFreeClientObject(c, DEFERRED_OBJECT_TYPE_PENDING_COMMAND, pcmd); return; } } free_command: /* Shared pool is full or command argv is too large, free this pending command */ - freePendingCommand(c, cmd); + freePendingCommand(c, pcmd); } void initPendingCommand(pendingCommand *pcmd) { diff --git a/src/server.h b/src/server.h index 852cbd658db..b03b35682c1 100644 --- a/src/server.h +++ b/src/server.h @@ -1321,6 +1321,16 @@ typedef struct { size_t mem_usage_sum; } clientMemUsageBucket; +#define DEFERRED_OBJECT_TYPE_PENDING_COMMAND 1 +#define DEFERRED_OBJECT_TYPE_ROBJ 2 +/* Structure to hold objects that need to be freed later by IO threads. + * This allows the main thread to defer memory cleanup operations to + * IO threads to avoid blocking the main event loop. */ +typedef struct deferredObject { + int type; /* Pointer to the object to be freed */ + void *ptr; /* Type of object: DEFERRED_OBJECT_TYPE_* */ +} deferredObject; + #define SHOULD_CLUSTER_COMPATIBILITY_SAMPLE() \ (server.cluster_compatibility_sample_ratio == 100 || \ (double)rand()/RAND_MAX * 100 < server.cluster_compatibility_sample_ratio) @@ -1374,7 +1384,7 @@ typedef struct client { size_t all_argv_len_sum; /* Sum of lengths of objects in all pendingCommand argv lists */ pendingCommandList pending_cmds; /* List of parsed pending commands */ pendingCommand *current_pending_cmd; - pendingCommand **deferred_pending_cmds; /* Array of deferred pending commands to free. */ + deferredObject *deferred_objects; /* Array of deferred objects to free. */ int deferred_objects_num; /* Number of deferred objects to free. */ struct redisCommand *cmd, *lastcmd; /* Last command executed. */ struct redisCommand *lookedcmd; /* Command looked up in lookahead. */ @@ -2915,7 +2925,7 @@ void resetClientQbufState(client *c); void freeClientOriginalArgv(client *c); void freeClientArgv(client *c); void freeClientPendingCommands(client *c, int num_pcmds_to_free); -void tryDeferFreeClientPendingCommand(client *c, pendingCommand *pcmd); +void tryDeferFreeClientObject(client *c, int type, void *ptr); void freeClientDeferredObjects(client *c, int free_array); void sendReplyToClient(connection *conn); void *addReplyDeferredLen(client *c); From 7b976d8f3c9dbe37a91d9bd478a63bda6af96ea3 Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Wed, 22 Oct 2025 23:30:26 +0800 Subject: [PATCH 67/71] Fix wrongly c->deferred_objects_num++ --- src/networking.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/networking.c b/src/networking.c index ff4dfb9c548..458823b547e 100644 --- a/src/networking.c +++ b/src/networking.c @@ -1499,7 +1499,7 @@ static void freeClientDeferredObject(client *c, int type, void *ptr) { } else if (type == DEFERRED_OBJECT_TYPE_ROBJ) { decrRefCount(ptr); } else { - serverPanic("Unknown deferred object type"); + serverPanic("Unknown deferred object type: %d", type); } } @@ -1514,8 +1514,9 @@ void tryDeferFreeClientObject(client *c, int type, void *ptr) { /* Put the object in the deferred objects array. */ if (c->deferred_objects && c->deferred_objects_num < CLIENT_MAX_DEFERRED_OBJECTS) { - c->deferred_objects[c->deferred_objects_num++].type = type; - c->deferred_objects[c->deferred_objects_num++].ptr = ptr; + c->deferred_objects[c->deferred_objects_num].type = type; + c->deferred_objects[c->deferred_objects_num].ptr = ptr; + c->deferred_objects_num++; } else { freeClientDeferredObject(c, type, ptr); } From f15646034d5a2b20d90123d56061c9f68bfefe7e Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Wed, 22 Oct 2025 23:31:46 +0800 Subject: [PATCH 68/71] unnecessary change --- src/blocked.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/blocked.c b/src/blocked.c index adef73f6018..4f518c9a5a4 100644 --- a/src/blocked.c +++ b/src/blocked.c @@ -202,6 +202,7 @@ void unblockClient(client *c, int queue_for_reprocessing) { serverPanic("Unknown btype in unblockClient()."); } + /* Clear the flags, and put the client in the unblocked list so that * we'll process new commands in its query buffer ASAP. */ if (!(c->flags & CLIENT_MODULE)) server.blocked_clients--; /* We count blocked client stats on regular clients and not on module clients */ From 6a5d7eba0d4a9ab9f113c72cb8843646130fa6bc Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Wed, 22 Oct 2025 23:33:02 +0800 Subject: [PATCH 69/71] code style --- src/db.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/db.c b/src/db.c index f82729d3b3b..902f49a0422 100644 --- a/src/db.c +++ b/src/db.c @@ -563,10 +563,10 @@ static void dbSetValue(redisDb *db, robj *key, robj **valref, dictEntryLink link if (server.io_threads_num > 1 && old->encoding == OBJ_ENCODING_RAW) { /* In multi-threaded mode, the OBJ_ENCODING_RAW string object usually is - * allocated in the IO thread, so we defer the free to the IO thread. - * Besides, we never free a string object in BIO threads, so, even with - * lazyfree-lazy-server-del enabled, a fallback to main thread freeing - * due to defer free failure doesn't go against the config intention. */ + * allocated in the IO thread, so we defer the free to the IO thread. + * Besides, we never free a string object in BIO threads, so, even with + * lazyfree-lazy-server-del enabled, a fallback to main thread freeing + * due to defer free failure doesn't go against the config intention. */ tryDeferFreeClientObject(server.current_client, DEFERRED_OBJECT_TYPE_ROBJ, old); } else if (server.lazyfree_lazy_server_del) { freeObjAsync(key, old, db->id); From 0d34e6a2c6cf8904b66f2a912eaf53c705da0ca1 Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Wed, 22 Oct 2025 23:35:08 +0800 Subject: [PATCH 70/71] Rename freeClientDeferredObject to freeDeferredObject to avoid confusion --- src/networking.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/networking.c b/src/networking.c index 458823b547e..148e9ec54b8 100644 --- a/src/networking.c +++ b/src/networking.c @@ -1493,7 +1493,7 @@ void acceptCommonHandler(connection *conn, int flags, char *ip) { } } -static void freeClientDeferredObject(client *c, int type, void *ptr) { +static void freeDeferredObject(client *c, int type, void *ptr) { if (type == DEFERRED_OBJECT_TYPE_PENDING_COMMAND) { freePendingCommand(c, ptr); } else if (type == DEFERRED_OBJECT_TYPE_ROBJ) { @@ -1508,7 +1508,7 @@ static void freeClientDeferredObject(client *c, int type, void *ptr) { * and also reducing the load of the main thread. */ void tryDeferFreeClientObject(client *c, int type, void *ptr) { if (!c || c->tid == IOTHREAD_MAIN_THREAD_ID) { - freeClientDeferredObject(c, type, ptr); + freeDeferredObject(c, type, ptr); return; } @@ -1518,7 +1518,7 @@ void tryDeferFreeClientObject(client *c, int type, void *ptr) { c->deferred_objects[c->deferred_objects_num].ptr = ptr; c->deferred_objects_num++; } else { - freeClientDeferredObject(c, type, ptr); + freeDeferredObject(c, type, ptr); } } @@ -1527,7 +1527,7 @@ void tryDeferFreeClientObject(client *c, int type, void *ptr) { void freeClientDeferredObjects(client *c, int free_array) { for (int j = 0; j < c->deferred_objects_num; j++) { deferredObject *obj = &c->deferred_objects[j]; - freeClientDeferredObject(c, obj->type, obj->ptr); + freeDeferredObject(c, obj->type, obj->ptr); } c->deferred_objects_num = 0; From 1a874f14d19ec26b502bcba9bb6b501f0a7295f3 Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Thu, 23 Oct 2025 00:00:04 +0800 Subject: [PATCH 71/71] code review Co-authored-by: Yuan Wang --- src/networking.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/networking.c b/src/networking.c index 148e9ec54b8..747d5479e74 100644 --- a/src/networking.c +++ b/src/networking.c @@ -3096,8 +3096,11 @@ int processInputBuffer(client *c) { /* Prefetch the command only when more commands have been parsed and we * are in the main thread. If running in an IO thread, prefetch will be - * deferred until the client is processed by the main thread. */ - if (parse_more && c->running_tid == IOTHREAD_MAIN_THREAD_ID) { + * deferred until the client is processed by the main thread. Skip prefetch + * if there are too few commands to avoid meaningless prefetching. */ + if (parse_more && c->running_tid == IOTHREAD_MAIN_THREAD_ID && + c->pending_cmds.ready_len > 1) + { /* Prefetch the commands. */ resetCommandsBatch(); addCommandToBatch(c); @@ -5065,11 +5068,9 @@ static void reclaimPendingCommand(client *c, pendingCommand *pcmd) { } } - if (c) { - serverAssert(c->all_argv_len_sum >= pcmd->argv_len_sum); /* assert this doesn't try to go negative */ - c->all_argv_len_sum -= pcmd->argv_len_sum; - pcmd->argv_len_sum = 0; - } + serverAssert(c->all_argv_len_sum >= pcmd->argv_len_sum); /* assert this doesn't try to go negative */ + c->all_argv_len_sum -= pcmd->argv_len_sum; + pcmd->argv_len_sum = 0; tryDeferFreeClientObject(c, DEFERRED_OBJECT_TYPE_PENDING_COMMAND, pcmd); return;