From 77ba706a7347b2b9d776bf7c52a34cfba7a123de Mon Sep 17 00:00:00 2001 From: ston3lu Date: Mon, 29 Dec 2025 03:20:04 +0000 Subject: [PATCH 1/4] Fix peer connection handling by checking for existing entries --- src/client/messenger.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/client/messenger.cpp b/src/client/messenger.cpp index 0325d297..61632f85 100644 --- a/src/client/messenger.cpp +++ b/src/client/messenger.cpp @@ -583,7 +583,13 @@ void osd_messenger_t::handle_peer_epoll(int peer_fd, int epoll_events) void osd_messenger_t::on_connect_peer(osd_num_t peer_osd, int peer_fd) { - auto & wp = wanted_peers.at(peer_osd); + auto wp_it = wanted_peers.find(peer_osd); + if (wp_it == wanted_peers.end()) + { + fprintf(stderr, "on_connect_peer: no wanted peer entry for OSD %ju\n", peer_osd); + return; + } + auto & wp = wp_it->second; wp.connecting = false; if (peer_fd < 0) { From 141fbe1054ea78b995869c1ca002de51c6a250f7 Mon Sep 17 00:00:00 2001 From: ston3lu Date: Mon, 29 Dec 2025 08:52:21 +0000 Subject: [PATCH 2/4] clean RDMA entry --- src/client/msgr_stop.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/client/msgr_stop.cpp b/src/client/msgr_stop.cpp index aa47d238..e6c7f39f 100644 --- a/src/client/msgr_stop.cpp +++ b/src/client/msgr_stop.cpp @@ -85,6 +85,16 @@ void osd_messenger_t::stop_client(int peer_fd, bool force, bool force_delete) osd_peer_fds.erase(osd_it); } } +#ifdef WITH_RDMA + if (cl->rdma_conn && cl->rdma_conn->cmid) + { + auto rdma_it = rdmacm_connections.find(cl->rdma_conn->cmid); + if (rdma_it != rdmacm_connections.end() && rdma_it->second == cl) + { + rdmacm_connections.erase(rdma_it); + } + } +#endif #ifndef __MOCK__ // Then remove FD from the eventloop so we don't accidentally read something tfd->set_fd_handler(peer_fd, false, NULL); From 477ae56287b12a4db5f57ca25ac8e0edbbc5aa5c Mon Sep 17 00:00:00 2001 From: ston3lu Date: Wed, 31 Dec 2025 07:31:42 +0000 Subject: [PATCH 3/4] Handle missing peer clients in secondary paths --- src/osd/osd.h | 4 ++-- src/osd/osd_secondary.cpp | 26 ++++++++++++++++++-------- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/src/osd/osd.h b/src/osd/osd.h index 9d202803..3574b3e1 100644 --- a/src/osd/osd.h +++ b/src/osd/osd.h @@ -328,8 +328,8 @@ class osd_t void exec_show_config(osd_op_t *cur_op); void exec_secondary(osd_op_t *cur_op); void exec_secondary_real(osd_op_t *cur_op); - void exec_sec_read_bmp(osd_op_t *cur_op); - void exec_sec_lock(osd_op_t *cur_op); + void exec_sec_read_bmp(osd_op_t *cur_op, osd_client_t *cl); + void exec_sec_lock(osd_op_t *cur_op, osd_client_t *cl); void secondary_op_callback(osd_op_t *cur_op); // primary ops diff --git a/src/osd/osd_secondary.cpp b/src/osd/osd_secondary.cpp index ec5c28a6..7f802e2b 100644 --- a/src/osd/osd_secondary.cpp +++ b/src/osd/osd_secondary.cpp @@ -112,6 +112,13 @@ bool osd_t::sec_check_pg_lock(osd_num_t primary_osd, const object_id &oid, uint3 void osd_t::exec_secondary_real(osd_op_t *cur_op) { + auto cl_it = msgr.clients.find(cur_op->peer_fd); + if (cl_it == msgr.clients.end()) + { + finish_op(cur_op, -EPIPE); + return; + } + auto cl = cl_it->second; if (cur_op->req.hdr.opcode == OSD_OP_SEC_LIST && (cur_op->req.sec_list.flags & OSD_LIST_PRIMARY)) { @@ -120,15 +127,14 @@ void osd_t::exec_secondary_real(osd_op_t *cur_op) } if (cur_op->req.hdr.opcode == OSD_OP_SEC_READ_BMP) { - exec_sec_read_bmp(cur_op); + exec_sec_read_bmp(cur_op, cl); return; } else if (cur_op->req.hdr.opcode == OSD_OP_SEC_LOCK) { - exec_sec_lock(cur_op); + exec_sec_lock(cur_op, cl); return; } - auto cl = msgr.clients.at(cur_op->peer_fd); cur_op->bs_op = new blockstore_op_t(); cur_op->bs_op->callback = [this, cur_op](blockstore_op_t* bs_op) { secondary_op_callback(cur_op); }; cur_op->bs_op->opcode = (cur_op->req.hdr.opcode == OSD_OP_SEC_READ ? BS_OP_READ @@ -247,9 +253,8 @@ void osd_t::exec_secondary_real(osd_op_t *cur_op) #endif } -void osd_t::exec_sec_read_bmp(osd_op_t *cur_op) +void osd_t::exec_sec_read_bmp(osd_op_t *cur_op, osd_client_t *cl) { - auto cl = msgr.clients.at(cur_op->peer_fd); int n = cur_op->req.sec_read_bmp.len / sizeof(obj_ver_id); if (n > 0) { @@ -275,10 +280,9 @@ void osd_t::exec_sec_read_bmp(osd_op_t *cur_op) } // Lock/Unlock PG -void osd_t::exec_sec_lock(osd_op_t *cur_op) +void osd_t::exec_sec_lock(osd_op_t *cur_op, osd_client_t *cl) { cur_op->reply.sec_lock.cur_primary = 0; - auto cl = msgr.clients.at(cur_op->peer_fd); if (!cl->in_osd_num || cur_op->req.sec_lock.flags != OSD_SEC_LOCK_PG && cur_op->req.sec_lock.flags != OSD_SEC_UNLOCK_PG || @@ -340,7 +344,13 @@ void osd_t::exec_show_config(osd_op_t *cur_op) ? json11::Json::parse(std::string((char *)cur_op->buf), json_err) : json11::Json(); auto peer_osd_num = req_json["osd_num"].uint64_value(); - auto cl = msgr.clients.at(cur_op->peer_fd); + auto cl_it = msgr.clients.find(cur_op->peer_fd); + if (cl_it == msgr.clients.end()) + { + finish_op(cur_op, -EPIPE); + return; + } + auto cl = cl_it->second; cl->in_osd_num = peer_osd_num; if (req_json["features"]["check_sequencing"].bool_value()) { From 2ccda97d0b9b931ec4256490a4a64b7f1ac6f9fb Mon Sep 17 00:00:00 2001 From: ston3lu Date: Tue, 27 Jan 2026 06:30:50 +0000 Subject: [PATCH 4/4] Fix ref leak on read path when SQE allocation fails --- src/client/msgr_receive.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/client/msgr_receive.cpp b/src/client/msgr_receive.cpp index abc38e1b..19922779 100644 --- a/src/client/msgr_receive.cpp +++ b/src/client/msgr_receive.cpp @@ -42,6 +42,7 @@ void osd_messenger_t::read_requests() } if (!sqe) { + cl->refs--; cl->read_msg.msg_iovlen = 0; read_ready_clients.erase(read_ready_clients.begin(), read_ready_clients.begin() + i); return;