From 539ba7769624a4c1a7056c7f1aac1faeb310cfee Mon Sep 17 00:00:00 2001 From: yellowhatter Date: Tue, 17 Sep 2024 17:58:06 +0300 Subject: [PATCH 01/26] dummy drop for cleanup handlers --- commons/zenoh-shm/src/cleanup.rs | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/commons/zenoh-shm/src/cleanup.rs b/commons/zenoh-shm/src/cleanup.rs index ac29dfe08f..e77fe8782f 100644 --- a/commons/zenoh-shm/src/cleanup.rs +++ b/commons/zenoh-shm/src/cleanup.rs @@ -12,7 +12,7 @@ // ZettaScale Zenoh Team, // -use signal_hook::consts::signal::*; +use signal_hook::{consts::signal::*, SigId}; use static_init::dynamic; /// A global cleanup, that is guaranteed to be dropped at normal program exit and that will @@ -23,12 +23,14 @@ pub(crate) static mut CLEANUP: Cleanup = Cleanup::new(); /// An RAII object that calls all registered routines upon destruction pub(crate) struct Cleanup { cleanups: lockfree::queue::Queue>>, + handlers: Vec, } impl Cleanup { fn new() -> Self { // todo: this is a workaround to make sure Cleanup will be executed even if process terminates via signal handlers // that execute std::terminate instead of exit + let mut handlers: Vec = Default::default(); for signal in [ #[cfg(not(target_os = "windows"))] SIGHUP, @@ -37,15 +39,18 @@ impl Cleanup { #[cfg(not(target_os = "windows"))] SIGQUIT, ] { - unsafe { - let _ = signal_hook::low_level::register(signal, || { + if let Ok(sigid) = unsafe { + signal_hook::low_level::register(signal, || { std::process::exit(0); - }); + }) + } { + handlers.push(sigid); } } Self { cleanups: Default::default(), + handlers, } } @@ -64,6 +69,9 @@ impl Cleanup { impl Drop for Cleanup { fn drop(&mut self) { + for handler in &self.handlers { + signal_hook::low_level::unregister(*handler); + } self.cleanup(); } } From ff534884064bed3f4f2b6f1985d7c58c1313ab77 Mon Sep 17 00:00:00 2001 From: yellowhatter Date: Wed, 18 Sep 2024 13:50:25 +0300 Subject: [PATCH 02/26] migrate to ctrlc crate --- Cargo.lock | 76 
+++++++++++++++++++++----------- Cargo.toml | 2 +- commons/zenoh-shm/Cargo.toml | 2 +- commons/zenoh-shm/src/cleanup.rs | 40 +++++++---------- 4 files changed, 69 insertions(+), 51 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3ebe1688e2..2bd1f064d8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1078,6 +1078,16 @@ dependencies = [ "cipher 0.2.5", ] +[[package]] +name = "ctrlc" +version = "3.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90eeab0aa92f3f9b4e87f258c72b139c207d251f9cbc1080a0086b86a8870dd3" +dependencies = [ + "nix 0.29.0", + "windows-sys 0.59.0", +] + [[package]] name = "data-encoding" version = "2.4.0" @@ -5125,7 +5135,16 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ - "windows-targets 0.52.0", + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets 0.52.6", ] [[package]] @@ -5145,17 +5164,18 @@ dependencies = [ [[package]] name = "windows-targets" -version = "0.52.0" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" dependencies = [ - "windows_aarch64_gnullvm 0.52.0", - "windows_aarch64_msvc 0.52.0", - "windows_i686_gnu 0.52.0", - "windows_i686_msvc 0.52.0", - "windows_x86_64_gnu 0.52.0", - "windows_x86_64_gnullvm 0.52.0", - "windows_x86_64_msvc 0.52.0", + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", + "windows_i686_gnullvm", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + 
"windows_x86_64_msvc 0.52.6", ] [[package]] @@ -5166,9 +5186,9 @@ checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" [[package]] name = "windows_aarch64_gnullvm" -version = "0.52.0" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" [[package]] name = "windows_aarch64_msvc" @@ -5184,9 +5204,9 @@ checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" [[package]] name = "windows_aarch64_msvc" -version = "0.52.0" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" [[package]] name = "windows_i686_gnu" @@ -5202,9 +5222,15 @@ checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" [[package]] name = "windows_i686_gnu" -version = "0.52.0" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" [[package]] name = "windows_i686_msvc" @@ -5220,9 +5246,9 @@ checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" [[package]] name = "windows_i686_msvc" -version = "0.52.0" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" 
[[package]] name = "windows_x86_64_gnu" @@ -5238,9 +5264,9 @@ checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" [[package]] name = "windows_x86_64_gnu" -version = "0.52.0" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" [[package]] name = "windows_x86_64_gnullvm" @@ -5250,9 +5276,9 @@ checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" [[package]] name = "windows_x86_64_gnullvm" -version = "0.52.0" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" [[package]] name = "windows_x86_64_msvc" @@ -5268,9 +5294,9 @@ checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" [[package]] name = "windows_x86_64_msvc" -version = "0.52.0" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "winnow" @@ -5852,13 +5878,13 @@ version = "1.0.0-dev" dependencies = [ "async-trait", "crc", + "ctrlc", "libc", "lockfree", "num-traits", "num_cpus", "rand 0.8.5", "shared_memory", - "signal-hook", "stabby", "static_init", "thread-priority", diff --git a/Cargo.toml b/Cargo.toml index 54a25e4c81..34848bb788 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -162,7 +162,7 @@ stabby = "36.1.1" sha3 = "0.10.8" shared_memory = "0.12.4" shellexpand = "3.1.0" -signal-hook = { version = "0.3.17", default-features = false } +ctrlc = { version = "3.4.5" } socket2 = { version = "0.5.7", features = ["all"] } stop-token = "0.7.0" syn = "2.0" diff --git 
a/commons/zenoh-shm/Cargo.toml b/commons/zenoh-shm/Cargo.toml index c5b2a0a628..679202f500 100644 --- a/commons/zenoh-shm/Cargo.toml +++ b/commons/zenoh-shm/Cargo.toml @@ -48,7 +48,7 @@ num_cpus = { workspace = true, optional = true } thread-priority = { workspace = true } lockfree = { workspace = true } stabby = { workspace = true } -signal-hook = { workspace = true } +ctrlc = { workspace = true, features = ["termination"] } [dev-dependencies] libc = { workspace = true } diff --git a/commons/zenoh-shm/src/cleanup.rs b/commons/zenoh-shm/src/cleanup.rs index e77fe8782f..304f92e3da 100644 --- a/commons/zenoh-shm/src/cleanup.rs +++ b/commons/zenoh-shm/src/cleanup.rs @@ -12,8 +12,8 @@ // ZettaScale Zenoh Team, // -use signal_hook::{consts::signal::*, SigId}; use static_init::dynamic; +use zenoh_core::zerror; /// A global cleanup, that is guaranteed to be dropped at normal program exit and that will /// execute all registered cleanup routines at this moment @@ -23,34 +23,29 @@ pub(crate) static mut CLEANUP: Cleanup = Cleanup::new(); /// An RAII object that calls all registered routines upon destruction pub(crate) struct Cleanup { cleanups: lockfree::queue::Queue>>, - handlers: Vec, } impl Cleanup { fn new() -> Self { - // todo: this is a workaround to make sure Cleanup will be executed even if process terminates via signal handlers - // that execute std::terminate instead of exit - let mut handlers: Vec = Default::default(); - for signal in [ - #[cfg(not(target_os = "windows"))] - SIGHUP, - SIGTERM, - SIGINT, - #[cfg(not(target_os = "windows"))] - SIGQUIT, - ] { - if let Ok(sigid) = unsafe { - signal_hook::low_level::register(signal, || { - std::process::exit(0); - }) - } { - handlers.push(sigid); + // todo: this is a workaround to make sure Cleanup will be executed even if process terminates via signals + if let Err(e) = ctrlc::set_handler(|| std::process::exit(0)) { + match e { + ctrlc::Error::NoSuchSignal(signal_type) => { + zerror!( + "Error registering cleanup 
handler for signal {:?}: no such signal!", + signal_type + ); + } + ctrlc::Error::MultipleHandlers => { + zerror!("Error registering cleanup handler: already registered!"); + } + ctrlc::Error::System(error) => { + zerror!("Error registering cleanup handler: system error: {error}"); + } } } - Self { cleanups: Default::default(), - handlers, } } @@ -69,9 +64,6 @@ impl Cleanup { impl Drop for Cleanup { fn drop(&mut self) { - for handler in &self.handlers { - signal_hook::low_level::unregister(*handler); - } self.cleanup(); } } From 50b6e6cb072b25119ab63f189a86c5656e91ec92 Mon Sep 17 00:00:00 2001 From: yellowhatter Date: Wed, 18 Sep 2024 14:32:52 +0300 Subject: [PATCH 03/26] add force_cleanup_before_exit routine --- commons/zenoh-shm/src/api/cleanup.rs | 35 ++++++++++++++++++++++++++++ commons/zenoh-shm/src/api/mod.rs | 1 + commons/zenoh-shm/src/cleanup.rs | 10 +++++--- zenoh/src/lib.rs | 1 + 4 files changed, 44 insertions(+), 3 deletions(-) create mode 100644 commons/zenoh-shm/src/api/cleanup.rs diff --git a/commons/zenoh-shm/src/api/cleanup.rs b/commons/zenoh-shm/src/api/cleanup.rs new file mode 100644 index 0000000000..dda3637a07 --- /dev/null +++ b/commons/zenoh-shm/src/api/cleanup.rs @@ -0,0 +1,35 @@ +// +// Copyright (c) 2024 ZettaScale Technology +// +// This program and the accompanying materials are made available under the +// terms of the Eclipse Public License 2.0 which is available at +// http://www.eclipse.org/legal/epl-2.0, or the Apache License, Version 2.0 +// which is available at https://www.apache.org/licenses/LICENSE-2.0. +// +// SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 +// +// Contributors: +// ZettaScale Zenoh Team, +// + +use crate::cleanup::CLEANUP; + +/// Make forced cleanup +/// NOTE: this is a part of a temporary on-exit-cleanup workaround and it will be very likely removed in the future. +/// WARN: The improper usage can break the application logic, impacting SHM-utilizing Sessions in other processes. 
+/// Cleanup unlinks SHM segments _created_ by current process from filesystem with the following consequences: +/// - Sessions that are not linked to this segment will fail to link it if they try. Such kind of errors are properly handled. +/// - Already linked processes will still have this shared memory mapped and safely accessible +/// - The actual memory will be reclaimed by the OS only after last process using it will close it or exit +/// +/// In order to properly cleanup some SHM internals upon process exit, Zenoh installs exit handlers (see atexit() API). +/// The atexit handler is executed only on process exit(), the inconvenience is that terminating signal handlers +/// (like SIGINT) bypass it and terminate the process without cleanup. To eliminate this effect, Zenoh overrides +/// SIGHUP, SIGTERM, SIGINT and SIGQUIT handlers and calls exit() inside to make graceful shutdown. If user is going to +/// override these Zenoh's handlers, the workaround will break, and there are two ways to keep this workaround working: +/// - execute overridden Zenoh handlers in overriding handler code +/// - call force_cleanup_before_exit() anywhere at any time before terminating the process +#[zenoh_macros::unstable_doc] +pub fn force_cleanup_before_exit() { + CLEANUP.read().cleanup(); +} diff --git a/commons/zenoh-shm/src/api/mod.rs b/commons/zenoh-shm/src/api/mod.rs index a87188da29..8802b1eb58 100644 --- a/commons/zenoh-shm/src/api/mod.rs +++ b/commons/zenoh-shm/src/api/mod.rs @@ -13,6 +13,7 @@ // pub mod buffer; +pub mod cleanup; pub mod client; pub mod client_storage; pub mod common; diff --git a/commons/zenoh-shm/src/cleanup.rs b/commons/zenoh-shm/src/cleanup.rs index b0097db520..d87bbf534f 100644 --- a/commons/zenoh-shm/src/cleanup.rs +++ b/commons/zenoh-shm/src/cleanup.rs @@ -52,10 +52,8 @@ impl Cleanup { pub(crate) fn register_cleanup(&self, cleanup_fn: Box) { self.cleanups.push(Some(cleanup_fn)); } -} -impl Drop for Cleanup { - fn drop(&mut self) { + pub(crate) fn 
cleanup(&self) { while let Some(cleanup) = self.cleanups.pop() { if let Some(f) = cleanup { f(); @@ -63,3 +61,9 @@ impl Drop for Cleanup { } } } + +impl Drop for Cleanup { + fn drop(&mut self) { + self.cleanup(); + } +} diff --git a/zenoh/src/lib.rs b/zenoh/src/lib.rs index 0275cbe8be..4b53ceabd1 100644 --- a/zenoh/src/lib.rs +++ b/zenoh/src/lib.rs @@ -427,6 +427,7 @@ pub mod shm { zshm::{zshm, ZShm}, zshmmut::{zshmmut, ZShmMut}, }, + cleanup::force_cleanup_before_exit, client::{shm_client::ShmClient, shm_segment::ShmSegment}, client_storage::{ShmClientStorage, GLOBAL_CLIENT_STORAGE}, common::types::{ChunkID, ProtocolID, SegmentID}, From a717dd8e9e51fab67307b40993e442acbb52f82c Mon Sep 17 00:00:00 2001 From: yellowhatter Date: Wed, 18 Sep 2024 19:41:41 +0300 Subject: [PATCH 04/26] use ctrlc2 with other Cleanup finalization seq --- Cargo.lock | 8 ++++---- Cargo.toml | 2 +- commons/zenoh-shm/Cargo.toml | 2 +- commons/zenoh-shm/src/cleanup.rs | 12 ++++++++---- 4 files changed, 14 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index fa18c47cbc..263f129720 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1071,10 +1071,10 @@ dependencies = [ ] [[package]] -name = "ctrlc" -version = "3.4.5" +name = "ctrlc2" +version = "3.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90eeab0aa92f3f9b4e87f258c72b139c207d251f9cbc1080a0086b86a8870dd3" +checksum = "383ab70e5dc6ffdfe5545b6e6dea714938594be13ecaec73ebacc372ed4e292d" dependencies = [ "nix 0.29.0", "windows-sys 0.59.0", @@ -5932,7 +5932,7 @@ version = "1.0.0-dev" dependencies = [ "async-trait", "crc", - "ctrlc", + "ctrlc2", "libc", "lockfree", "num-traits", diff --git a/Cargo.toml b/Cargo.toml index 0f1b72c559..e996c0a343 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -163,7 +163,7 @@ stabby = "36.1.1" sha3 = "0.10.8" shared_memory = "0.12.4" shellexpand = "3.1.0" -ctrlc = { version = "3.4.5" } +ctrlc2 = { version = "3.5.8" } socket2 = { version = "0.5.7", features = ["all"] } 
stop-token = "0.7.0" syn = "2.0" diff --git a/commons/zenoh-shm/Cargo.toml b/commons/zenoh-shm/Cargo.toml index 679202f500..683a4d42c0 100644 --- a/commons/zenoh-shm/Cargo.toml +++ b/commons/zenoh-shm/Cargo.toml @@ -48,7 +48,7 @@ num_cpus = { workspace = true, optional = true } thread-priority = { workspace = true } lockfree = { workspace = true } stabby = { workspace = true } -ctrlc = { workspace = true, features = ["termination"] } +ctrlc2 = { workspace = true, features = ["termination"] } [dev-dependencies] libc = { workspace = true } diff --git a/commons/zenoh-shm/src/cleanup.rs b/commons/zenoh-shm/src/cleanup.rs index d87bbf534f..af1b4c2b8d 100644 --- a/commons/zenoh-shm/src/cleanup.rs +++ b/commons/zenoh-shm/src/cleanup.rs @@ -28,22 +28,26 @@ pub(crate) struct Cleanup { impl Cleanup { fn new() -> Self { // todo: this is a workaround to make sure Cleanup will be executed even if process terminates via signals - if let Err(e) = ctrlc::set_handler(|| std::process::exit(0)) { + if let Err(e) = ctrlc2::set_handler(|| { + tokio::task::spawn_blocking(|| std::process::exit(0)); + false + }) { match e { - ctrlc::Error::NoSuchSignal(signal_type) => { + ctrlc2::Error::NoSuchSignal(signal_type) => { zerror!( "Error registering cleanup handler for signal {:?}: no such signal!", signal_type ); } - ctrlc::Error::MultipleHandlers => { + ctrlc2::Error::MultipleHandlers => { zerror!("Error registering cleanup handler: already registered!"); } - ctrlc::Error::System(error) => { + ctrlc2::Error::System(error) => { zerror!("Error registering cleanup handler: system error: {error}"); } } } + Self { cleanups: Default::default(), } From 9b4877671cac0b20c98c2d5d1bb9813bb3fde5c2 Mon Sep 17 00:00:00 2001 From: yellowhatter Date: Wed, 18 Sep 2024 21:25:57 +0300 Subject: [PATCH 05/26] detach cleanup thread before joining --- commons/zenoh-shm/src/cleanup.rs | 39 ++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 15 deletions(-) diff --git 
a/commons/zenoh-shm/src/cleanup.rs b/commons/zenoh-shm/src/cleanup.rs index af1b4c2b8d..8cf1da3b98 100644 --- a/commons/zenoh-shm/src/cleanup.rs +++ b/commons/zenoh-shm/src/cleanup.rs @@ -12,6 +12,8 @@ // ZettaScale Zenoh Team, // +use std::thread::JoinHandle; + use static_init::dynamic; use zenoh_core::zerror; @@ -23,33 +25,39 @@ pub(crate) static mut CLEANUP: Cleanup = Cleanup::new(); /// An RAII object that calls all registered routines upon destruction pub(crate) struct Cleanup { cleanups: lockfree::queue::Queue>>, + handle: Option>, } impl Cleanup { fn new() -> Self { // todo: this is a workaround to make sure Cleanup will be executed even if process terminates via signals - if let Err(e) = ctrlc2::set_handler(|| { + let handle = match ctrlc2::set_handler(|| { tokio::task::spawn_blocking(|| std::process::exit(0)); - false + true }) { - match e { - ctrlc2::Error::NoSuchSignal(signal_type) => { - zerror!( - "Error registering cleanup handler for signal {:?}: no such signal!", - signal_type - ); - } - ctrlc2::Error::MultipleHandlers => { - zerror!("Error registering cleanup handler: already registered!"); - } - ctrlc2::Error::System(error) => { - zerror!("Error registering cleanup handler: system error: {error}"); + Ok(h) => Some(h), + Err(e) => { + match e { + ctrlc2::Error::NoSuchSignal(signal_type) => { + zerror!( + "Error registering cleanup handler for signal {:?}: no such signal!", + signal_type + ); + } + ctrlc2::Error::MultipleHandlers => { + zerror!("Error registering cleanup handler: already registered!"); + } + ctrlc2::Error::System(error) => { + zerror!("Error registering cleanup handler: system error: {error}"); + } } + None } - } + }; Self { cleanups: Default::default(), + handle, } } @@ -68,6 +76,7 @@ impl Cleanup { impl Drop for Cleanup { fn drop(&mut self) { + self.handle.take(); self.cleanup(); } } From d3872cb54977f0d801b6e2fd05469bb4c9dd870a Mon Sep 17 00:00:00 2001 From: yellowhatter Date: Wed, 18 Sep 2024 22:53:24 +0300 Subject: [PATCH 
06/26] change cleanup task handle lifetime --- commons/zenoh-shm/src/cleanup.rs | 65 +++++++++++++++++++------------- 1 file changed, 39 insertions(+), 26 deletions(-) diff --git a/commons/zenoh-shm/src/cleanup.rs b/commons/zenoh-shm/src/cleanup.rs index 8cf1da3b98..9c7cb17ae1 100644 --- a/commons/zenoh-shm/src/cleanup.rs +++ b/commons/zenoh-shm/src/cleanup.rs @@ -25,39 +25,15 @@ pub(crate) static mut CLEANUP: Cleanup = Cleanup::new(); /// An RAII object that calls all registered routines upon destruction pub(crate) struct Cleanup { cleanups: lockfree::queue::Queue>>, - handle: Option>, } impl Cleanup { fn new() -> Self { // todo: this is a workaround to make sure Cleanup will be executed even if process terminates via signals - let handle = match ctrlc2::set_handler(|| { - tokio::task::spawn_blocking(|| std::process::exit(0)); - true - }) { - Ok(h) => Some(h), - Err(e) => { - match e { - ctrlc2::Error::NoSuchSignal(signal_type) => { - zerror!( - "Error registering cleanup handler for signal {:?}: no such signal!", - signal_type - ); - } - ctrlc2::Error::MultipleHandlers => { - zerror!("Error registering cleanup handler: already registered!"); - } - ctrlc2::Error::System(error) => { - zerror!("Error registering cleanup handler: system error: {error}"); - } - } - None - } - }; + let _ = SIGNAL_CLEANUP.read(); Self { cleanups: Default::default(), - handle, } } @@ -76,7 +52,44 @@ impl Cleanup { impl Drop for Cleanup { fn drop(&mut self) { - self.handle.take(); self.cleanup(); } } + +#[dynamic(lazy)] +static mut SIGNAL_CLEANUP: SignalCleanup = SignalCleanup::new(); + +struct SignalCleanup { + _handle: Option>, +} + +impl SignalCleanup { + fn new() -> Self { + // todo: this is a workaround to make sure Cleanup will be executed even if process terminates via signals + let _handle = match ctrlc2::set_handler(|| { + tokio::task::spawn_blocking(|| std::process::exit(0)); + false + }) { + Ok(h) => Some(h), + Err(e) => { + match e { + ctrlc2::Error::NoSuchSignal(signal_type) 
=> { + zerror!( + "Error registering cleanup handler for signal {:?}: no such signal!", + signal_type + ); + } + ctrlc2::Error::MultipleHandlers => { + zerror!("Error registering cleanup handler: already registered!"); + } + ctrlc2::Error::System(error) => { + zerror!("Error registering cleanup handler: system error: {error}"); + } + } + None + } + }; + + Self { _handle } + } +} From 0246d62c4592d39f934cfd312763a00596748454 Mon Sep 17 00:00:00 2001 From: yellowhatter Date: Fri, 20 Sep 2024 18:08:54 +0300 Subject: [PATCH 07/26] - completely different SHM clenup mechanism --- Cargo.lock | 11 -- Cargo.toml | 1 - commons/zenoh-shm/Cargo.toml | 1 - commons/zenoh-shm/src/api/cleanup.rs | 30 ++--- commons/zenoh-shm/src/cleanup.rs | 52 ++------ commons/zenoh-shm/src/posix_shm/cleanup.rs | 134 +++++++++++++++++++++ commons/zenoh-shm/src/posix_shm/mod.rs | 1 + commons/zenoh-shm/src/posix_shm/segment.rs | 2 +- zenoh/src/lib.rs | 2 +- 9 files changed, 157 insertions(+), 77 deletions(-) create mode 100644 commons/zenoh-shm/src/posix_shm/cleanup.rs diff --git a/Cargo.lock b/Cargo.lock index 6f8d208cb4..33b5367eb7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1070,16 +1070,6 @@ dependencies = [ "cipher 0.2.5", ] -[[package]] -name = "ctrlc2" -version = "3.5.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "383ab70e5dc6ffdfe5545b6e6dea714938594be13ecaec73ebacc372ed4e292d" -dependencies = [ - "nix 0.29.0", - "windows-sys 0.59.0", -] - [[package]] name = "data-encoding" version = "2.6.0" @@ -5919,7 +5909,6 @@ version = "1.0.0-dev" dependencies = [ "async-trait", "crc", - "ctrlc2", "libc", "lockfree", "num-traits", diff --git a/Cargo.toml b/Cargo.toml index 760ebc033c..2fcaaa9cd5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -163,7 +163,6 @@ stabby = "36.1.1" sha3 = "0.10.8" shared_memory = "0.12.4" shellexpand = "3.1.0" -ctrlc2 = { version = "3.5.8" } socket2 = { version = "0.5.7", features = ["all"] } stop-token = "0.7.0" syn = "2.0" diff --git 
a/commons/zenoh-shm/Cargo.toml b/commons/zenoh-shm/Cargo.toml index 683a4d42c0..0b79fd2549 100644 --- a/commons/zenoh-shm/Cargo.toml +++ b/commons/zenoh-shm/Cargo.toml @@ -48,7 +48,6 @@ num_cpus = { workspace = true, optional = true } thread-priority = { workspace = true } lockfree = { workspace = true } stabby = { workspace = true } -ctrlc2 = { workspace = true, features = ["termination"] } [dev-dependencies] libc = { workspace = true } diff --git a/commons/zenoh-shm/src/api/cleanup.rs b/commons/zenoh-shm/src/api/cleanup.rs index dda3637a07..45d78db9ef 100644 --- a/commons/zenoh-shm/src/api/cleanup.rs +++ b/commons/zenoh-shm/src/api/cleanup.rs @@ -12,24 +12,20 @@ // ZettaScale Zenoh Team, // -use crate::cleanup::CLEANUP; +use crate::posix_shm::cleanup::cleanup_orphaned_segments; -/// Make forced cleanup -/// NOTE: this is a part of a temporary on-exit-cleanup workaround and it will be very likely removed in the future. -/// WARN: The improper usage can break the application logic, impacting SHM-utilizing Sessions in other processes. -/// Cleanup unlinks SHM segments _created_ by current process from filesystem with the following consequences: -/// - Sessions that are not linked to this segment will fail to link it if they try. Such kind of errors are properly handled. -/// - Already linked processes will still have this shared memory mapped and safely accessible -/// - The actual memory will be reclaimed by the OS only after last process using it will close it or exit +/// UNIX: Trigger cleanup for orphaned SHM segments +/// If process that created named SHM segment crashes or exits by a signal, the segment persists in the system +/// disregarding if it is used by other Zenoh processes or not. This is the detail of POSIX specification for +/// shared memory that is hard to bypass. To deal with this we developed a cleanup routine that enumerates all +/// segments and tries to find processes that are using it. If no such process found, segment will be removed. 
+/// There is no ideal signal to trigger this cleanup, so by default, zenoh triggers it in the following moments: +/// - first POSIX SHM segment creation +/// - process exit via exit() call or return from maint function +/// It is OK to additionally trigger this function at any time, but be aware that this can be costly. /// -/// In order to properly cleanup some SHM internals upon process exit, Zenoh installs exit handlers (see atexit() API). -/// The atexit handler is executed only on process exit(), the inconvenience is that terminating signal handlers -/// (like SIGINT) bypass it and terminate the process without cleanup. To eliminate this effect, Zenoh overrides -/// SIGHUP, SIGTERM, SIGINT and SIGQUIT handlers and calls exit() inside to make graceful shutdown. If user is going to -/// override these Zenoh's handlers, the workaround will break, and there are two ways to keep this workaround working: -/// - execute overridden Zenoh handlers in overriding handler code -/// - call force_cleanup_before_exit() anywhere at any time before terminating the process +/// For non-unix platforms this function currently does nothing #[zenoh_macros::unstable_doc] -pub fn force_cleanup_before_exit() { - CLEANUP.read().cleanup(); +pub fn cleanup_orphaned_shm_segments() { + cleanup_orphaned_segments(); } diff --git a/commons/zenoh-shm/src/cleanup.rs b/commons/zenoh-shm/src/cleanup.rs index 9c7cb17ae1..a5c3aacc4f 100644 --- a/commons/zenoh-shm/src/cleanup.rs +++ b/commons/zenoh-shm/src/cleanup.rs @@ -12,10 +12,9 @@ // ZettaScale Zenoh Team, // -use std::thread::JoinHandle; - use static_init::dynamic; -use zenoh_core::zerror; + +use crate::posix_shm::cleanup::cleanup_orphaned_segments; /// A global cleanup, that is guaranteed to be dropped at normal program exit and that will /// execute all registered cleanup routines at this moment @@ -29,9 +28,8 @@ pub(crate) struct Cleanup { impl Cleanup { fn new() -> Self { - // todo: this is a workaround to make sure Cleanup will be 
executed even if process terminates via signals - let _ = SIGNAL_CLEANUP.read(); - + // on first cleanup subsystem touch we perform zenoh segment cleanup + cleanup_orphaned_segments(); Self { cleanups: Default::default(), } @@ -41,7 +39,7 @@ impl Cleanup { self.cleanups.push(Some(cleanup_fn)); } - pub(crate) fn cleanup(&self) { + fn cleanup(&self) { while let Some(cleanup) = self.cleanups.pop() { if let Some(f) = cleanup { f(); @@ -52,44 +50,8 @@ impl Cleanup { impl Drop for Cleanup { fn drop(&mut self) { + // on finalization stage we perform zenoh segment cleanup + cleanup_orphaned_segments(); self.cleanup(); } } - -#[dynamic(lazy)] -static mut SIGNAL_CLEANUP: SignalCleanup = SignalCleanup::new(); - -struct SignalCleanup { - _handle: Option>, -} - -impl SignalCleanup { - fn new() -> Self { - // todo: this is a workaround to make sure Cleanup will be executed even if process terminates via signals - let _handle = match ctrlc2::set_handler(|| { - tokio::task::spawn_blocking(|| std::process::exit(0)); - false - }) { - Ok(h) => Some(h), - Err(e) => { - match e { - ctrlc2::Error::NoSuchSignal(signal_type) => { - zerror!( - "Error registering cleanup handler for signal {:?}: no such signal!", - signal_type - ); - } - ctrlc2::Error::MultipleHandlers => { - zerror!("Error registering cleanup handler: already registered!"); - } - ctrlc2::Error::System(error) => { - zerror!("Error registering cleanup handler: system error: {error}"); - } - } - None - } - }; - - Self { _handle } - } -} diff --git a/commons/zenoh-shm/src/posix_shm/cleanup.rs b/commons/zenoh-shm/src/posix_shm/cleanup.rs new file mode 100644 index 0000000000..821fb790f2 --- /dev/null +++ b/commons/zenoh-shm/src/posix_shm/cleanup.rs @@ -0,0 +1,134 @@ +// +// Copyright (c) 2023 ZettaScale Technology +// +// This program and the accompanying materials are made available under the +// terms of the Eclipse Public License 2.0 which is available at +// http://www.eclipse.org/legal/epl-2.0, or the Apache License, 
Version 2.0 +// which is available at https://www.apache.org/licenses/LICENSE-2.0. +// +// SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 +// +// Contributors: +// ZettaScale Zenoh Team, +// + +pub(crate) use platform::cleanup_orphaned_segments; + +#[cfg(not(unix))] +mod platform { + pub(crate) fn cleanup_orphaned_segments() {} +} + +#[cfg(unix)] +mod platform { + use std::{borrow::Borrow, collections::HashSet, fs, path::PathBuf}; + + use zenoh_result::ZResult; + + #[derive(PartialEq, Eq, Hash)] + struct ProcFdDir(PathBuf); + + impl ProcFdDir { + fn enumerate_fds(&self) -> ZResult> { + let fds = self.0.read_dir()?; + let fd_map: HashSet = fds + .filter_map(Result::ok) + .map(|f| std::convert::Into::::into(f.path())) + .collect(); + Ok(fd_map) + } + } + + impl From for ProcFdDir { + fn from(value: PathBuf) -> Self { + Self(value) + } + } + + #[derive(PartialEq, Eq, Hash)] + struct FdFile(PathBuf); + + impl From for FdFile { + fn from(value: PathBuf) -> Self { + Self(value) + } + } + + #[derive(PartialEq, Eq, Hash)] + struct ShmFile(PathBuf); + + impl ShmFile { + fn cleanup_file(self) { + let _ = std::fs::remove_file(self.0); + } + } + + impl Borrow for ShmFile { + fn borrow(&self) -> &PathBuf { + &self.0 + } + } + + impl From for ShmFile { + fn from(value: PathBuf) -> Self { + Self(value) + } + } + + pub(crate) fn cleanup_orphaned_segments() { + if let Err(e) = cleanup_orphaned_segments_inner() { + tracing::error!("Error performing orphaned SHM segments cleanup: {e}") + } + } + + fn enumerate_shm_files() -> ZResult> { + let shm_files = fs::read_dir("/dev/shm")?; + Ok(shm_files + .filter_map(Result::ok) + .filter_map(|f| { + if let Some(ext) = f.path().extension() { + if ext == "zenoh" { + return Some(std::convert::Into::::into(f.path())); + } + } + None + }) + .collect()) + } + + fn enumerate_proc_dirs() -> ZResult> { + let proc_dirs = fs::read_dir("/proc")?; + Ok(proc_dirs + .filter_map(Result::ok) + .map(|f| std::convert::Into::::into(f.path().join("fd"))) + 
.collect()) + } + + fn enumerate_proc_fds() -> ZResult> { + let mut fds = HashSet::default(); + let dirs = enumerate_proc_dirs()?; + for dir in dirs { + if let Ok(dir_fds) = dir.enumerate_fds() { + fds.extend(dir_fds); + } + } + Ok(fds) + } + + fn cleanup_orphaned_segments_inner() -> ZResult<()> { + let fd_map = enumerate_proc_fds()?; + let mut shm_map = enumerate_shm_files()?; + + for fd_file in fd_map { + if let Ok(resolved_link) = fd_file.0.read_link() { + shm_map.remove(&resolved_link); + } + } + + for shm_file_to_cleanup in shm_map { + shm_file_to_cleanup.cleanup_file(); + } + + Ok(()) + } +} diff --git a/commons/zenoh-shm/src/posix_shm/mod.rs b/commons/zenoh-shm/src/posix_shm/mod.rs index a63b1c9e6d..495077f9f0 100644 --- a/commons/zenoh-shm/src/posix_shm/mod.rs +++ b/commons/zenoh-shm/src/posix_shm/mod.rs @@ -14,3 +14,4 @@ pub mod array; tested_crate_module!(segment); +pub(crate) mod cleanup; diff --git a/commons/zenoh-shm/src/posix_shm/segment.rs b/commons/zenoh-shm/src/posix_shm/segment.rs index 6a34506029..0f82be5beb 100644 --- a/commons/zenoh-shm/src/posix_shm/segment.rs +++ b/commons/zenoh-shm/src/posix_shm/segment.rs @@ -106,7 +106,7 @@ where fn os_id(id: ID, id_prefix: &str) -> String { let os_id_str = format!("{id_prefix}_{id}"); let crc_os_id_str = ECMA.checksum(os_id_str.as_bytes()); - format!("{:x}", crc_os_id_str) + format!("{:x}.zenoh", crc_os_id_str) } pub fn as_ptr(&self) -> *mut u8 { diff --git a/zenoh/src/lib.rs b/zenoh/src/lib.rs index 092607e484..2eed181543 100644 --- a/zenoh/src/lib.rs +++ b/zenoh/src/lib.rs @@ -427,7 +427,7 @@ pub mod shm { zshm::{zshm, ZShm}, zshmmut::{zshmmut, ZShmMut}, }, - cleanup::force_cleanup_before_exit, + cleanup::cleanup_orphaned_shm_segments, client::{shm_client::ShmClient, shm_segment::ShmSegment}, client_storage::{ShmClientStorage, GLOBAL_CLIENT_STORAGE}, common::types::{ChunkID, ProtocolID, SegmentID}, From a0c8b31db61dd62718c97d722ee4093bdd111d5f Mon Sep 17 00:00:00 2001 From: yellowhatter Date: Thu, 26 
Sep 2024 14:18:35 +0300 Subject: [PATCH 08/26] Support Linux only for a while --- commons/zenoh-shm/src/api/cleanup.rs | 4 ++-- commons/zenoh-shm/src/posix_shm/cleanup.rs | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/commons/zenoh-shm/src/api/cleanup.rs b/commons/zenoh-shm/src/api/cleanup.rs index 45d78db9ef..7178af29c8 100644 --- a/commons/zenoh-shm/src/api/cleanup.rs +++ b/commons/zenoh-shm/src/api/cleanup.rs @@ -14,7 +14,7 @@ use crate::posix_shm::cleanup::cleanup_orphaned_segments; -/// UNIX: Trigger cleanup for orphaned SHM segments +/// Linux: Trigger cleanup for orphaned SHM segments /// If process that created named SHM segment crashes or exits by a signal, the segment persists in the system /// disregarding if it is used by other Zenoh processes or not. This is the detail of POSIX specification for /// shared memory that is hard to bypass. To deal with this we developed a cleanup routine that enumerates all @@ -24,7 +24,7 @@ use crate::posix_shm::cleanup::cleanup_orphaned_segments; /// - process exit via exit() call or return from maint function /// It is OK to additionally trigger this function at any time, but be aware that this can be costly. 
/// -/// For non-unix platforms this function currently does nothing +/// For non-linux platforms this function currently does nothing #[zenoh_macros::unstable_doc] pub fn cleanup_orphaned_shm_segments() { cleanup_orphaned_segments(); diff --git a/commons/zenoh-shm/src/posix_shm/cleanup.rs b/commons/zenoh-shm/src/posix_shm/cleanup.rs index 821fb790f2..26efed6686 100644 --- a/commons/zenoh-shm/src/posix_shm/cleanup.rs +++ b/commons/zenoh-shm/src/posix_shm/cleanup.rs @@ -14,12 +14,12 @@ pub(crate) use platform::cleanup_orphaned_segments; -#[cfg(not(unix))] +#[cfg(not(linux))] mod platform { pub(crate) fn cleanup_orphaned_segments() {} } -#[cfg(unix)] +#[cfg(linux)] mod platform { use std::{borrow::Borrow, collections::HashSet, fs, path::PathBuf}; From 99fac5fe54ec2c62100d17ea78f92596d2dbc375 Mon Sep 17 00:00:00 2001 From: yellowhatter Date: Thu, 26 Sep 2024 14:47:53 +0300 Subject: [PATCH 09/26] unix and not macos --- commons/zenoh-shm/src/posix_shm/cleanup.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/commons/zenoh-shm/src/posix_shm/cleanup.rs b/commons/zenoh-shm/src/posix_shm/cleanup.rs index 26efed6686..9b74f24a22 100644 --- a/commons/zenoh-shm/src/posix_shm/cleanup.rs +++ b/commons/zenoh-shm/src/posix_shm/cleanup.rs @@ -14,12 +14,12 @@ pub(crate) use platform::cleanup_orphaned_segments; -#[cfg(not(linux))] +#[cfg(not(all(unix, not(target_os = "macos"))))] mod platform { pub(crate) fn cleanup_orphaned_segments() {} } -#[cfg(linux)] +#[cfg(all(unix, not(target_os = "macos")))] mod platform { use std::{borrow::Borrow, collections::HashSet, fs, path::PathBuf}; From 59621387db502ef412645892d35b288ce14543f7 Mon Sep 17 00:00:00 2001 From: yellowhatter Date: Thu, 26 Sep 2024 15:09:05 +0300 Subject: [PATCH 10/26] do not use Linux Ephemeral ports in tests --- zenoh/tests/interceptors.rs | 4 ++-- zenoh/tests/unicity.rs | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/zenoh/tests/interceptors.rs 
b/zenoh/tests/interceptors.rs index 76c0ccff41..25dac7ddb5 100644 --- a/zenoh/tests/interceptors.rs +++ b/zenoh/tests/interceptors.rs @@ -145,7 +145,7 @@ fn downsampling_test( fn downsampling_by_keyexpr_impl(flow: InterceptorFlow) { let ke_prefix = "test/downsamples_by_keyexp"; - let locator = "tcp/127.0.0.1:38446"; + let locator = "tcp/127.0.0.1:31446"; let ke_10hz: KeyExpr = format!("{ke_prefix}/10hz").try_into().unwrap(); let ke_20hz: KeyExpr = format!("{ke_prefix}/20hz").try_into().unwrap(); @@ -198,7 +198,7 @@ fn downsampling_by_keyexpr() { #[cfg(unix)] fn downsampling_by_interface_impl(flow: InterceptorFlow) { let ke_prefix = "test/downsamples_by_interface"; - let locator = "tcp/127.0.0.1:38447"; + let locator = "tcp/127.0.0.1:31447"; let ke_10hz: KeyExpr = format!("{ke_prefix}/10hz").try_into().unwrap(); let ke_no_effect: KeyExpr = format!("{ke_prefix}/no_effect").try_into().unwrap(); diff --git a/zenoh/tests/unicity.rs b/zenoh/tests/unicity.rs index d6a8c8f621..eefead014e 100644 --- a/zenoh/tests/unicity.rs +++ b/zenoh/tests/unicity.rs @@ -82,7 +82,7 @@ async fn open_router_session() -> Session { config .listen .endpoints - .set(vec!["tcp/127.0.0.1:37447".parse().unwrap()]) + .set(vec!["tcp/127.0.0.1:30447".parse().unwrap()]) .unwrap(); config.scouting.multicast.set_enabled(Some(false)).unwrap(); println!("[ ][00a] Opening router session"); @@ -100,7 +100,7 @@ async fn open_client_sessions() -> (Session, Session, Session) { config.set_mode(Some(WhatAmI::Client)).unwrap(); config .connect - .set_endpoints(ModeDependentValue::Unique(vec!["tcp/127.0.0.1:37447" + .set_endpoints(ModeDependentValue::Unique(vec!["tcp/127.0.0.1:30447" .parse::() .unwrap()])) .unwrap(); @@ -111,7 +111,7 @@ async fn open_client_sessions() -> (Session, Session, Session) { config.set_mode(Some(WhatAmI::Client)).unwrap(); config .connect - .set_endpoints(ModeDependentValue::Unique(vec!["tcp/127.0.0.1:37447" + .set_endpoints(ModeDependentValue::Unique(vec!["tcp/127.0.0.1:30447" 
.parse::() .unwrap()])) .unwrap(); @@ -122,7 +122,7 @@ async fn open_client_sessions() -> (Session, Session, Session) { config.set_mode(Some(WhatAmI::Client)).unwrap(); config .connect - .set_endpoints(ModeDependentValue::Unique(vec!["tcp/127.0.0.1:37447" + .set_endpoints(ModeDependentValue::Unique(vec!["tcp/127.0.0.1:30447" .parse::() .unwrap()])) .unwrap(); From 4f835a087283a582304206fd182dbc2b88191de9 Mon Sep 17 00:00:00 2001 From: yellowhatter Date: Thu, 26 Sep 2024 16:39:44 +0300 Subject: [PATCH 11/26] remove more ephemeral ports --- zenoh/tests/authentication.rs | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/zenoh/tests/authentication.rs b/zenoh/tests/authentication.rs index a49261c9d1..3d6579d6a4 100644 --- a/zenoh/tests/authentication.rs +++ b/zenoh/tests/authentication.rs @@ -41,10 +41,10 @@ mod test { create_new_files(TESTFILES_PATH.to_path_buf()) .await .unwrap(); - test_pub_sub_deny_then_allow_usrpswd(37447).await; - test_pub_sub_allow_then_deny_usrpswd(37447).await; - test_get_qbl_allow_then_deny_usrpswd(37447).await; - test_get_qbl_deny_then_allow_usrpswd(37447).await; + test_pub_sub_deny_then_allow_usrpswd(29447).await; + test_pub_sub_allow_then_deny_usrpswd(29447).await; + test_get_qbl_allow_then_deny_usrpswd(29447).await; + test_get_qbl_deny_then_allow_usrpswd(29447).await; } #[tokio::test(flavor = "multi_thread", worker_threads = 4)] @@ -53,10 +53,10 @@ mod test { create_new_files(TESTFILES_PATH.to_path_buf()) .await .unwrap(); - test_pub_sub_deny_then_allow_tls(37448, false).await; - test_pub_sub_allow_then_deny_tls(37449).await; - test_get_qbl_allow_then_deny_tls(37450).await; - test_get_qbl_deny_then_allow_tls(37451).await; + test_pub_sub_deny_then_allow_tls(29448, false).await; + test_pub_sub_allow_then_deny_tls(29449).await; + test_get_qbl_allow_then_deny_tls(29450).await; + test_get_qbl_deny_then_allow_tls(29451).await; } #[tokio::test(flavor = "multi_thread", worker_threads = 4)] @@ -65,10 
+65,10 @@ mod test { create_new_files(TESTFILES_PATH.to_path_buf()) .await .unwrap(); - test_pub_sub_deny_then_allow_quic(37452).await; - test_pub_sub_allow_then_deny_quic(37453).await; - test_get_qbl_deny_then_allow_quic(37454).await; - test_get_qbl_allow_then_deny_quic(37455).await; + test_pub_sub_deny_then_allow_quic(29452).await; + test_pub_sub_allow_then_deny_quic(29453).await; + test_get_qbl_deny_then_allow_quic(29454).await; + test_get_qbl_allow_then_deny_quic(29455).await; } #[tokio::test(flavor = "multi_thread", worker_threads = 4)] @@ -78,7 +78,7 @@ mod test { create_new_files(TESTFILES_PATH.to_path_buf()) .await .unwrap(); - test_pub_sub_deny_then_allow_tls(37456, true).await; + test_pub_sub_deny_then_allow_tls(29456, true).await; } #[tokio::test(flavor = "multi_thread", worker_threads = 4)] @@ -87,8 +87,8 @@ mod test { create_new_files(TESTFILES_PATH.to_path_buf()) .await .unwrap(); - test_deny_allow_combination(37457).await; - test_allow_deny_combination(37458).await; + test_deny_allow_combination(29457).await; + test_allow_deny_combination(29458).await; } #[allow(clippy::all)] From 5db24c83c875a1dfb2203925b74cc1ab2ec42f43 Mon Sep 17 00:00:00 2001 From: Julien Loudet Date: Tue, 24 Sep 2024 12:32:12 +0200 Subject: [PATCH 12/26] refactor(storage-manager): keep configuration in StorageService The `StorageService` was splitting the `StorageConfig` that was used to create it. In addition to adding noise, this prevented separating the creation of the structure from spawning the subscriber and queryable associated with a Storage. This commit changes the fields of the `StorageService` structure to keep the entire `StorageConfig` -- thus allowing separating the creation from spawning the queryable and subscriber. * plugins/zenoh-plugin-storage-manager/src/storages_mgt/service.rs: - Removed the following fields from the `StorageService` structure: - `key_expr`, - `complete`, - `strip_prefix`. 
- Added the field `configuration` to keep track of the associated `StorageConfig`. - Changed the signature of the `start_storage_queryable_subscriber` removing the `GarbageCollectionConfig` as it is now contained in `&self`. - Updated the calls to access `key_expr`, `complete` and `strip_prefix`. - Removed an `unwrap()` and instead log an error. Signed-off-by: Julien Loudet --- .../src/storages_mgt/service.rs | 98 +++++++++++-------- 1 file changed, 56 insertions(+), 42 deletions(-) diff --git a/plugins/zenoh-plugin-storage-manager/src/storages_mgt/service.rs b/plugins/zenoh-plugin-storage-manager/src/storages_mgt/service.rs index 42e82eae94..4d493ec8b2 100644 --- a/plugins/zenoh-plugin-storage-manager/src/storages_mgt/service.rs +++ b/plugins/zenoh-plugin-storage-manager/src/storages_mgt/service.rs @@ -61,10 +61,8 @@ struct Update { #[derive(Clone)] pub struct StorageService { session: Arc, - key_expr: OwnedKeyExpr, - complete: bool, + configuration: StorageConfig, name: String, - strip_prefix: Option, pub(crate) storage: Arc>>, capability: Capability, tombstones: Arc>>, @@ -84,10 +82,8 @@ impl StorageService { ) -> Self { let storage_service = StorageService { session, - key_expr: config.key_expr, - complete: config.complete, + configuration: config, name: name.to_string(), - strip_prefix: config.strip_prefix, storage, capability, tombstones: Arc::new(RwLock::new(KeBoxTree::default())), @@ -123,20 +119,18 @@ impl StorageService { } storage_service .clone() - .start_storage_queryable_subscriber(rx, config.garbage_collection_config) + .start_storage_queryable_subscriber(rx) .await; storage_service } - async fn start_storage_queryable_subscriber( - self, - mut rx: Receiver, - gc_config: GarbageCollectionConfig, - ) { + async fn start_storage_queryable_subscriber(self, mut rx: Receiver) { // start periodic GC event let t = Timer::default(); + let gc_config = self.configuration.garbage_collection_config.clone(); + let latest_updates = if 
self.cache_latest.replication_log.is_none() { Some(self.cache_latest.latest_updates.clone()) } else { @@ -154,8 +148,10 @@ impl StorageService { ); t.add_async(gc).await; + let storage_key_expr = &self.configuration.key_expr; + // subscribe on key_expr - let storage_sub = match self.session.declare_subscriber(&self.key_expr).await { + let storage_sub = match self.session.declare_subscriber(storage_key_expr).await { Ok(storage_sub) => storage_sub, Err(e) => { tracing::error!("Error starting storage '{}': {}", self.name, e); @@ -166,8 +162,8 @@ impl StorageService { // answer to queries on key_expr let storage_queryable = match self .session - .declare_queryable(&self.key_expr) - .complete(self.complete) + .declare_queryable(storage_key_expr) + .complete(self.configuration.complete) .await { Ok(storage_queryable) => storage_queryable, @@ -249,6 +245,8 @@ impl StorageService { matching_keys ); + let prefix = self.configuration.strip_prefix.as_ref(); + for k in matching_keys { if self.is_deleted(&k, sample_timestamp).await { tracing::trace!("Skipping Sample < {} > deleted later on", k); @@ -297,13 +295,12 @@ impl StorageService { } }; - let stripped_key = - match crate::strip_prefix(self.strip_prefix.as_ref(), sample_to_store.key_expr()) { - Ok(stripped) => stripped, - Err(e) => { - bail!("{e:?}"); - } - }; + let stripped_key = match crate::strip_prefix(prefix, sample_to_store.key_expr()) { + Ok(stripped) => stripped, + Err(e) => { + bail!("{e:?}"); + } + }; // If the Storage was declared as only keeping the Latest value, we ensure that, for // each received Sample, it is indeed the Latest value that is processed. 
@@ -436,19 +433,21 @@ impl StorageService { let wildcards = self.wildcard_updates.read().await; let mut ts = timestamp; let mut update = None; + + let prefix = self.configuration.strip_prefix.as_ref(); + for node in wildcards.intersecting_keys(key_expr) { let weight = wildcards.weight_at(&node); if weight.is_some() && weight.unwrap().data.timestamp > *ts { // if the key matches a wild card update, check whether it was saved in storage // remember that wild card updates change only existing keys - let stripped_key = - match crate::strip_prefix(self.strip_prefix.as_ref(), &key_expr.into()) { - Ok(stripped) => stripped, - Err(e) => { - tracing::error!("{}", e); - break; - } - }; + let stripped_key = match crate::strip_prefix(prefix, &key_expr.into()) { + Ok(stripped) => stripped, + Err(e) => { + tracing::error!("{}", e); + break; + } + }; let mut storage = self.storage.lock().await; match storage.get(stripped_key, "").await { Ok(stored_data) => { @@ -531,20 +530,22 @@ impl StorageService { } }; tracing::trace!("[STORAGE] Processing query on key_expr: {}", q.key_expr()); + + let prefix = self.configuration.strip_prefix.as_ref(); + if q.key_expr().is_wild() { // resolve key expr into individual keys let matching_keys = self.get_matching_keys(q.key_expr()).await; let mut storage = self.storage.lock().await; for key in matching_keys { - let stripped_key = - match crate::strip_prefix(self.strip_prefix.as_ref(), &key.clone().into()) { - Ok(k) => k, - Err(e) => { - tracing::error!("{}", e); - // @TODO: return error when it is supported - return; - } - }; + let stripped_key = match crate::strip_prefix(prefix, &key.clone().into()) { + Ok(k) => k, + Err(e) => { + tracing::error!("{}", e); + // @TODO: return error when it is supported + return; + } + }; match storage.get(stripped_key, q.parameters().as_str()).await { Ok(stored_data) => { for entry in stored_data { @@ -569,7 +570,7 @@ impl StorageService { } drop(storage); } else { - let stripped_key = match 
crate::strip_prefix(self.strip_prefix.as_ref(), q.key_expr()) { + let stripped_key = match crate::strip_prefix(prefix, q.key_expr()) { Ok(k) => k, Err(e) => { tracing::error!("{}", e); @@ -606,13 +607,26 @@ impl StorageService { let mut result = Vec::new(); // @TODO: if cache exists, use that to get the list let storage = self.storage.lock().await; + + let prefix = self.configuration.strip_prefix.as_ref(); + match storage.get_all_entries().await { Ok(entries) => { for (k, _ts) in entries { // @TODO: optimize adding back the prefix (possible inspiration from https://github.com/eclipse-zenoh/zenoh/blob/0.5.0-beta.9/backends/traits/src/utils.rs#L79) let full_key = match k { - Some(key) => crate::prefix(self.strip_prefix.as_ref(), &key), - None => self.strip_prefix.clone().unwrap(), + Some(key) => crate::prefix(prefix, &key), + None => { + let Some(prefix) = prefix else { + // TODO Check if we have anything in place that would prevent such + // an error from happening. + tracing::error!( + "Internal bug: empty key with no `strip_prefix` configured" + ); + continue; + }; + prefix.clone() + } }; if key_expr.intersects(&full_key.clone()) { result.push(full_key); From 532cf6ec025cf4845bceeba1753b686ede7ba85f Mon Sep 17 00:00:00 2001 From: Julien Loudet Date: Tue, 24 Sep 2024 13:19:12 +0200 Subject: [PATCH 13/26] refactor(storage-manager): separate creation/start of StorageService This commit separates creating the `StorageService` from starting it. This change is motivated by the Replication feature: when performing the initial alignment we want to delay the Storage from answering queries until after the initial alignment has been performed. In order to have this functionality we need to be able to dissociate creating the `StorageService` from starting it. As the method `start_storage_queryable_subscriber` takes ownership of the `StorageService`, it became mandatory to first create the `StorageService`, then start the Replication and lastly start the Storage. 
Because of this, as the Replication code was inside a task, the code to create and start the Storage was also moved inside the task. * plugins/zenoh-plugin-storage-manager/src/replication/service.rs: - Take a reference over the `StorageService` structure as it is only needed before spawning the different Replication tasks. The StorageService is still needed to call `process_sample`. - Clone the `Arc` of the underlying Storage before spawning the Replication tasks. * plugins/zenoh-plugin-storage-manager/src/storages_mgt/mod.rs: - Move the logging until starting the Storage. - Move the code starting the Storage inside the task. - Start the `StorageService` after having started the `ReplicationService`. * plugins/zenoh-plugin-storage-manager/src/storages_mgt/service.rs: - Renamed the function `start` to `new` as it now only creates an instance of the `StorageService`. - Removed the parameter `rx` from the call to `new` as it no longer also starts it. - Removed the call to `start_storage_queryable_subscriber` from `new`. - Changed the visibility of the method `start_storage_queryable_subscriber` to `pub(crate)` as it is called from outside the `service` module. - Added logging information before the Storage "loop" is started (to help confirm, with the logs, the order in which the different elements are started). Signed-off-by: Julien Loudet --- .../src/replication/service.rs | 6 +- .../src/storages_mgt/mod.rs | 67 ++++++++++--------- .../src/storages_mgt/service.rs | 15 +++-- 3 files changed, 46 insertions(+), 42 deletions(-) diff --git a/plugins/zenoh-plugin-storage-manager/src/replication/service.rs b/plugins/zenoh-plugin-storage-manager/src/replication/service.rs index 06ec31d9a2..f3b87c6c3f 100644 --- a/plugins/zenoh-plugin-storage-manager/src/replication/service.rs +++ b/plugins/zenoh-plugin-storage-manager/src/replication/service.rs @@ -44,7 +44,7 @@ impl ReplicationService { /// received. 
pub async fn spawn_start( zenoh_session: Arc, - storage_service: StorageService, + storage_service: &StorageService, storage_key_expr: OwnedKeyExpr, replication_log: Arc>, latest_updates: Arc>, @@ -98,13 +98,15 @@ impl ReplicationService { ); } + let storage = storage_service.storage.clone(); + tokio::task::spawn(async move { let replication = Replication { zenoh_session, replication_log, storage_key_expr, latest_updates, - storage: storage_service.storage.clone(), + storage, }; let replication_service = Self { diff --git a/plugins/zenoh-plugin-storage-manager/src/storages_mgt/mod.rs b/plugins/zenoh-plugin-storage-manager/src/storages_mgt/mod.rs index 7dcc60bf32..3e5a2e1f09 100644 --- a/plugins/zenoh-plugin-storage-manager/src/storages_mgt/mod.rs +++ b/plugins/zenoh-plugin-storage-manager/src/storages_mgt/mod.rs @@ -68,9 +68,8 @@ pub(crate) async fn create_and_start_storage( let storage_name = parts[7]; let name = format!("{uuid}/{storage_name}"); - tracing::trace!("Start storage '{}' on keyexpr '{}'", name, config.key_expr); - let (tx, rx_storage) = tokio::sync::broadcast::channel(1); + let rx_replication = tx.subscribe(); let mut entries = match storage.get_all_entries().await { Ok(entries) => entries @@ -111,50 +110,52 @@ pub(crate) async fn create_and_start_storage( let latest_updates = Arc::new(RwLock::new(latest_updates)); let storage = Arc::new(Mutex::new(storage)); - let storage_service = StorageService::start( - zenoh_session.clone(), - config.clone(), - &name, - storage, - capability, - rx_storage, - CacheLatest::new(latest_updates.clone(), replication_log.clone()), - ) - .await; - - // Testing if the `replication_log` is set is equivalent to testing if the `replication` is - // set: the `replication_log` is only set when the latter is. 
- if let Some(replication_log) = replication_log { - let rx_replication = tx.subscribe(); - - // NOTE Although the function `ReplicationService::spawn_start` spawns its own tasks, we - // still need to call it within a dedicated task because the Zenoh routing tables are - // populated only after the plugins have been loaded. - // - // If we don't wait for the routing tables to be populated the initial alignment - // (i.e. querying any Storage on the network handling the same key expression), will - // never work. - // - // TODO Do we really want to perform such an initial alignment? Because this query will - // target any Storage that matches the same key expression, regardless of if they have - // been configured to be replicated. - tokio::task::spawn(async move { + + // NOTE The StorageService method `start_storage_queryable_subscriber` does not spawn its own + // task to loop/wait on the Subscriber and Queryable it creates. Thus we spawn the task + // here. + // + // Doing so also allows us to return early from the creation of the Storage, creation which + // blocks populating the routing tables. + // + // TODO Do we really want to perform such an initial alignment? Because this query will + // target any Storage that matches the same key expression, regardless of if they have + // been configured to be replicated. + tokio::task::spawn(async move { + let storage_service = StorageService::new( + zenoh_session.clone(), + config.clone(), + &name, + storage, + capability, + CacheLatest::new(latest_updates.clone(), replication_log.clone()), + ) + .await; + + // Testing if the `replication_log` is set is equivalent to testing if the `replication` is + // set: the `replication_log` is only set when the latter is. 
+ if let Some(replication_log) = replication_log { tracing::debug!( "Starting replication of storage '{}' on keyexpr '{}'", name, config.key_expr, ); + ReplicationService::spawn_start( zenoh_session, - storage_service, + &storage_service, config.key_expr, replication_log, latest_updates, rx_replication, ) .await; - }); - } + } + + storage_service + .start_storage_queryable_subscriber(rx_storage) + .await; + }); Ok(tx) } diff --git a/plugins/zenoh-plugin-storage-manager/src/storages_mgt/service.rs b/plugins/zenoh-plugin-storage-manager/src/storages_mgt/service.rs index 4d493ec8b2..e96320051f 100644 --- a/plugins/zenoh-plugin-storage-manager/src/storages_mgt/service.rs +++ b/plugins/zenoh-plugin-storage-manager/src/storages_mgt/service.rs @@ -71,13 +71,12 @@ pub struct StorageService { } impl StorageService { - pub async fn start( + pub async fn new( session: Arc, config: StorageConfig, name: &str, storage: Arc>>, capability: Capability, - rx: Receiver, cache_latest: CacheLatest, ) -> Self { let storage_service = StorageService { @@ -117,15 +116,11 @@ impl StorageService { } } } - storage_service - .clone() - .start_storage_queryable_subscriber(rx) - .await; storage_service } - async fn start_storage_queryable_subscriber(self, mut rx: Receiver) { + pub(crate) async fn start_storage_queryable_subscriber(self, mut rx: Receiver) { // start periodic GC event let t = Timer::default(); @@ -173,6 +168,12 @@ impl StorageService { } }; + tracing::debug!( + "Starting storage '{}' on keyexpr '{}'", + self.name, + storage_key_expr + ); + tokio::task::spawn(async move { loop { tokio::select!( From f1b956dff91a0898cec28f991bfcbdfed5f96101 Mon Sep 17 00:00:00 2001 From: Julien Loudet Date: Wed, 25 Sep 2024 14:44:18 +0200 Subject: [PATCH 14/26] refactor(storage-manager): use hash in Replication key expressions As we were using the key expression of the Storage to generate the key expressions used in the Replication, it was possible to receive Digest emitted by Replicas that were 
operating on a subset of the key space of the Storage. This commit changes the way the key expressions for the Replication are generated by using the hash of the configuration of the Replication: this renders these key expressions unique, hence avoiding the issue just described. This property is interesting for the initial Alignment: had we not made that change, we would have had to ensure that we perform that Alignment on a Replica operating on exactly the same key space (and not a subset) and the same configuration (in particular, the `strip_prefix`). NOTE: This does not solve the initial alignment step that will still contact Storage that are operating on a subset (if there is no better match on the network). * plugins/zenoh-plugin-storage-manager/src/replication/core.rs: - Renamed `storage_ke` to `hash_configuration` in the key expression formatters of the Digest and the Aligner. - Removed the unnecessary clones when spawning the Digest Publisher + fixed the different call sites. - Removed the scope to access the configuration as we clone it earlier in the code + fixed the different call sites. - Used the hash of the configuration to generate the key expression for the Digest. - Removed the unnecessary clones when spawning the Digest Subscriber + fixed the different call sites. - Used the hash of the configuration to generate the key expression for the Digest. - Removed the unnecessary clones when spawning the Digest Publisher + fixed the different call sites. - Used the hash of the configuration to generate the key expression for the Digest. - Removed the unnecessary clones when spawning the Aligner + fixed the different call sites. - Used the hash of the configuration to generate the key expression for the Aligner Queryable. 
Signed-off-by: Julien Loudet --- .../src/replication/core.rs | 105 +++++++++--------- 1 file changed, 55 insertions(+), 50 deletions(-) diff --git a/plugins/zenoh-plugin-storage-manager/src/replication/core.rs b/plugins/zenoh-plugin-storage-manager/src/replication/core.rs index 0ef0824c65..522385938c 100644 --- a/plugins/zenoh-plugin-storage-manager/src/replication/core.rs +++ b/plugins/zenoh-plugin-storage-manager/src/replication/core.rs @@ -43,8 +43,8 @@ use super::{ use crate::{replication::aligner::AlignmentQuery, storages_mgt::LatestUpdates}; kedefine!( - pub digest_key_expr_formatter: "@-digest/${zid:*}/${storage_ke:**}", - pub aligner_key_expr_formatter: "@zid/${zid:*}/${storage_ke:**}/aligner", + pub digest_key_expr_formatter: "@-digest/${zid:*}/${hash_configuration:*}", + pub aligner_key_expr_formatter: "@zid/${zid:*}/${hash_configuration:*}/aligner", ); #[derive(Clone)] @@ -69,16 +69,20 @@ impl Replication { /// /// [Log]: crate::replication::log::LogLatest pub(crate) fn spawn_digest_publisher(&self) -> JoinHandle<()> { - let zenoh_session = self.zenoh_session.clone(); - let storage_key_expr = self.storage_key_expr.clone(); - let replication_log = self.replication_log.clone(); - let latest_updates = self.latest_updates.clone(); + let replication = self.clone(); tokio::task::spawn(async move { + let configuration = replication + .replication_log + .read() + .await + .configuration + .clone(); + let digest_key_put = match keformat!( digest_key_expr_formatter::formatter(), - zid = zenoh_session.zid(), - storage_ke = storage_key_expr + zid = replication.zenoh_session.zid(), + hash_configuration = *configuration.fingerprint(), ) { Ok(key) => key, Err(e) => { @@ -89,25 +93,14 @@ impl Replication { } }; - // Scope to not forget to release the lock. 
- let (publication_interval, propagation_delay, last_elapsed_interval) = { - let replication_log_guard = replication_log.read().await; - let configuration = replication_log_guard.configuration(); - let last_elapsed_interval = match configuration.last_elapsed_interval() { - Ok(idx) => idx, - Err(e) => { - tracing::error!( - "Fatal error, call to `last_elapsed_interval` failed with: {e:?}" - ); - return; - } - }; - - ( - configuration.interval, - configuration.propagation_delay, - last_elapsed_interval, - ) + let last_elapsed_interval = match configuration.last_elapsed_interval() { + Ok(idx) => idx, + Err(e) => { + tracing::error!( + "Fatal error, call to `last_elapsed_interval` failed with: {e:?}" + ); + return; + } }; // We have no control over when a replica is going to be started. The purpose is here @@ -115,7 +108,7 @@ impl Replication { // at every interval (+ δ). let duration_until_next_interval = { let millis_last_elapsed = - *last_elapsed_interval as u128 * publication_interval.as_millis(); + *last_elapsed_interval as u128 * configuration.interval.as_millis(); if millis_last_elapsed > u64::MAX as u128 { tracing::error!( @@ -138,7 +131,7 @@ impl Replication { }; Duration::from_millis( - (publication_interval.as_millis() - (millis_since_now - millis_last_elapsed)) + (configuration.interval.as_millis() - (millis_since_now - millis_last_elapsed)) as u64, ) }; @@ -148,7 +141,7 @@ impl Replication { let mut events = HashMap::default(); // Internal delay to avoid an "update storm". - let max_publication_delay = (publication_interval.as_millis() / 3) as u64; + let max_publication_delay = (configuration.interval.as_millis() / 3) as u64; let mut digest_update_start: Instant; let mut digest: Digest; @@ -160,15 +153,15 @@ impl Replication { // Except that we want to take into account the time it takes for a publication to // reach this Zenoh node. Hence, we sleep for `propagation_delay` to, hopefully, // catch the publications that are in transit. 
- tokio::time::sleep(propagation_delay).await; + tokio::time::sleep(configuration.propagation_delay).await; { - let mut latest_updates_guard = latest_updates.write().await; + let mut latest_updates_guard = replication.latest_updates.write().await; std::mem::swap(&mut events, &mut latest_updates_guard); } { - let mut replication_guard = replication_log.write().await; + let mut replication_guard = replication.replication_log.write().await; replication_guard.update(events.drain().map(|(_, event)| event)); digest = match replication_guard.digest() { Ok(digest) => digest, @@ -194,7 +187,8 @@ impl Replication { // buffer that, hopefully, has enough memory. let buffer_capacity = serialization_buffer.capacity(); - match zenoh_session + match replication + .zenoh_session .put( &digest_key_put, std::mem::replace( @@ -209,17 +203,17 @@ impl Replication { } let digest_update_duration = digest_update_start.elapsed(); - if digest_update_duration > publication_interval { + if digest_update_duration > configuration.interval { tracing::warn!( "The duration it took to update and publish the Digest is superior to the \ duration of an Interval ({} ms), we recommend increasing the duration of \ the latter. Digest update: {} ms (incl. 
delay: {} ms)", - publication_interval.as_millis(), + configuration.interval.as_millis(), digest_update_duration.as_millis(), - publication_delay + propagation_delay.as_millis() as u64 + publication_delay + configuration.propagation_delay.as_millis() as u64 ); } else { - tokio::time::sleep(publication_interval - digest_update_duration).await; + tokio::time::sleep(configuration.interval - digest_update_duration).await; } } }) @@ -233,16 +227,20 @@ impl Replication { /// /// [DigestDiff]: super::digest::DigestDiff pub(crate) fn spawn_digest_subscriber(&self) -> JoinHandle<()> { - let zenoh_session = self.zenoh_session.clone(); - let storage_key_expr = self.storage_key_expr.clone(); - let replication_log = self.replication_log.clone(); let replication = self.clone(); tokio::task::spawn(async move { + let configuration = replication + .replication_log + .read() + .await + .configuration + .clone(); + let digest_key_sub = match keformat!( digest_key_expr_formatter::formatter(), zid = "*", - storage_ke = &storage_key_expr + hash_configuration = *configuration.fingerprint() ) { Ok(key) => key, Err(e) => { @@ -257,7 +255,8 @@ impl Replication { let mut retry = 0; let subscriber = loop { - match zenoh_session + match replication + .zenoh_session .declare_subscriber(&digest_key_sub) // NOTE: We need to explicitly set the locality to `Remote` as otherwise the // Digest subscriber will also receive the Digest published by its own @@ -325,7 +324,7 @@ impl Replication { tracing::debug!("Replication digest received"); - let digest = match replication_log.read().await.digest() { + let digest = match replication.replication_log.read().await.digest() { Ok(digest) => digest, Err(e) => { tracing::error!( @@ -340,7 +339,7 @@ impl Replication { let replica_aligner_ke = match keformat!( aligner_key_expr_formatter::formatter(), - storage_ke = &storage_key_expr, + hash_configuration = *configuration.fingerprint(), zid = source_zid, ) { Ok(key) => key, @@ -373,15 +372,20 @@ impl Replication 
{ /// responsible for fetching in the Replication Log or in the Storage the relevant information /// to send to the Replica such that it can align its own Storage. pub(crate) fn spawn_aligner_queryable(&self) -> JoinHandle<()> { - let zenoh_session = self.zenoh_session.clone(); - let storage_key_expr = self.storage_key_expr.clone(); let replication = self.clone(); tokio::task::spawn(async move { + let configuration = replication + .replication_log + .read() + .await + .configuration + .clone(); + let aligner_ke = match keformat!( aligner_key_expr_formatter::formatter(), - zid = zenoh_session.zid(), - storage_ke = storage_key_expr, + zid = replication.zenoh_session.zid(), + hash_configuration = *configuration.fingerprint(), ) { Ok(ke) => ke, Err(e) => { @@ -395,7 +399,8 @@ impl Replication { let mut retry = 0; let queryable = loop { - match zenoh_session + match replication + .zenoh_session .declare_queryable(&aligner_ke) .allowed_origin(Locality::Remote) .await From 049fdd8400e9d5b7de5a0fd89017fb2ec8c06854 Mon Sep 17 00:00:00 2001 From: Julien Loudet Date: Wed, 25 Sep 2024 16:03:36 +0200 Subject: [PATCH 15/26] refactor(storage-manager): remove unnecessary wait/retry policy It does not bring anything to wait and retry on error when attempting to declare a Queryable or a Subscriber: either the Session is established and these operations will succeed or the Session is no longer existing in which case we should terminate. * plugins/zenoh-plugin-storage-manager/src/replication/core.rs: - The `MAX_RETRY` and `WAIT_PERIODS_SECS` constants are no longer needed. - Removed the wait/retry loops when creating the Digest Subscriber and Aligner Queryable. 
Signed-off-by: Julien Loudet --- .../src/replication/core.rs | 74 ++++++------------- 1 file changed, 23 insertions(+), 51 deletions(-) diff --git a/plugins/zenoh-plugin-storage-manager/src/replication/core.rs b/plugins/zenoh-plugin-storage-manager/src/replication/core.rs index 522385938c..bd96912a4b 100644 --- a/plugins/zenoh-plugin-storage-manager/src/replication/core.rs +++ b/plugins/zenoh-plugin-storage-manager/src/replication/core.rs @@ -35,11 +35,7 @@ use zenoh::{ }; use zenoh_backend_traits::Storage; -use super::{ - digest::Digest, - log::LogLatest, - service::{MAX_RETRY, WAIT_PERIOD_SECS}, -}; +use super::{digest::Digest, log::LogLatest}; use crate::{replication::aligner::AlignmentQuery, storages_mgt::LatestUpdates}; kedefine!( @@ -253,9 +249,7 @@ impl Replication { } }; - let mut retry = 0; - let subscriber = loop { - match replication + let subscriber = match replication .zenoh_session .declare_subscriber(&digest_key_sub) // NOTE: We need to explicitly set the locality to `Remote` as otherwise the @@ -263,24 +257,14 @@ impl Replication { // Digest publisher. .allowed_origin(Locality::Remote) .await - { - Ok(subscriber) => break subscriber, - Err(e) => { - if retry < MAX_RETRY { - retry += 1; - tracing::warn!( - "Failed to declare Digest subscriber: {e:?}. Attempt \ - {retry}/{MAX_RETRY}." - ); - tokio::time::sleep(Duration::from_secs(WAIT_PERIOD_SECS)).await; - } else { - tracing::error!( - "Could not declare Digest subscriber. The storage will not \ - receive the Replication Digest of other replicas." - ); - return; - } - } + { + Ok(subscriber) => subscriber, + Err(e) => { + tracing::error!( + "Could not declare Digest subscriber: {e:?}. The storage will not receive \ + the Replication Digest of other replicas." 
+ ); + return; } }; @@ -397,31 +381,19 @@ impl Replication { } }; - let mut retry = 0; - let queryable = loop { - match replication - .zenoh_session - .declare_queryable(&aligner_ke) - .allowed_origin(Locality::Remote) - .await - { - Ok(queryable) => break queryable, - Err(e) => { - if retry < MAX_RETRY { - retry += 1; - tracing::warn!( - "Failed to declare the Aligner queryable: {e:?}. Attempt \ - {retry}/{MAX_RETRY}." - ); - tokio::time::sleep(Duration::from_secs(WAIT_PERIOD_SECS)).await; - } else { - tracing::error!( - "Could not declare the Aligner queryable. This storage will NOT \ - align with other replicas." - ); - return; - } - } + let queryable = match replication + .zenoh_session + .declare_queryable(&aligner_ke) + .allowed_origin(Locality::Remote) + .await + { + Ok(queryable) => queryable, + Err(e) => { + tracing::error!( + "Could not declare the Aligner queryable: {e:?}. This storage will NOT \ + align with other replicas." + ); + return; } }; From e9ef0309ef6d3abc44b83e3d0059a2859da5c2fa Mon Sep 17 00:00:00 2001 From: Julien Loudet Date: Wed, 25 Sep 2024 16:43:11 +0200 Subject: [PATCH 16/26] refactor(storage-manager): initial alignment on empty Storage This commit changes the way a replicated Storage starts: if it is empty and configured to be replicated, it will attempt to align with an active and compatible (i.e. same configuration) Replica before anything. The previous behaviour made a query on the key expression of the Storage. Although, it could, in some cases, actually perform the same initial alignment, it could not guarantee to only query a Storage that was configured to be replicated. To perform this controlled initial alignment, new variants to the `AlignmentQuery` and `AlignmentReply` enumerations were added: `Discovery` to discover an active Replica and reply with its `ZenohId`, `All` to request the data from the discovered Replica. To avoid contention, this transfer is performed by batch, one `Interval` at a time. 
* plugins/zenoh-plugin-storage-manager/src/replication/aligner.rs: - Added new variants `AlignmentQuery::All` and `AlignmentQuery::Discovery`. - Added new variant `AlignmentReply::Discovery`. - Updated the `aligner` method to: - Send the `ZenohId` as a reply to an `AlignmentQuery::Discovery`. - Send all the data of the Storage as a reply to an `AlignmentQuery::All`. This leverages the already existing `reply_events` method. - Updated the `reply_events` method to not attempt to fetch the content of the Storage if the action is set to `delete`. Before this commit, the only time this method was called was during an alignment which filters out the deleted events (hence not requiring this code). - Updated the `spawn_query_replica_aligner` method: - It now returns the handle of the newly created task as we want to wait for it to finish when performing the initial alignment. - Changed the consolidation to `ConsolidationMode::Monotonic` when sending an `AlignmentQuery::Discovery`: we want to contact the fastest answering Replica (hopefully the closest). - Stopped processing replies when processing an `AlignmentQuery::Discovery` as we only want to perform the initial alignment once. - Updated the `process_alignment_reply`: - Process an `AlignmentReply::Discovery` by sending a follow-up `AlignmentQuery::All` to retrieve the content of the Storage of the discovered Replica. - It does not attempt to delete an entry in the Storage when processing an `AlignmentReply::Retrieval`. This could only happen when performing an initial alignment in which case the receiving Storage is empty. We basically only need to record the fact that a delete was performed in the Replication Log. * plugins/zenoh-plugin-storage-manager/src/replication/core.rs: implemented the `initial_alignment` method that attempts to discover a Replica by sending out an `AlignmentQuery::Discovery` on the Aligner Queryable for all Replicas. 
Before making this query we wait a small delay to give enough time for Zenoh to propagate the routing tables. * plugins/zenoh-plugin-storage-manager/src/replication/service.rs: - Removed the constants `MAX_RETRY` and `WAIT_PERIOD_SECS` as they were no longer needed. - Updated the documentation of the `spawn_start` function. - Removed the previous implementation of the initial alignment that made a query on the key expression of the Storage. - Added a check after creating the `Replication` structure: if the Replication Log is empty, which indicates an empty Storage, then perform an initial alignment. Signed-off-by: Julien Loudet --- .../src/replication/aligner.rs | 157 +++++++++++++++--- .../src/replication/core.rs | 57 +++++++ .../src/replication/service.rs | 91 ++++------ 3 files changed, 220 insertions(+), 85 deletions(-) diff --git a/plugins/zenoh-plugin-storage-manager/src/replication/aligner.rs b/plugins/zenoh-plugin-storage-manager/src/replication/aligner.rs index e805bda26c..b957fc50f6 100644 --- a/plugins/zenoh-plugin-storage-manager/src/replication/aligner.rs +++ b/plugins/zenoh-plugin-storage-manager/src/replication/aligner.rs @@ -18,18 +18,20 @@ use std::{ }; use serde::{Deserialize, Serialize}; +use tokio::task::JoinHandle; use zenoh::{ bytes::ZBytes, internal::Value, - key_expr::OwnedKeyExpr, + key_expr::{format::keformat, OwnedKeyExpr}, query::{ConsolidationMode, Query, Selector}, sample::{Sample, SampleKind}, + session::ZenohId, }; use zenoh_backend_traits::StorageInsertionResult; use super::{ classification::{IntervalIdx, SubIntervalIdx}, - core::Replication, + core::{aligner_key_expr_formatter, Replication}, digest::{DigestDiff, Fingerprint}, log::EventMetadata, }; @@ -51,6 +53,8 @@ use super::{ /// hence directly skipping to the `SubIntervals` variant. 
#[derive(Debug, Deserialize, Serialize, PartialEq, Eq)] pub(crate) enum AlignmentQuery { + Discovery, + All, Diff(DigestDiff), Intervals(HashSet), SubIntervals(HashMap>), @@ -67,6 +71,7 @@ pub(crate) enum AlignmentQuery { /// Not all replies are made, it depends on the Era when a misalignment was detected. #[derive(Debug, Deserialize, Serialize, PartialEq, Eq)] pub(crate) enum AlignmentReply { + Discovery(ZenohId), Intervals(HashMap), SubIntervals(HashMap>), Events(Vec), @@ -101,6 +106,47 @@ impl Replication { }; match alignment_query { + AlignmentQuery::Discovery => { + tracing::trace!("Processing `AlignmentQuery::Discovery`"); + reply_to_query( + &query, + AlignmentReply::Discovery(self.zenoh_session.zid()), + None, + ) + .await; + } + AlignmentQuery::All => { + tracing::trace!("Processing `AlignmentQuery::All`"); + + let idx_intervals = self + .replication_log + .read() + .await + .intervals + .keys() + .copied() + .collect::>(); + + for interval_idx in idx_intervals { + let mut events_to_send = Vec::default(); + if let Some(interval) = self + .replication_log + .read() + .await + .intervals + .get(&interval_idx) + { + interval.sub_intervals.values().for_each(|sub_interval| { + events_to_send.extend(sub_interval.events.values().map(Into::into)); + }); + } + + // NOTE: As we took the lock in the `if let` block, it is released here, + // diminishing contention. + + self.reply_events(&query, events_to_send).await; + } + } AlignmentQuery::Diff(digest_diff) => { tracing::trace!("Processing `AlignmentQuery::Diff`"); if digest_diff.cold_eras_differ { @@ -262,6 +308,11 @@ impl Replication { /// is the reason why we need the consolidation to set to be `None` (⚠️). 
pub(crate) async fn reply_events(&self, query: &Query, events_to_retrieve: Vec) { for event_metadata in events_to_retrieve { + if event_metadata.action == SampleKind::Delete { + reply_to_query(query, AlignmentReply::Retrieval(event_metadata), None).await; + continue; + } + let stored_data = { let mut storage = self.storage.lock().await; match storage.get(event_metadata.stripped_key.clone(), "").await { @@ -323,7 +374,7 @@ impl Replication { &self, replica_aligner_ke: OwnedKeyExpr, alignment_query: AlignmentQuery, - ) { + ) -> JoinHandle<()> { let replication = self.clone(); tokio::task::spawn(async move { let attachment = match bincode::serialize(&alignment_query) { @@ -334,17 +385,29 @@ impl Replication { } }; + // NOTE: We need to put the Consolidation to `None` as otherwise if multiple replies are + // sent, they will be "consolidated" and only one of them will make it through. + // + // When we retrieve Samples from a Replica, each Sample is sent in a separate + // reply. Hence the need to have no consolidation. + let mut consolidation = ConsolidationMode::None; + + if matches!(alignment_query, AlignmentQuery::Discovery) { + // NOTE: `Monotonic` means that Zenoh will forward the first answer it receives (and + // ensure that later answers are with a higher timestamp — we do not care + // about that last aspect). + // + // By setting the consolidation to this value when performing the initial + // alignment, we select the most reactive Replica (hopefully the closest as + // well). + consolidation = ConsolidationMode::Monotonic; + } + match replication .zenoh_session .get(Into::::into(replica_aligner_ke.clone())) .attachment(attachment) - // NOTE: We need to put the Consolidation to `None` as otherwise if multiple replies - // are sent, they will be "consolidated" and only one of them will make it - // through. - // - // When we retrieve Samples from a Replica, each Sample is sent in a separate - // reply. Hence the need to have no consolidation. 
- .consolidation(ConsolidationMode::None) + .consolidation(consolidation) .await { Err(e) => { @@ -387,10 +450,17 @@ impl Replication { sample, ) .await; + + // The consolidation mode `Monotonic`, used for sending out an + // `AlignmentQuery::Discovery`, will keep on sending replies. We only want + // to discover / align with a single Replica so we break here. + if matches!(alignment_query, AlignmentQuery::Discovery) { + return; + } } } } - }); + }) } /// Processes the [AlignmentReply] sent by the Replica that has potentially data this Storage is @@ -438,6 +508,39 @@ impl Replication { sample: Sample, ) { match alignment_reply { + AlignmentReply::Discovery(replica_zid) => { + let parsed_ke = match aligner_key_expr_formatter::parse(&replica_aligner_ke) { + Ok(ke) => ke, + Err(e) => { + tracing::error!( + "Failed to parse < {replica_aligner_ke} > as a valid Aligner key \ + expression: {e:?}" + ); + return; + } + }; + + let replica_aligner_ke = match keformat!( + aligner_key_expr_formatter::formatter(), + hash_configuration = parsed_ke.hash_configuration(), + zid = replica_zid, + ) { + Ok(ke) => ke, + Err(e) => { + tracing::error!("Failed to generate a valid Aligner key expression: {e:?}"); + return; + } + }; + + tracing::debug!("Performing initial alignment with Replica < {replica_zid} >"); + + if let Err(e) = self + .spawn_query_replica_aligner(replica_aligner_ke, AlignmentQuery::All) + .await + { + tracing::error!("Error returned while performing the initial alignment: {e:?}"); + } + } AlignmentReply::Intervals(replica_intervals) => { tracing::trace!("Processing `AlignmentReply::Intervals`"); let intervals_diff = { @@ -616,18 +719,26 @@ impl Replication { } } - if matches!( - self.storage - .lock() - .await - .put( - replica_event.stripped_key.clone(), - sample.into(), - replica_event.timestamp, - ) - .await, - Ok(StorageInsertionResult::Outdated) | Err(_) - ) { + // NOTE: This code can only be called with `action` set to `delete` on an initial + // alignment, in 
which case the Storage of the receiving Replica is empty => there + // is no need to actually call `storage.delete`. + // + // Outside of an initial alignment, the `delete` action will be performed at the + // step above, in `AlignmentReply::Events`. + if replica_event.action == SampleKind::Put + && matches!( + self.storage + .lock() + .await + .put( + replica_event.stripped_key.clone(), + sample.into(), + replica_event.timestamp, + ) + .await, + Ok(StorageInsertionResult::Outdated) | Err(_) + ) + { return; } diff --git a/plugins/zenoh-plugin-storage-manager/src/replication/core.rs b/plugins/zenoh-plugin-storage-manager/src/replication/core.rs index bd96912a4b..0db1459ec7 100644 --- a/plugins/zenoh-plugin-storage-manager/src/replication/core.rs +++ b/plugins/zenoh-plugin-storage-manager/src/replication/core.rs @@ -53,6 +53,63 @@ pub(crate) struct Replication { } impl Replication { + /// Performs an initial alignment, skipping the comparison of Digest, asking directly the first + /// discovered Replica for all its entries. + /// + /// # ⚠️ Assumption: empty Storage + /// + /// We assume that this method will only be called if the underlying Storage is empty. This has + /// at least one consequence: if the Aligner receives a `delete` event from the Replica, it will + /// not attempt to delete anything from the Storage. + /// + /// # Replica discovery + /// + /// To discover a Replica, this method will create a Digest subscriber, wait to receive a + /// *valid* Digest and, upon reception, ask that Replica for all its entries. + /// + /// To avoid waiting indefinitely (in case there are no other Replica on the network), the + /// subscriber will wait for, at most, the duration of two Intervals. 
+ pub(crate) async fn initial_alignment(&self) { + let ke_all_replicas = match keformat!( + aligner_key_expr_formatter::formatter(), + hash_configuration = *self + .replication_log + .read() + .await + .configuration + .fingerprint(), + zid = "*", + ) { + Ok(ke) => ke, + Err(e) => { + tracing::error!( + "Failed to generate key expression to query all Replicas: {e:?}. Skipping \ + initial alignment." + ); + return; + } + }; + + // NOTE: As discussed with @OlivierHecart, the plugins do not wait for the duration of the + // "scouting delay" before performing any Zenoh operation. Hence, we manually enforce this + // delay when performing the initial alignment. + let delay = self + .zenoh_session + .config() + .lock() + .scouting + .delay() + .unwrap_or(500); + tokio::time::sleep(Duration::from_millis(delay)).await; + + if let Err(e) = self + .spawn_query_replica_aligner(ke_all_replicas, AlignmentQuery::Discovery) + .await + { + tracing::error!("Initial alignment failed with: {e:?}"); + } + } + /// Spawns a task that periodically publishes the [Digest] of the Replication [Log]. 
/// /// This task will perform the following steps: diff --git a/plugins/zenoh-plugin-storage-manager/src/replication/service.rs b/plugins/zenoh-plugin-storage-manager/src/replication/service.rs index f3b87c6c3f..e48fb4fecd 100644 --- a/plugins/zenoh-plugin-storage-manager/src/replication/service.rs +++ b/plugins/zenoh-plugin-storage-manager/src/replication/service.rs @@ -12,13 +12,13 @@ // ZettaScale Zenoh Team, // -use std::{sync::Arc, time::Duration}; +use std::sync::Arc; use tokio::{ sync::{broadcast::Receiver, RwLock}, task::JoinHandle, }; -use zenoh::{key_expr::OwnedKeyExpr, query::QueryTarget, sample::Locality, session::Session}; +use zenoh::{key_expr::OwnedKeyExpr, session::Session}; use super::{core::Replication, LogLatest}; use crate::storages_mgt::{LatestUpdates, StorageMessage, StorageService}; @@ -29,17 +29,22 @@ pub(crate) struct ReplicationService { aligner_queryable_handle: JoinHandle<()>, } -pub(crate) const MAX_RETRY: usize = 2; -pub(crate) const WAIT_PERIOD_SECS: u64 = 4; - impl ReplicationService { /// Starts the `ReplicationService`, spawning multiple tasks. /// + /// # Initial alignment + /// + /// To optimise network resources, if the Storage is empty an "initial alignment" will be + /// performed: if a Replica is detected, a query will be made to retrieve the entire content of + /// its Storage. + /// /// # Tasks spawned /// - /// This function will spawn two tasks: + /// This function will spawn four long-lived tasks: /// 1. One to publish the [Digest]. - /// 2. One to wait on the provided [Receiver] in order to stop the Replication Service, + /// 2. One to receive the [Digest] of other Replica. + /// 3. One to receive alignment queries of other Replica. + /// 4. One to wait on the provided [Receiver] in order to stop the Replication Service, /// attempting to abort all the tasks that were spawned, once a Stop message has been /// received. 
pub async fn spawn_start( @@ -50,65 +55,27 @@ impl ReplicationService { latest_updates: Arc>, mut rx: Receiver, ) { - // We perform a "wait-try" policy because Zenoh needs some time to propagate the routing - // information and, here, we need to have the queryables propagated. - // - // 4 seconds is an arbitrary value. - let mut attempt = 0; - let mut received_reply = false; - - while attempt < MAX_RETRY { - attempt += 1; - tokio::time::sleep(Duration::from_secs(WAIT_PERIOD_SECS)).await; - - match zenoh_session - .get(&storage_key_expr) - // `BestMatching`, the default option for `target`, will try to minimise the storage - // that are queried and their distance while trying to maximise the key space - // covered. - // - // In other words, if there is a close and complete storage, it will only query this - // one. - .target(QueryTarget::BestMatching) - // The value `Remote` is self-explanatory but why it is needed deserves an - // explanation: we do not want to query the local database as the purpose is to get - // the data from other replicas (if there is one). - .allowed_destination(Locality::Remote) - .await - { - Ok(replies) => { - while let Ok(reply) = replies.recv_async().await { - received_reply = true; - if let Ok(sample) = reply.into_result() { - if let Err(e) = storage_service.process_sample(sample).await { - tracing::error!("{e:?}"); - } - } - } - } - Err(e) => tracing::error!("Initial alignment Query failed with: {e:?}"), - } + let storage = storage_service.storage.clone(); - if received_reply { - break; - } + let replication = Replication { + zenoh_session, + replication_log, + storage_key_expr, + latest_updates, + storage, + }; - tracing::debug!( - "Found no Queryable matching '{storage_key_expr}'. Attempt {attempt}/{MAX_RETRY}." 
- ); + if replication + .replication_log + .read() + .await + .intervals + .is_empty() + { + replication.initial_alignment().await; } - let storage = storage_service.storage.clone(); - tokio::task::spawn(async move { - let replication = Replication { - zenoh_session, - replication_log, - storage_key_expr, - latest_updates, - storage, - }; - let replication_service = Self { digest_publisher_handle: replication.spawn_digest_publisher(), digest_subscriber_handle: replication.spawn_digest_subscriber(), @@ -124,7 +91,7 @@ impl ReplicationService { }); } - /// Stops all the tasks spawned by the `ReplicationService`. + /// Stops all the long-lived tasks spawned by the `ReplicationService`. pub fn stop(self) { self.digest_publisher_handle.abort(); self.digest_subscriber_handle.abort(); From f60a7f004d9135fe400d229b4836ad67dcbfce2c Mon Sep 17 00:00:00 2001 From: Gabriele Baldoni Date: Fri, 27 Sep 2024 09:39:15 +0200 Subject: [PATCH 17/26] fix: starting rest plugin like any other plugin (#1478) * fix: starting rest plugin like any other plugin Signed-off-by: Gabriele Baldoni * fix: using mix of blockon_runtime and runtime_spawn Signed-off-by: Gabriele Baldoni * chore: removing commented line Signed-off-by: Gabriele Baldoni --------- Signed-off-by: Gabriele Baldoni --- plugins/zenoh-plugin-rest/src/lib.rs | 29 +++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/plugins/zenoh-plugin-rest/src/lib.rs b/plugins/zenoh-plugin-rest/src/lib.rs index c06fc09951..f02a47c9a5 100644 --- a/plugins/zenoh-plugin-rest/src/lib.rs +++ b/plugins/zenoh-plugin-rest/src/lib.rs @@ -34,7 +34,7 @@ use futures::StreamExt; use http_types::Method; use serde::{Deserialize, Serialize}; use tide::{http::Mime, sse::Sender, Request, Response, Server, StatusCode}; -use tokio::time::timeout; +use tokio::{task::JoinHandle, time::timeout}; use zenoh::{ bytes::{Encoding, ZBytes}, internal::{ @@ -72,6 +72,7 @@ lazy_static::lazy_static! 
{ .build() .expect("Unable to create runtime"); } + #[inline(always)] pub(crate) fn blockon_runtime(task: F) -> F::Output { // Check whether able to get the current runtime @@ -87,6 +88,24 @@ pub(crate) fn blockon_runtime(task: F) -> F::Output { } } +pub(crate) fn spawn_runtime(task: F) -> JoinHandle +where + F: Future + Send + 'static, + F::Output: Send + 'static, +{ + // Check whether able to get the current runtime + match tokio::runtime::Handle::try_current() { + Ok(rt) => { + // Able to get the current runtime (standalone binary), spawn on the current runtime + rt.spawn(task) + } + Err(_) => { + // Unable to get the current runtime (dynamic plugins), spawn on the global runtime + TOKIO_RUNTIME.spawn(task) + } + } +} + #[derive(Serialize, Deserialize)] struct JSONSample { key: String, @@ -286,15 +305,15 @@ impl Plugin for RestPlugin { MAX_BLOCK_THREAD_NUM.store(conf.max_block_thread_num, Ordering::SeqCst); let task = run(runtime.clone(), conf.clone()); - let task = blockon_runtime(async { - timeout(Duration::from_millis(1), TOKIO_RUNTIME.spawn(task)).await - }); + let task = + blockon_runtime(async { timeout(Duration::from_millis(1), spawn_runtime(task)).await }); - // The spawn task (TOKIO_RUNTIME.spawn(task)) should not return immediately. The server should block inside. + // The spawn task (spawn_runtime(task)) should not return immediately. The server should block inside. 
// If it returns immediately (for example, address already in use), we can get the error inside Ok if let Ok(Ok(Err(e))) = task { bail!("REST server failed within 1ms: {e}") } + Ok(Box::new(RunningPlugin(conf))) } } From 6cd1f16b1e24256306a1a0e1c9826ba4890b687a Mon Sep 17 00:00:00 2001 From: Diogo Mendes Matsubara Date: Fri, 27 Sep 2024 09:56:02 +0200 Subject: [PATCH 18/26] chore: update release.yml for required labels --- .github/release.yml | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/.github/release.yml b/.github/release.yml index c4022026eb..23d56cfb1a 100644 --- a/.github/release.yml +++ b/.github/release.yml @@ -14,12 +14,22 @@ changelog: categories: + - title: Breaking changes 💥 + labels: + - breaking-change - title: New features 🎉 labels: - enhancement + - new feature - title: Bug fixes 🐞 labels: - bug + - title: Documentation 📝 + labels: + - documentation + - title: Dependencies 👷 + labels: + - dependencies - title: Other changes labels: - - "*" + - "*" \ No newline at end of file From 941f699f5f6811ead00f27a51068e302000c46e4 Mon Sep 17 00:00:00 2001 From: OlivierHecart Date: Fri, 27 Sep 2024 12:22:12 +0200 Subject: [PATCH 19/26] Fix peers start conditions bug (#1477) * connect_peers returns false if not connected to avoid wrong start_contitions notif * Document `connect` and `connect_peer` * Clarify comments * Update comments --------- Co-authored-by: Mahmoud Mazouz --- zenoh/src/net/routing/hat/p2p_peer/gossip.rs | 2 +- zenoh/src/net/runtime/orchestrator.rs | 12 ++++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/zenoh/src/net/routing/hat/p2p_peer/gossip.rs b/zenoh/src/net/routing/hat/p2p_peer/gossip.rs index b3216c6b8c..d69eb74cc7 100644 --- a/zenoh/src/net/routing/hat/p2p_peer/gossip.rs +++ b/zenoh/src/net/routing/hat/p2p_peer/gossip.rs @@ -427,8 +427,8 @@ impl Network { .get_transport_unicast(&zid) .await .is_none() + && runtime.connect_peer(&zid, &locators).await { - runtime.connect_peer(&zid, 
&locators).await; runtime .start_conditions() .terminate_peer_connector_zid(zid) diff --git a/zenoh/src/net/runtime/orchestrator.rs b/zenoh/src/net/runtime/orchestrator.rs index faadf5eb5c..c95f47a970 100644 --- a/zenoh/src/net/runtime/orchestrator.rs +++ b/zenoh/src/net/runtime/orchestrator.rs @@ -911,7 +911,7 @@ impl Runtime { } } - /// Returns `true` if a new Transport instance has been opened with `zid`. + /// Returns `true` if a new Transport instance is established with `zid` or had already been established. #[must_use] async fn connect(&self, zid: &ZenohIdProto, scouted_locators: &[Locator]) -> bool { if !self.insert_pending_connection(*zid).await { @@ -1024,7 +1024,8 @@ impl Runtime { } } - pub async fn connect_peer(&self, zid: &ZenohIdProto, locators: &[Locator]) { + /// Returns `true` if a new Transport instance is established with `zid` or had already been established. + pub async fn connect_peer(&self, zid: &ZenohIdProto, locators: &[Locator]) -> bool { let manager = self.manager(); if zid != &manager.zid() { let has_unicast = manager.get_transport_unicast(zid).await.is_some(); @@ -1042,10 +1043,13 @@ impl Runtime { if !has_unicast && !has_multicast { tracing::debug!("Try to connect to peer {} via any of {:?}", zid, locators); - let _ = self.connect(zid, locators).await; + self.connect(zid, locators).await } else { tracing::trace!("Already connected scouted peer: {}", zid); + true } + } else { + true } } @@ -1089,7 +1093,7 @@ impl Runtime { ) { Runtime::scout(ucast_sockets, what, addr, move |hello| async move { if !hello.locators.is_empty() { - self.connect_peer(&hello.zid, &hello.locators).await + self.connect_peer(&hello.zid, &hello.locators).await; } else { tracing::warn!("Received Hello with no locators: {:?}", hello); } From af3ac7bdca37682130f01bb982276a4b7223a4ef Mon Sep 17 00:00:00 2001 From: OlivierHecart Date: Fri, 27 Sep 2024 12:42:01 +0200 Subject: [PATCH 20/26] Remove closing from hat trait (#1469) * Remove closing from hat trait * Also 
move orchestrator session closing code to closed phase * Remove closing from TransportPeerEventHandler --- io/zenoh-transport/src/lib.rs | 4 - io/zenoh-transport/src/multicast/transport.rs | 5 - .../src/unicast/lowlatency/transport.rs | 5 - .../src/unicast/universal/transport.rs | 5 - io/zenoh-transport/tests/endpoints.rs | 1 - .../tests/multicast_compression.rs | 2 - .../tests/multicast_transport.rs | 2 - .../tests/transport_whitelist.rs | 1 - .../tests/unicast_authenticator.rs | 1 - .../tests/unicast_compression.rs | 2 - .../tests/unicast_concurrent.rs | 1 - .../tests/unicast_intermittent.rs | 1 - .../tests/unicast_priorities.rs | 2 - io/zenoh-transport/tests/unicast_shm.rs | 1 - .../tests/unicast_simultaneous.rs | 1 - io/zenoh-transport/tests/unicast_transport.rs | 2 - zenoh/src/api/admin.rs | 4 - zenoh/src/net/primitives/demux.rs | 17 +- zenoh/src/net/primitives/mux.rs | 8 +- zenoh/src/net/routing/dispatcher/face.rs | 28 ++- zenoh/src/net/routing/dispatcher/tables.rs | 24 +-- zenoh/src/net/routing/hat/client/mod.rs | 11 +- .../src/net/routing/hat/linkstate_peer/mod.rs | 45 ++--- zenoh/src/net/routing/hat/mod.rs | 9 +- zenoh/src/net/routing/hat/p2p_peer/mod.rs | 27 +-- zenoh/src/net/routing/hat/router/mod.rs | 173 ++++++++---------- zenoh/src/net/runtime/mod.rs | 22 +-- zenoh/src/net/runtime/orchestrator.rs | 2 +- zenoh/src/net/tests/tables.rs | 64 +++---- 29 files changed, 166 insertions(+), 304 deletions(-) diff --git a/io/zenoh-transport/src/lib.rs b/io/zenoh-transport/src/lib.rs index e603563b6e..f004b4d511 100644 --- a/io/zenoh-transport/src/lib.rs +++ b/io/zenoh-transport/src/lib.rs @@ -82,7 +82,6 @@ impl TransportEventHandler for DummyTransportEventHandler { /*************************************/ pub trait TransportMulticastEventHandler: Send + Sync { fn new_peer(&self, peer: TransportPeer) -> ZResult>; - fn closing(&self); fn closed(&self); fn as_any(&self) -> &dyn Any; } @@ -95,7 +94,6 @@ impl TransportMulticastEventHandler for 
DummyTransportMulticastEventHandler { fn new_peer(&self, _peer: TransportPeer) -> ZResult> { Ok(Arc::new(DummyTransportPeerEventHandler)) } - fn closing(&self) {} fn closed(&self) {} fn as_any(&self) -> &dyn Any { self @@ -121,7 +119,6 @@ pub trait TransportPeerEventHandler: Send + Sync { fn handle_message(&self, msg: NetworkMessage) -> ZResult<()>; fn new_link(&self, src: Link); fn del_link(&self, link: Link); - fn closing(&self); fn closed(&self); fn as_any(&self) -> &dyn Any; } @@ -137,7 +134,6 @@ impl TransportPeerEventHandler for DummyTransportPeerEventHandler { fn new_link(&self, _link: Link) {} fn del_link(&self, _link: Link) {} - fn closing(&self) {} fn closed(&self) {} fn as_any(&self) -> &dyn Any { diff --git a/io/zenoh-transport/src/multicast/transport.rs b/io/zenoh-transport/src/multicast/transport.rs index 36b9dbbea0..bcccaa9a85 100644 --- a/io/zenoh-transport/src/multicast/transport.rs +++ b/io/zenoh-transport/src/multicast/transport.rs @@ -178,11 +178,7 @@ impl TransportMulticastInner { pub(super) async fn delete(&self) -> ZResult<()> { tracing::debug!("Closing multicast transport on {:?}", self.locator); - // Notify the callback that we are going to close the transport let callback = zwrite!(self.callback).take(); - if let Some(cb) = callback.as_ref() { - cb.closing(); - } // Delete the transport on the manager let _ = self.manager.del_transport_multicast(&self.locator).await; @@ -441,7 +437,6 @@ impl TransportMulticastInner { // TODO(yuyuan): Unify the termination peer.token.cancel(); - peer.handler.closing(); drop(guard); peer.handler.closed(); } diff --git a/io/zenoh-transport/src/unicast/lowlatency/transport.rs b/io/zenoh-transport/src/unicast/lowlatency/transport.rs index c602dcf806..69d88af636 100644 --- a/io/zenoh-transport/src/unicast/lowlatency/transport.rs +++ b/io/zenoh-transport/src/unicast/lowlatency/transport.rs @@ -117,12 +117,7 @@ impl TransportUnicastLowlatency { // to avoid concurrent new_transport and closing/closed notifications 
let mut a_guard = self.get_alive().await; *a_guard = false; - - // Notify the callback that we are going to close the transport let callback = zwrite!(self.callback).take(); - if let Some(cb) = callback.as_ref() { - cb.closing(); - } // Delete the transport on the manager let _ = self.manager.del_transport_unicast(&self.config.zid).await; diff --git a/io/zenoh-transport/src/unicast/universal/transport.rs b/io/zenoh-transport/src/unicast/universal/transport.rs index f01a4a8f18..fdaadaea66 100644 --- a/io/zenoh-transport/src/unicast/universal/transport.rs +++ b/io/zenoh-transport/src/unicast/universal/transport.rs @@ -129,12 +129,7 @@ impl TransportUnicastUniversal { // to avoid concurrent new_transport and closing/closed notifications let mut a_guard = self.get_alive().await; *a_guard = false; - - // Notify the callback that we are going to close the transport let callback = zwrite!(self.callback).take(); - if let Some(cb) = callback.as_ref() { - cb.closing(); - } // Delete the transport on the manager let _ = self.manager.del_transport_unicast(&self.config.zid).await; diff --git a/io/zenoh-transport/tests/endpoints.rs b/io/zenoh-transport/tests/endpoints.rs index 7fe2f949ef..3ebb015981 100644 --- a/io/zenoh-transport/tests/endpoints.rs +++ b/io/zenoh-transport/tests/endpoints.rs @@ -62,7 +62,6 @@ impl TransportPeerEventHandler for SC { } fn new_link(&self, _link: Link) {} fn del_link(&self, _link: Link) {} - fn closing(&self) {} fn closed(&self) {} fn as_any(&self) -> &dyn Any { diff --git a/io/zenoh-transport/tests/multicast_compression.rs b/io/zenoh-transport/tests/multicast_compression.rs index 3b8715c0df..5e31aa6514 100644 --- a/io/zenoh-transport/tests/multicast_compression.rs +++ b/io/zenoh-transport/tests/multicast_compression.rs @@ -111,7 +111,6 @@ mod tests { count: self.count.clone(), })) } - fn closing(&self) {} fn closed(&self) {} fn as_any(&self) -> &dyn Any { @@ -127,7 +126,6 @@ mod tests { fn new_link(&self, _link: Link) {} fn del_link(&self, _link: 
Link) {} - fn closing(&self) {} fn closed(&self) {} fn as_any(&self) -> &dyn Any { diff --git a/io/zenoh-transport/tests/multicast_transport.rs b/io/zenoh-transport/tests/multicast_transport.rs index 664de47ffb..18c8468ecc 100644 --- a/io/zenoh-transport/tests/multicast_transport.rs +++ b/io/zenoh-transport/tests/multicast_transport.rs @@ -110,7 +110,6 @@ mod tests { count: self.count.clone(), })) } - fn closing(&self) {} fn closed(&self) {} fn as_any(&self) -> &dyn Any { @@ -126,7 +125,6 @@ mod tests { fn new_link(&self, _link: Link) {} fn del_link(&self, _link: Link) {} - fn closing(&self) {} fn closed(&self) {} fn as_any(&self) -> &dyn Any { diff --git a/io/zenoh-transport/tests/transport_whitelist.rs b/io/zenoh-transport/tests/transport_whitelist.rs index f8428b457d..66f5b58e3b 100644 --- a/io/zenoh-transport/tests/transport_whitelist.rs +++ b/io/zenoh-transport/tests/transport_whitelist.rs @@ -58,7 +58,6 @@ impl TransportPeerEventHandler for SCRouter { fn new_link(&self, _link: Link) {} fn del_link(&self, _link: Link) {} - fn closing(&self) {} fn closed(&self) {} fn as_any(&self) -> &dyn Any { diff --git a/io/zenoh-transport/tests/unicast_authenticator.rs b/io/zenoh-transport/tests/unicast_authenticator.rs index a9b22ad5bb..77f025717d 100644 --- a/io/zenoh-transport/tests/unicast_authenticator.rs +++ b/io/zenoh-transport/tests/unicast_authenticator.rs @@ -70,7 +70,6 @@ impl TransportPeerEventHandler for MHRouterAuthenticator { } fn new_link(&self, _link: Link) {} fn del_link(&self, _link: Link) {} - fn closing(&self) {} fn closed(&self) {} fn as_any(&self) -> &dyn Any { diff --git a/io/zenoh-transport/tests/unicast_compression.rs b/io/zenoh-transport/tests/unicast_compression.rs index 7b18983b4b..b7f34fbf7f 100644 --- a/io/zenoh-transport/tests/unicast_compression.rs +++ b/io/zenoh-transport/tests/unicast_compression.rs @@ -110,7 +110,6 @@ mod tests { fn new_link(&self, _link: Link) {} fn del_link(&self, _link: Link) {} - fn closing(&self) {} fn closed(&self) 
{} fn as_any(&self) -> &dyn Any { @@ -150,7 +149,6 @@ mod tests { fn new_link(&self, _link: Link) {} fn del_link(&self, _link: Link) {} - fn closing(&self) {} fn closed(&self) {} fn as_any(&self) -> &dyn Any { diff --git a/io/zenoh-transport/tests/unicast_concurrent.rs b/io/zenoh-transport/tests/unicast_concurrent.rs index ea124e1c05..410ac33955 100644 --- a/io/zenoh-transport/tests/unicast_concurrent.rs +++ b/io/zenoh-transport/tests/unicast_concurrent.rs @@ -98,7 +98,6 @@ impl TransportPeerEventHandler for MHPeer { fn new_link(&self, _link: Link) {} fn del_link(&self, _link: Link) {} - fn closing(&self) {} fn closed(&self) {} fn as_any(&self) -> &dyn Any { diff --git a/io/zenoh-transport/tests/unicast_intermittent.rs b/io/zenoh-transport/tests/unicast_intermittent.rs index 9564eeb865..3e2f8196f4 100644 --- a/io/zenoh-transport/tests/unicast_intermittent.rs +++ b/io/zenoh-transport/tests/unicast_intermittent.rs @@ -138,7 +138,6 @@ impl TransportPeerEventHandler for SCClient { fn new_link(&self, _link: Link) {} fn del_link(&self, _link: Link) {} - fn closing(&self) {} fn closed(&self) {} fn as_any(&self) -> &dyn Any { diff --git a/io/zenoh-transport/tests/unicast_priorities.rs b/io/zenoh-transport/tests/unicast_priorities.rs index d71cc845cc..87cf5b5e9e 100644 --- a/io/zenoh-transport/tests/unicast_priorities.rs +++ b/io/zenoh-transport/tests/unicast_priorities.rs @@ -133,7 +133,6 @@ impl TransportPeerEventHandler for SCRouter { fn new_link(&self, _link: Link) {} fn del_link(&self, _link: Link) {} - fn closing(&self) {} fn closed(&self) {} fn as_any(&self) -> &dyn Any { @@ -183,7 +182,6 @@ impl TransportPeerEventHandler for SCClient { fn new_link(&self, _link: Link) {} fn del_link(&self, _link: Link) {} - fn closing(&self) {} fn closed(&self) {} fn as_any(&self) -> &dyn Any { diff --git a/io/zenoh-transport/tests/unicast_shm.rs b/io/zenoh-transport/tests/unicast_shm.rs index db5f719665..b03771a164 100644 --- a/io/zenoh-transport/tests/unicast_shm.rs +++ 
b/io/zenoh-transport/tests/unicast_shm.rs @@ -141,7 +141,6 @@ mod tests { fn new_link(&self, _link: Link) {} fn del_link(&self, _link: Link) {} - fn closing(&self) {} fn closed(&self) {} fn as_any(&self) -> &dyn Any { diff --git a/io/zenoh-transport/tests/unicast_simultaneous.rs b/io/zenoh-transport/tests/unicast_simultaneous.rs index 248ff2ef53..97d43fc672 100644 --- a/io/zenoh-transport/tests/unicast_simultaneous.rs +++ b/io/zenoh-transport/tests/unicast_simultaneous.rs @@ -126,7 +126,6 @@ mod tests { fn new_link(&self, _link: Link) {} fn del_link(&self, _link: Link) {} - fn closing(&self) {} fn closed(&self) {} fn as_any(&self) -> &dyn Any { diff --git a/io/zenoh-transport/tests/unicast_transport.rs b/io/zenoh-transport/tests/unicast_transport.rs index 5a414d664d..e8f473b754 100644 --- a/io/zenoh-transport/tests/unicast_transport.rs +++ b/io/zenoh-transport/tests/unicast_transport.rs @@ -296,7 +296,6 @@ impl TransportPeerEventHandler for SCRouter { fn new_link(&self, _link: Link) {} fn del_link(&self, _link: Link) {} - fn closing(&self) {} fn closed(&self) {} fn as_any(&self) -> &dyn Any { @@ -336,7 +335,6 @@ impl TransportPeerEventHandler for SCClient { fn new_link(&self, _link: Link) {} fn del_link(&self, _link: Link) {} - fn closing(&self) {} fn closed(&self) {} fn as_any(&self) -> &dyn Any { diff --git a/zenoh/src/api/admin.rs b/zenoh/src/api/admin.rs index b96fc75dd2..f7bcc06419 100644 --- a/zenoh/src/api/admin.rs +++ b/zenoh/src/api/admin.rs @@ -187,8 +187,6 @@ impl TransportMulticastEventHandler for Handler { } } - fn closing(&self) {} - fn closed(&self) {} fn as_any(&self) -> &dyn std::any::Any { @@ -250,8 +248,6 @@ impl TransportPeerEventHandler for PeerHandler { ); } - fn closing(&self) {} - fn closed(&self) { let info = DataInfo { kind: SampleKind::Delete, diff --git a/zenoh/src/net/primitives/demux.rs b/zenoh/src/net/primitives/demux.rs index e4774aab4a..8735ed0f32 100644 --- a/zenoh/src/net/primitives/demux.rs +++ b/zenoh/src/net/primitives/demux.rs 
@@ -100,25 +100,10 @@ impl TransportPeerEventHandler for DeMux { fn del_link(&self, _link: Link) {} - fn closing(&self) { + fn closed(&self) { self.face.send_close(); - if let Some(transport) = self.transport.as_ref() { - let mut declares = vec![]; - let ctrl_lock = zlock!(self.face.tables.ctrl_lock); - let mut tables = zwrite!(self.face.tables.tables); - let _ = ctrl_lock.closing(&mut tables, &self.face.tables, transport, &mut |p, m| { - declares.push((p.clone(), m)) - }); - drop(tables); - drop(ctrl_lock); - for (p, m) in declares { - p.send_declare(m); - } - } } - fn closed(&self) {} - fn as_any(&self) -> &dyn Any { self } diff --git a/zenoh/src/net/primitives/mux.rs b/zenoh/src/net/primitives/mux.rs index 47067f231e..63376a6d63 100644 --- a/zenoh/src/net/primitives/mux.rs +++ b/zenoh/src/net/primitives/mux.rs @@ -200,9 +200,7 @@ impl Primitives for Mux { } } - fn send_close(&self) { - // self.handler.closing().await; - } + fn send_close(&self) {} } impl EPrimitives for Mux { @@ -530,9 +528,7 @@ impl Primitives for McastMux { } } - fn send_close(&self) { - // self.handler.closing().await; - } + fn send_close(&self) {} } impl EPrimitives for McastMux { diff --git a/zenoh/src/net/routing/dispatcher/face.rs b/zenoh/src/net/routing/dispatcher/face.rs index 654f5cf9c3..57d3dbb041 100644 --- a/zenoh/src/net/routing/dispatcher/face.rs +++ b/zenoh/src/net/routing/dispatcher/face.rs @@ -16,6 +16,7 @@ use std::{ collections::HashMap, fmt, sync::{Arc, Weak}, + time::Duration, }; use tokio_util::sync::CancellationToken; @@ -37,13 +38,16 @@ use super::{ super::router::*, interests::{declare_final, declare_interest, undeclare_interest, CurrentInterest}, resource::*, - tables::{self, TablesLock}, + tables::TablesLock, }; use crate::{ api::key_expr::KeyExpr, net::{ primitives::{McastMux, Mux, Primitives}, - routing::interceptor::{InterceptorTrait, InterceptorsChain}, + routing::{ + dispatcher::interests::finalize_pending_interests, + interceptor::{InterceptorTrait, 
InterceptorsChain}, + }, }, }; @@ -421,7 +425,25 @@ impl Primitives for Face { } fn send_close(&self) { - tables::close_face(&self.tables, &Arc::downgrade(&self.state)); + tracing::debug!("Close {}", self.state); + let mut state = self.state.clone(); + state.task_controller.terminate_all(Duration::from_secs(10)); + finalize_pending_queries(&self.tables, &mut state); + let mut declares = vec![]; + let ctrl_lock = zlock!(self.tables.ctrl_lock); + finalize_pending_interests(&self.tables, &mut state, &mut |p, m| { + declares.push((p.clone(), m)) + }); + ctrl_lock.close_face( + &self.tables, + &self.tables.clone(), + &mut state, + &mut |p, m| declares.push((p.clone(), m)), + ); + drop(ctrl_lock); + for (p, m) in declares { + p.send_declare(m); + } } } diff --git a/zenoh/src/net/routing/dispatcher/tables.rs b/zenoh/src/net/routing/dispatcher/tables.rs index 2c5cfffffb..4dd447360e 100644 --- a/zenoh/src/net/routing/dispatcher/tables.rs +++ b/zenoh/src/net/routing/dispatcher/tables.rs @@ -14,7 +14,7 @@ use std::{ any::Any, collections::HashMap, - sync::{Arc, Mutex, RwLock, Weak}, + sync::{Arc, Mutex, RwLock}, time::Duration, }; @@ -30,7 +30,6 @@ use zenoh_sync::get_mut_unchecked; use super::face::FaceState; pub use super::{pubsub::*, queries::*, resource::*}; use crate::net::routing::{ - dispatcher::interests::finalize_pending_interests, hat::{self, HatTrait}, interceptor::{interceptor_factories, InterceptorFactory}, }; @@ -169,27 +168,6 @@ impl Tables { } } -pub fn close_face(tables: &TablesLock, face: &Weak) { - match face.upgrade() { - Some(mut face) => { - tracing::debug!("Close {}", face); - face.task_controller.terminate_all(Duration::from_secs(10)); - finalize_pending_queries(tables, &mut face); - let mut declares = vec![]; - let ctrl_lock = zlock!(tables.ctrl_lock); - finalize_pending_interests(tables, &mut face, &mut |p, m| { - declares.push((p.clone(), m)) - }); - ctrl_lock.close_face(tables, &mut face, &mut |p, m| declares.push((p.clone(), m))); - 
drop(ctrl_lock); - for (p, m) in declares { - p.send_declare(m); - } - } - None => tracing::error!("Face already closed!"), - } -} - pub struct TablesLock { pub tables: RwLock, pub(crate) ctrl_lock: Mutex>, diff --git a/zenoh/src/net/routing/hat/client/mod.rs b/zenoh/src/net/routing/hat/client/mod.rs index 04ab653d9f..169b6ccbf1 100644 --- a/zenoh/src/net/routing/hat/client/mod.rs +++ b/zenoh/src/net/routing/hat/client/mod.rs @@ -130,6 +130,7 @@ impl HatBaseTrait for HatCode { fn close_face( &self, tables: &TablesLock, + _tables_ref: &Arc, face: &mut Arc, send_declare: &mut SendDeclare, ) { @@ -260,16 +261,6 @@ impl HatBaseTrait for HatCode { 0 } - fn closing( - &self, - _tables: &mut Tables, - _tables_ref: &Arc, - _transport: &TransportUnicast, - _send_declare: &mut SendDeclare, - ) -> ZResult<()> { - Ok(()) - } - #[inline] fn ingress_filter(&self, _tables: &Tables, _face: &FaceState, _expr: &mut RoutingExpr) -> bool { true diff --git a/zenoh/src/net/routing/hat/linkstate_peer/mod.rs b/zenoh/src/net/routing/hat/linkstate_peer/mod.rs index 167d9ae58b..a5d1608274 100644 --- a/zenoh/src/net/routing/hat/linkstate_peer/mod.rs +++ b/zenoh/src/net/routing/hat/linkstate_peer/mod.rs @@ -261,6 +261,7 @@ impl HatBaseTrait for HatCode { fn close_face( &self, tables: &TablesLock, + tables_ref: &Arc, face: &mut Arc, send_declare: &mut SendDeclare, ) { @@ -367,6 +368,21 @@ impl HatBaseTrait for HatCode { Resource::clean(&mut res); } wtables.faces.remove(&face.id); + + if face.whatami != WhatAmI::Client { + for (_, removed_node) in hat_mut!(wtables) + .linkstatepeers_net + .as_mut() + .unwrap() + .remove_link(&face.zid) + { + pubsub_remove_node(&mut wtables, &removed_node.zid, send_declare); + queries_remove_node(&mut wtables, &removed_node.zid, send_declare); + token_remove_node(&mut wtables, &removed_node.zid, send_declare); + } + + hat_mut!(wtables).schedule_compute_trees(tables_ref.clone()); + }; drop(wtables); } @@ -422,35 +438,6 @@ impl HatBaseTrait for HatCode { 
.get_local_context(routing_context, face_hat!(face).link_id) } - fn closing( - &self, - tables: &mut Tables, - tables_ref: &Arc, - transport: &TransportUnicast, - send_declare: &mut SendDeclare, - ) -> ZResult<()> { - match (transport.get_zid(), transport.get_whatami()) { - (Ok(zid), Ok(whatami)) => { - if whatami != WhatAmI::Client { - for (_, removed_node) in hat_mut!(tables) - .linkstatepeers_net - .as_mut() - .unwrap() - .remove_link(&zid) - { - pubsub_remove_node(tables, &removed_node.zid, send_declare); - queries_remove_node(tables, &removed_node.zid, send_declare); - token_remove_node(tables, &removed_node.zid, send_declare); - } - - hat_mut!(tables).schedule_compute_trees(tables_ref.clone()); - }; - } - (_, _) => tracing::error!("Closed transport in session closing!"), - } - Ok(()) - } - #[inline] fn ingress_filter(&self, _tables: &Tables, _face: &FaceState, _expr: &mut RoutingExpr) -> bool { true diff --git a/zenoh/src/net/routing/hat/mod.rs b/zenoh/src/net/routing/hat/mod.rs index 3e5a81d259..74850ea4c1 100644 --- a/zenoh/src/net/routing/hat/mod.rs +++ b/zenoh/src/net/routing/hat/mod.rs @@ -131,17 +131,10 @@ pub(crate) trait HatBaseTrait { fn info(&self, tables: &Tables, kind: WhatAmI) -> String; - fn closing( - &self, - tables: &mut Tables, - tables_ref: &Arc, - transport: &TransportUnicast, - send_declare: &mut SendDeclare, - ) -> ZResult<()>; - fn close_face( &self, tables: &TablesLock, + tables_ref: &Arc, face: &mut Arc, send_declare: &mut SendDeclare, ); diff --git a/zenoh/src/net/routing/hat/p2p_peer/mod.rs b/zenoh/src/net/routing/hat/p2p_peer/mod.rs index 4d20204c19..b4da9a241d 100644 --- a/zenoh/src/net/routing/hat/p2p_peer/mod.rs +++ b/zenoh/src/net/routing/hat/p2p_peer/mod.rs @@ -205,6 +205,7 @@ impl HatBaseTrait for HatCode { fn close_face( &self, tables: &TablesLock, + _tables_ref: &Arc, face: &mut Arc, send_declare: &mut SendDeclare, ) { @@ -311,6 +312,14 @@ impl HatBaseTrait for HatCode { Resource::clean(&mut res); } 
wtables.faces.remove(&face.id); + + if face.whatami != WhatAmI::Client { + hat_mut!(wtables) + .gossip + .as_mut() + .unwrap() + .remove_link(&face.zid); + }; drop(wtables); } @@ -354,24 +363,6 @@ impl HatBaseTrait for HatCode { 0 } - fn closing( - &self, - tables: &mut Tables, - _tables_ref: &Arc, - transport: &TransportUnicast, - _send_declare: &mut SendDeclare, - ) -> ZResult<()> { - match (transport.get_zid(), transport.get_whatami()) { - (Ok(zid), Ok(whatami)) => { - if whatami != WhatAmI::Client { - hat_mut!(tables).gossip.as_mut().unwrap().remove_link(&zid); - }; - } - (_, _) => tracing::error!("Closed transport in session closing!"), - } - Ok(()) - } - #[inline] fn ingress_filter(&self, _tables: &Tables, _face: &FaceState, _expr: &mut RoutingExpr) -> bool { true diff --git a/zenoh/src/net/routing/hat/router/mod.rs b/zenoh/src/net/routing/hat/router/mod.rs index c7ea567fb4..5e1906920f 100644 --- a/zenoh/src/net/routing/hat/router/mod.rs +++ b/zenoh/src/net/routing/hat/router/mod.rs @@ -430,6 +430,7 @@ impl HatBaseTrait for HatCode { fn close_face( &self, tables: &TablesLock, + tables_ref: &Arc, face: &mut Arc, send_declare: &mut SendDeclare, ) { @@ -536,6 +537,84 @@ impl HatBaseTrait for HatCode { Resource::clean(&mut res); } wtables.faces.remove(&face.id); + + match face.whatami { + WhatAmI::Router => { + for (_, removed_node) in hat_mut!(wtables) + .routers_net + .as_mut() + .unwrap() + .remove_link(&face.zid) + { + pubsub_remove_node( + &mut wtables, + &removed_node.zid, + WhatAmI::Router, + send_declare, + ); + queries_remove_node( + &mut wtables, + &removed_node.zid, + WhatAmI::Router, + send_declare, + ); + token_remove_node( + &mut wtables, + &removed_node.zid, + WhatAmI::Router, + send_declare, + ); + } + + if hat!(wtables).full_net(WhatAmI::Peer) { + hat_mut!(wtables).shared_nodes = shared_nodes( + hat!(wtables).routers_net.as_ref().unwrap(), + hat!(wtables).linkstatepeers_net.as_ref().unwrap(), + ); + } + + 
hat_mut!(wtables).schedule_compute_trees(tables_ref.clone(), WhatAmI::Router); + } + WhatAmI::Peer => { + if hat!(wtables).full_net(WhatAmI::Peer) { + for (_, removed_node) in hat_mut!(wtables) + .linkstatepeers_net + .as_mut() + .unwrap() + .remove_link(&face.zid) + { + pubsub_remove_node( + &mut wtables, + &removed_node.zid, + WhatAmI::Peer, + send_declare, + ); + queries_remove_node( + &mut wtables, + &removed_node.zid, + WhatAmI::Peer, + send_declare, + ); + token_remove_node( + &mut wtables, + &removed_node.zid, + WhatAmI::Peer, + send_declare, + ); + } + + hat_mut!(wtables).shared_nodes = shared_nodes( + hat!(wtables).routers_net.as_ref().unwrap(), + hat!(wtables).linkstatepeers_net.as_ref().unwrap(), + ); + + hat_mut!(wtables).schedule_compute_trees(tables_ref.clone(), WhatAmI::Peer); + } else if let Some(net) = hat_mut!(wtables).linkstatepeers_net.as_mut() { + net.remove_link(&face.zid); + } + } + _ => (), + }; drop(wtables); } @@ -689,100 +768,6 @@ impl HatBaseTrait for HatCode { } } - fn closing( - &self, - tables: &mut Tables, - tables_ref: &Arc, - transport: &TransportUnicast, - send_declare: &mut SendDeclare, - ) -> ZResult<()> { - match (transport.get_zid(), transport.get_whatami()) { - (Ok(zid), Ok(whatami)) => { - match whatami { - WhatAmI::Router => { - for (_, removed_node) in hat_mut!(tables) - .routers_net - .as_mut() - .unwrap() - .remove_link(&zid) - { - pubsub_remove_node( - tables, - &removed_node.zid, - WhatAmI::Router, - send_declare, - ); - queries_remove_node( - tables, - &removed_node.zid, - WhatAmI::Router, - send_declare, - ); - token_remove_node( - tables, - &removed_node.zid, - WhatAmI::Router, - send_declare, - ); - } - - if hat!(tables).full_net(WhatAmI::Peer) { - hat_mut!(tables).shared_nodes = shared_nodes( - hat!(tables).routers_net.as_ref().unwrap(), - hat!(tables).linkstatepeers_net.as_ref().unwrap(), - ); - } - - hat_mut!(tables) - .schedule_compute_trees(tables_ref.clone(), WhatAmI::Router); - } - WhatAmI::Peer => { - if 
hat!(tables).full_net(WhatAmI::Peer) { - for (_, removed_node) in hat_mut!(tables) - .linkstatepeers_net - .as_mut() - .unwrap() - .remove_link(&zid) - { - pubsub_remove_node( - tables, - &removed_node.zid, - WhatAmI::Peer, - send_declare, - ); - queries_remove_node( - tables, - &removed_node.zid, - WhatAmI::Peer, - send_declare, - ); - token_remove_node( - tables, - &removed_node.zid, - WhatAmI::Peer, - send_declare, - ); - } - - hat_mut!(tables).shared_nodes = shared_nodes( - hat!(tables).routers_net.as_ref().unwrap(), - hat!(tables).linkstatepeers_net.as_ref().unwrap(), - ); - - hat_mut!(tables) - .schedule_compute_trees(tables_ref.clone(), WhatAmI::Peer); - } else if let Some(net) = hat_mut!(tables).linkstatepeers_net.as_mut() { - net.remove_link(&zid); - } - } - _ => (), - }; - } - (_, _) => tracing::error!("Closed transport in session closing!"), - } - Ok(()) - } - #[inline] fn ingress_filter(&self, tables: &Tables, face: &FaceState, expr: &mut RoutingExpr) -> bool { face.whatami != WhatAmI::Peer diff --git a/zenoh/src/net/runtime/mod.rs b/zenoh/src/net/runtime/mod.rs index aca6734d4d..301698eea6 100644 --- a/zenoh/src/net/runtime/mod.rs +++ b/zenoh/src/net/runtime/mod.rs @@ -457,16 +457,9 @@ impl TransportPeerEventHandler for RuntimeSession { } } - fn closing(&self) { - self.main_handler.closing(); - Runtime::closing_session(self); - for handler in &self.slave_handlers { - handler.closing(); - } - } - fn closed(&self) { self.main_handler.closed(); + Runtime::closed_session(self); for handler in &self.slave_handlers { handler.closed(); } @@ -500,12 +493,6 @@ impl TransportMulticastEventHandler for RuntimeMulticastGroup { })) } - fn closing(&self) { - for handler in &self.slave_handlers { - handler.closed(); - } - } - fn closed(&self) { for handler in &self.slave_handlers { handler.closed(); @@ -541,13 +528,6 @@ impl TransportPeerEventHandler for RuntimeMulticastSession { } } - fn closing(&self) { - self.main_handler.closing(); - for handler in 
&self.slave_handlers { - handler.closing(); - } - } - fn closed(&self) { self.main_handler.closed(); for handler in &self.slave_handlers { diff --git a/zenoh/src/net/runtime/orchestrator.rs b/zenoh/src/net/runtime/orchestrator.rs index c95f47a970..e09a4e812b 100644 --- a/zenoh/src/net/runtime/orchestrator.rs +++ b/zenoh/src/net/runtime/orchestrator.rs @@ -1184,7 +1184,7 @@ impl Runtime { } } - pub(super) fn closing_session(session: &RuntimeSession) { + pub(super) fn closed_session(session: &RuntimeSession) { if session.runtime.is_closed() { return; } diff --git a/zenoh/src/net/tests/tables.rs b/zenoh/src/net/tests/tables.rs index 23c0d9c053..8953397b8d 100644 --- a/zenoh/src/net/tests/tables.rs +++ b/zenoh/src/net/tests/tables.rs @@ -32,10 +32,7 @@ use zenoh_protocol::{ use crate::net::{ primitives::{DummyPrimitives, EPrimitives, Primitives}, routing::{ - dispatcher::{ - pubsub::SubscriberInfo, - tables::{self, Tables}, - }, + dispatcher::{pubsub::SubscriberInfo, tables::Tables}, router::*, RoutingContext, }, @@ -189,15 +186,14 @@ fn multisub_test() { let tables = router.tables.clone(); let primitives = Arc::new(DummyPrimitives {}); - let face0 = Arc::downgrade(&router.new_primitives(primitives).state); - assert!(face0.upgrade().is_some()); + let face0 = &router.new_primitives(primitives); // -------------- let sub_info = SubscriberInfo; declare_subscription( zlock!(tables.ctrl_lock).as_ref(), &tables, - &mut face0.upgrade().unwrap(), + &mut face0.state.clone(), 0, &"sub".into(), &sub_info, @@ -213,7 +209,7 @@ fn multisub_test() { declare_subscription( zlock!(tables.ctrl_lock).as_ref(), &tables, - &mut face0.upgrade().unwrap(), + &mut face0.state.clone(), 1, &"sub".into(), &sub_info, @@ -225,7 +221,7 @@ fn multisub_test() { undeclare_subscription( zlock!(tables.ctrl_lock).as_ref(), &tables, - &mut face0.upgrade().unwrap(), + &mut face0.state.clone(), 0, &WireExpr::empty(), NodeId::default(), @@ -236,7 +232,7 @@ fn multisub_test() { undeclare_subscription( 
zlock!(tables.ctrl_lock).as_ref(), &tables, - &mut face0.upgrade().unwrap(), + &mut face0.state.clone(), 1, &WireExpr::empty(), NodeId::default(), @@ -244,7 +240,7 @@ fn multisub_test() { ); assert!(res.upgrade().is_none()); - tables::close_face(&tables, &face0); + face0.send_close(); } #[tokio::test(flavor = "multi_thread", worker_threads = 1)] @@ -260,11 +256,10 @@ async fn clean_test() { let tables = router.tables.clone(); let primitives = Arc::new(DummyPrimitives {}); - let face0 = Arc::downgrade(&router.new_primitives(primitives).state); - assert!(face0.upgrade().is_some()); + let face0 = &router.new_primitives(primitives); // -------------- - register_expr(&tables, &mut face0.upgrade().unwrap(), 1, &"todrop1".into()); + register_expr(&tables, &mut face0.state.clone(), 1, &"todrop1".into()); let optres1 = Resource::get_resource(zread!(tables.tables)._get_root(), "todrop1") .map(|res| Arc::downgrade(&res)); assert!(optres1.is_some()); @@ -273,7 +268,7 @@ async fn clean_test() { register_expr( &tables, - &mut face0.upgrade().unwrap(), + &mut face0.state.clone(), 2, &"todrop1/todrop11".into(), ); @@ -283,30 +278,30 @@ async fn clean_test() { let res2 = optres2.unwrap(); assert!(res2.upgrade().is_some()); - register_expr(&tables, &mut face0.upgrade().unwrap(), 3, &"**".into()); + register_expr(&tables, &mut face0.state.clone(), 3, &"**".into()); let optres3 = Resource::get_resource(zread!(tables.tables)._get_root(), "**") .map(|res| Arc::downgrade(&res)); assert!(optres3.is_some()); let res3 = optres3.unwrap(); assert!(res3.upgrade().is_some()); - unregister_expr(&tables, &mut face0.upgrade().unwrap(), 1); + unregister_expr(&tables, &mut face0.state.clone(), 1); assert!(res1.upgrade().is_some()); assert!(res2.upgrade().is_some()); assert!(res3.upgrade().is_some()); - unregister_expr(&tables, &mut face0.upgrade().unwrap(), 2); + unregister_expr(&tables, &mut face0.state.clone(), 2); assert!(res1.upgrade().is_none()); assert!(res2.upgrade().is_none()); 
assert!(res3.upgrade().is_some()); - unregister_expr(&tables, &mut face0.upgrade().unwrap(), 3); + unregister_expr(&tables, &mut face0.state.clone(), 3); assert!(res1.upgrade().is_none()); assert!(res2.upgrade().is_none()); assert!(res3.upgrade().is_none()); // -------------- - register_expr(&tables, &mut face0.upgrade().unwrap(), 1, &"todrop1".into()); + register_expr(&tables, &mut face0.state.clone(), 1, &"todrop1".into()); let optres1 = Resource::get_resource(zread!(tables.tables)._get_root(), "todrop1") .map(|res| Arc::downgrade(&res)); assert!(optres1.is_some()); @@ -318,7 +313,7 @@ async fn clean_test() { declare_subscription( zlock!(tables.ctrl_lock).as_ref(), &tables, - &mut face0.upgrade().unwrap(), + &mut face0.state.clone(), 0, &"todrop1/todrop11".into(), &sub_info, @@ -334,7 +329,7 @@ async fn clean_test() { declare_subscription( zlock!(tables.ctrl_lock).as_ref(), &tables, - &mut face0.upgrade().unwrap(), + &mut face0.state.clone(), 1, &WireExpr::from(1).with_suffix("/todrop12"), &sub_info, @@ -351,7 +346,7 @@ async fn clean_test() { undeclare_subscription( zlock!(tables.ctrl_lock).as_ref(), &tables, - &mut face0.upgrade().unwrap(), + &mut face0.state.clone(), 1, &WireExpr::empty(), NodeId::default(), @@ -367,7 +362,7 @@ async fn clean_test() { undeclare_subscription( zlock!(tables.ctrl_lock).as_ref(), &tables, - &mut face0.upgrade().unwrap(), + &mut face0.state.clone(), 0, &WireExpr::empty(), NodeId::default(), @@ -377,17 +372,17 @@ async fn clean_test() { assert!(res2.upgrade().is_none()); assert!(res3.upgrade().is_none()); - unregister_expr(&tables, &mut face0.upgrade().unwrap(), 1); + unregister_expr(&tables, &mut face0.state.clone(), 1); assert!(res1.upgrade().is_none()); assert!(res2.upgrade().is_none()); assert!(res3.upgrade().is_none()); // -------------- - register_expr(&tables, &mut face0.upgrade().unwrap(), 2, &"todrop3".into()); + register_expr(&tables, &mut face0.state.clone(), 2, &"todrop3".into()); declare_subscription( 
zlock!(tables.ctrl_lock).as_ref(), &tables, - &mut face0.upgrade().unwrap(), + &mut face0.state.clone(), 2, &"todrop3".into(), &sub_info, @@ -403,7 +398,7 @@ async fn clean_test() { undeclare_subscription( zlock!(tables.ctrl_lock).as_ref(), &tables, - &mut face0.upgrade().unwrap(), + &mut face0.state.clone(), 2, &WireExpr::empty(), NodeId::default(), @@ -411,16 +406,16 @@ async fn clean_test() { ); assert!(res1.upgrade().is_some()); - unregister_expr(&tables, &mut face0.upgrade().unwrap(), 2); + unregister_expr(&tables, &mut face0.state.clone(), 2); assert!(res1.upgrade().is_none()); // -------------- - register_expr(&tables, &mut face0.upgrade().unwrap(), 3, &"todrop4".into()); - register_expr(&tables, &mut face0.upgrade().unwrap(), 4, &"todrop5".into()); + register_expr(&tables, &mut face0.state.clone(), 3, &"todrop4".into()); + register_expr(&tables, &mut face0.state.clone(), 4, &"todrop5".into()); declare_subscription( zlock!(tables.ctrl_lock).as_ref(), &tables, - &mut face0.upgrade().unwrap(), + &mut face0.state.clone(), 3, &"todrop5".into(), &sub_info, @@ -430,7 +425,7 @@ async fn clean_test() { declare_subscription( zlock!(tables.ctrl_lock).as_ref(), &tables, - &mut face0.upgrade().unwrap(), + &mut face0.state.clone(), 4, &"todrop6".into(), &sub_info, @@ -455,8 +450,7 @@ async fn clean_test() { assert!(res2.upgrade().is_some()); assert!(res3.upgrade().is_some()); - tables::close_face(&tables, &face0); - assert!(face0.upgrade().is_none()); + face0.send_close(); assert!(res1.upgrade().is_none()); assert!(res2.upgrade().is_none()); assert!(res3.upgrade().is_none()); From 3964d5a4d211a059e8942664643815d615c44ba8 Mon Sep 17 00:00:00 2001 From: OlivierHecart Date: Fri, 27 Sep 2024 12:47:34 +0200 Subject: [PATCH 21/26] Send Declare and OAM messages with Control priority (#1476) --- commons/zenoh-protocol/src/network/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/commons/zenoh-protocol/src/network/mod.rs 
b/commons/zenoh-protocol/src/network/mod.rs index 336f952f3d..ed23b0337a 100644 --- a/commons/zenoh-protocol/src/network/mod.rs +++ b/commons/zenoh-protocol/src/network/mod.rs @@ -251,13 +251,13 @@ pub mod ext { pub const DEFAULT: Self = Self::new(Priority::DEFAULT, CongestionControl::DEFAULT, false); - pub const DECLARE: Self = Self::new(Priority::DEFAULT, CongestionControl::Block, false); + pub const DECLARE: Self = Self::new(Priority::Control, CongestionControl::Block, false); pub const PUSH: Self = Self::new(Priority::DEFAULT, CongestionControl::Drop, false); pub const REQUEST: Self = Self::new(Priority::DEFAULT, CongestionControl::Block, false); pub const RESPONSE: Self = Self::new(Priority::DEFAULT, CongestionControl::Block, false); pub const RESPONSE_FINAL: Self = Self::new(Priority::DEFAULT, CongestionControl::Block, false); - pub const OAM: Self = Self::new(Priority::DEFAULT, CongestionControl::Block, false); + pub const OAM: Self = Self::new(Priority::Control, CongestionControl::Block, false); pub const fn new( priority: Priority, From e79c80063f3d6b176d9bbef5f6d0cad9ead27b01 Mon Sep 17 00:00:00 2001 From: OlivierHecart Date: Fri, 27 Sep 2024 13:49:14 +0200 Subject: [PATCH 22/26] Avoid waiting for dropping deadline if wait time is null (#1480) --- io/zenoh-transport/src/common/pipeline.rs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/io/zenoh-transport/src/common/pipeline.rs b/io/zenoh-transport/src/common/pipeline.rs index 8a712d56db..16ed92bca3 100644 --- a/io/zenoh-transport/src/common/pipeline.rs +++ b/io/zenoh-transport/src/common/pipeline.rs @@ -141,7 +141,7 @@ impl StageIn { &mut self, msg: &mut NetworkMessage, priority: Priority, - deadline_before_drop: Option, + deadline_before_drop: Option>, ) -> bool { // Lock the current serialization batch. 
let mut c_guard = self.mutex.current(); @@ -163,7 +163,7 @@ impl StageIn { Some(deadline) if !$fragment => { // We are in the congestion scenario and message is droppable // Wait for an available batch until deadline - if !self.s_ref.wait_deadline(deadline) { + if !deadline.map_or(false, |deadline| self.s_ref.wait_deadline(deadline)) { // Still no available batch. // Restore the sequence number and drop the message $restore_sn; @@ -628,7 +628,11 @@ impl TransmissionPipelineProducer { }; // If message is droppable, compute a deadline after which the sample could be dropped let deadline_before_drop = if msg.is_droppable() { - Some(Instant::now() + self.wait_before_drop) + if self.wait_before_drop.is_zero() { + Some(None) + } else { + Some(Some(Instant::now() + self.wait_before_drop)) + } } else { None }; From 3bf271b64891016b1867961b2750594890ff6276 Mon Sep 17 00:00:00 2001 From: Michael Ilyin Date: Sun, 29 Sep 2024 16:43:41 +0200 Subject: [PATCH 23/26] serializarion for zenoh::time::Timestamp --- examples/examples/z_bytes.rs | 11 +++++++++-- zenoh-ext/src/serialization.rs | 31 ++++++++++++++++++++++++++++++- 2 files changed, 39 insertions(+), 3 deletions(-) diff --git a/examples/examples/z_bytes.rs b/examples/examples/z_bytes.rs index c75932ef36..2cc6d21789 100644 --- a/examples/examples/z_bytes.rs +++ b/examples/examples/z_bytes.rs @@ -1,4 +1,4 @@ -use std::collections::HashMap; +use std::{collections::HashMap, time::{SystemTime, UNIX_EPOCH}}; // // Copyright (c) 2024 ZettaScale Technology @@ -13,7 +13,7 @@ use std::collections::HashMap; // Contributors: // ZettaScale Zenoh Team, // -use zenoh::bytes::ZBytes; +use zenoh::{bytes::ZBytes, time::{Timestamp, TimestampId}}; fn main() { // Raw bytes @@ -100,6 +100,13 @@ fn main() { let output: (f64, String) = z_deserialize(&payload).unwrap(); assert_eq!(input, output); + // Zenoh types + let now = SystemTime::now().duration_since(UNIX_EPOCH).unwrap().into(); + let input = Timestamp::new(now, TimestampId::rand()); + let 
payload = z_serialize(&input); + let output: Timestamp = z_deserialize(&payload).unwrap(); + assert_eq!(input, output); + // Look at Serialize/Deserialize documentation for the exhaustive // list of provided implementations } diff --git a/zenoh-ext/src/serialization.rs b/zenoh-ext/src/serialization.rs index a368bc1684..874b04cc34 100644 --- a/zenoh-ext/src/serialization.rs +++ b/zenoh-ext/src/serialization.rs @@ -9,7 +9,7 @@ use std::{ ptr, }; -use zenoh::bytes::{ZBytes, ZBytesReader, ZBytesWriter}; +use zenoh::{bytes::{ZBytes, ZBytesReader, ZBytesWriter}, time::{Timestamp, TimestampId, NTP64}}; #[derive(Debug)] pub struct ZDeserializeError; @@ -89,6 +89,12 @@ impl ZSerializer { } } +impl Default for ZSerializer { + fn default() -> Self { + Self::new() + } +} + impl From for ZBytes { fn from(value: ZSerializer) -> Self { value.finish() @@ -460,3 +466,26 @@ macro_rules! impl_varint { )*}; } impl_varint!(u8: i8, u16: i16, u32: i32, u64: i64, usize: isize); + +// +// Serialization/deseialization for zenoh types +// + +impl Serialize for zenoh::time::Timestamp { + fn serialize(&self, serializer: &mut ZSerializer) { + let time = self.get_time().as_u64(); + let id = self.get_id().to_le_bytes(); + time.serialize(serializer); + id.serialize(serializer); + } +} + +impl Deserialize for zenoh::time::Timestamp { + fn deserialize(deserializer: &mut ZDeserializer) -> Result { + let time = u64::deserialize(deserializer)?; + let time = NTP64(time); + let id = Vec::::deserialize(deserializer)?; + let id = id.as_slice().try_into().map_err(|_| ZDeserializeError)?; + Ok(Timestamp::new(time, id)) + } +} \ No newline at end of file From 795843ffb1f20a7fe22f8d4ebac3214915ff61dd Mon Sep 17 00:00:00 2001 From: Michael Ilyin Date: Sun, 29 Sep 2024 21:15:52 +0200 Subject: [PATCH 24/26] intermediate types serialization added --- zenoh-ext/src/serialization.rs | 38 +++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/zenoh-ext/src/serialization.rs 
b/zenoh-ext/src/serialization.rs index 874b04cc34..ee3066b0f9 100644 --- a/zenoh-ext/src/serialization.rs +++ b/zenoh-ext/src/serialization.rs @@ -471,21 +471,45 @@ impl_varint!(u8: i8, u16: i16, u32: i32, u64: i64, usize: isize); // Serialization/deseialization for zenoh types // -impl Serialize for zenoh::time::Timestamp { +impl Serialize for zenoh::time::NTP64 { fn serialize(&self, serializer: &mut ZSerializer) { - let time = self.get_time().as_u64(); - let id = self.get_id().to_le_bytes(); + let time = self.as_u64(); time.serialize(serializer); - id.serialize(serializer); } } -impl Deserialize for zenoh::time::Timestamp { +impl Deserialize for zenoh::time::NTP64 { fn deserialize(deserializer: &mut ZDeserializer) -> Result { let time = u64::deserialize(deserializer)?; - let time = NTP64(time); + Ok(NTP64(time)) + } +} + +impl Serialize for zenoh::time::TimestampId { + fn serialize(&self, serializer: &mut ZSerializer) { + self.to_le_bytes().serialize(serializer); + } +} + +impl Deserialize for zenoh::time::TimestampId { + fn deserialize(deserializer: &mut ZDeserializer) -> Result { let id = Vec::::deserialize(deserializer)?; let id = id.as_slice().try_into().map_err(|_| ZDeserializeError)?; + Ok(id) + } +} + +impl Serialize for zenoh::time::Timestamp { + fn serialize(&self, serializer: &mut ZSerializer) { + self.get_time().serialize(serializer); + self.get_id().serialize(serializer); + } +} + +impl Deserialize for zenoh::time::Timestamp { + fn deserialize(deserializer: &mut ZDeserializer) -> Result { + let time = NTP64::deserialize(deserializer)?; + let id = TimestampId::deserialize(deserializer)?; Ok(Timestamp::new(time, id)) } -} \ No newline at end of file +} From ba934f80cf040ea06583689cbe8cdf094b015774 Mon Sep 17 00:00:00 2001 From: Michael Ilyin Date: Sun, 29 Sep 2024 22:00:08 +0200 Subject: [PATCH 25/26] encoding serialization added --- examples/examples/z_bytes.rs | 14 ++++++++++++-- zenoh-ext/src/serialization.rs | 29 +++++++++++++++++++++-------- 2 
files changed, 33 insertions(+), 10 deletions(-) diff --git a/examples/examples/z_bytes.rs b/examples/examples/z_bytes.rs index 2cc6d21789..27b40bc6e9 100644 --- a/examples/examples/z_bytes.rs +++ b/examples/examples/z_bytes.rs @@ -1,4 +1,3 @@ -use std::{collections::HashMap, time::{SystemTime, UNIX_EPOCH}}; // // Copyright (c) 2024 ZettaScale Technology @@ -13,7 +12,8 @@ use std::{collections::HashMap, time::{SystemTime, UNIX_EPOCH}}; // Contributors: // ZettaScale Zenoh Team, // -use zenoh::{bytes::ZBytes, time::{Timestamp, TimestampId}}; +use zenoh::{bytes::{Encoding, ZBytes}, time::{Timestamp, TimestampId}}; +use std::{collections::HashMap, str::FromStr, time::{SystemTime, UNIX_EPOCH}}; fn main() { // Raw bytes @@ -107,6 +107,16 @@ fn main() { let output: Timestamp = z_deserialize(&payload).unwrap(); assert_eq!(input, output); + let input = Encoding::TEXT_JSON; + let payload = z_serialize(&input); + let output: Encoding = z_deserialize(&payload).unwrap(); + assert_eq!(input, output); + + let input = Encoding::from_str("text/plain;foobar").unwrap(); + let payload = z_serialize(&input); + let output: Encoding = z_deserialize(&payload).unwrap(); + assert_eq!(input, output); + // Look at Serialize/Deserialize documentation for the exhaustive // list of provided implementations } diff --git a/zenoh-ext/src/serialization.rs b/zenoh-ext/src/serialization.rs index ee3066b0f9..f0dc67283c 100644 --- a/zenoh-ext/src/serialization.rs +++ b/zenoh-ext/src/serialization.rs @@ -6,10 +6,10 @@ use std::{ io::{Read, Write}, marker::PhantomData, ops::{Deref, DerefMut}, - ptr, + ptr, str::FromStr, }; -use zenoh::{bytes::{ZBytes, ZBytesReader, ZBytesWriter}, time::{Timestamp, TimestampId, NTP64}}; +use zenoh::{bytes::{Encoding, ZBytes, ZBytesReader, ZBytesWriter}, time::{Timestamp, TimestampId, NTP64}}; #[derive(Debug)] pub struct ZDeserializeError; @@ -471,27 +471,27 @@ impl_varint!(u8: i8, u16: i16, u32: i32, u64: i64, usize: isize); // Serialization/deseialization for zenoh types 
// -impl Serialize for zenoh::time::NTP64 { +impl Serialize for NTP64 { fn serialize(&self, serializer: &mut ZSerializer) { let time = self.as_u64(); time.serialize(serializer); } } -impl Deserialize for zenoh::time::NTP64 { +impl Deserialize for NTP64 { fn deserialize(deserializer: &mut ZDeserializer) -> Result { let time = u64::deserialize(deserializer)?; Ok(NTP64(time)) } } -impl Serialize for zenoh::time::TimestampId { +impl Serialize for TimestampId { fn serialize(&self, serializer: &mut ZSerializer) { self.to_le_bytes().serialize(serializer); } } -impl Deserialize for zenoh::time::TimestampId { +impl Deserialize for TimestampId { fn deserialize(deserializer: &mut ZDeserializer) -> Result { let id = Vec::::deserialize(deserializer)?; let id = id.as_slice().try_into().map_err(|_| ZDeserializeError)?; @@ -499,17 +499,30 @@ impl Deserialize for zenoh::time::TimestampId { } } -impl Serialize for zenoh::time::Timestamp { +impl Serialize for Timestamp { fn serialize(&self, serializer: &mut ZSerializer) { self.get_time().serialize(serializer); self.get_id().serialize(serializer); } } -impl Deserialize for zenoh::time::Timestamp { +impl Deserialize for Timestamp { fn deserialize(deserializer: &mut ZDeserializer) -> Result { let time = NTP64::deserialize(deserializer)?; let id = TimestampId::deserialize(deserializer)?; Ok(Timestamp::new(time, id)) } } + +impl Serialize for Encoding { + fn serialize(&self, serializer: &mut ZSerializer) { + self.to_string().serialize(serializer); + } +} + +impl Deserialize for zenoh::bytes::Encoding { + fn deserialize(deserializer: &mut ZDeserializer) -> Result { + let encoding = String::deserialize(deserializer)?; + Encoding::from_str(&encoding).map_err(|_| ZDeserializeError) + } +} From cf37dd653c9b25ba485b107976604898653b3b6b Mon Sep 17 00:00:00 2001 From: Michael Ilyin Date: Sun, 29 Sep 2024 22:01:05 +0200 Subject: [PATCH 26/26] cargo fmt --- examples/examples/z_bytes.rs | 13 ++++++++++--- zenoh-ext/src/serialization.rs | 10 
+++++++--- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/examples/examples/z_bytes.rs b/examples/examples/z_bytes.rs index 27b40bc6e9..808a8a2ec2 100644 --- a/examples/examples/z_bytes.rs +++ b/examples/examples/z_bytes.rs @@ -1,4 +1,3 @@ - // // Copyright (c) 2024 ZettaScale Technology // @@ -12,8 +11,16 @@ // Contributors: // ZettaScale Zenoh Team, // -use zenoh::{bytes::{Encoding, ZBytes}, time::{Timestamp, TimestampId}}; -use std::{collections::HashMap, str::FromStr, time::{SystemTime, UNIX_EPOCH}}; +use std::{ + collections::HashMap, + str::FromStr, + time::{SystemTime, UNIX_EPOCH}, +}; + +use zenoh::{ + bytes::{Encoding, ZBytes}, + time::{Timestamp, TimestampId}, +}; fn main() { // Raw bytes diff --git a/zenoh-ext/src/serialization.rs b/zenoh-ext/src/serialization.rs index f0dc67283c..04b30a5976 100644 --- a/zenoh-ext/src/serialization.rs +++ b/zenoh-ext/src/serialization.rs @@ -6,10 +6,14 @@ use std::{ io::{Read, Write}, marker::PhantomData, ops::{Deref, DerefMut}, - ptr, str::FromStr, + ptr, + str::FromStr, }; -use zenoh::{bytes::{Encoding, ZBytes, ZBytesReader, ZBytesWriter}, time::{Timestamp, TimestampId, NTP64}}; +use zenoh::{ + bytes::{Encoding, ZBytes, ZBytesReader, ZBytesWriter}, + time::{Timestamp, TimestampId, NTP64}, +}; #[derive(Debug)] pub struct ZDeserializeError; @@ -502,7 +506,7 @@ impl Deserialize for TimestampId { impl Serialize for Timestamp { fn serialize(&self, serializer: &mut ZSerializer) { self.get_time().serialize(serializer); - self.get_id().serialize(serializer); + self.get_id().serialize(serializer); } }