From 152455e897fdf78dae567315d579dd2d39ea0303 Mon Sep 17 00:00:00 2001 From: Alejandro Miguez Date: Tue, 12 Aug 2025 12:31:00 -0300 Subject: [PATCH 01/14] Implement partitioned grain registry for horizontal scaling --- CHANGELOG.md | 40 + config/sys.config.src | 3 +- src/erleans.app.src | 3 +- src/erleans_app.erl | 2 +- src/erleans_pm.bkp | 1461 ++++++++++++++++++++++++++++ src/erleans_pm.erl | 1414 +++------------------------ src/erleans_registry_partition.erl | 975 +++++++++++++++++++ src/erleans_registry_sup.erl | 87 ++ src/erleans_sup.erl | 10 +- test/dist_lifecycle_SUITE.erl | 2 + test/partition_logic_test.erl | 158 +++ test/registry_partition_SUITE.erl | 385 ++++++++ test/sys.config | 4 +- 13 files changed, 3245 insertions(+), 1299 deletions(-) create mode 100644 CHANGELOG.md create mode 100644 src/erleans_pm.bkp create mode 100644 src/erleans_registry_partition.erl create mode 100644 src/erleans_registry_sup.erl create mode 100644 test/partition_logic_test.erl create mode 100644 test/registry_partition_SUITE.erl diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..7564544 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,40 @@ +# CHANGELOG + +--- +## X.Y.Z (2025-08-12) +### CHANGES +- Implement partitioned grain registry for horizontal scaling + + Refactor erleans_pm from single-server architecture to partitioned + registry system using gproc_pool for consistent grain distribution + across N partition workers, improving performance and scalability. 
+ + **Core Changes:** + - Split erleans_pm into router + partition architecture + - Create erleans_registry_partition: individual CRDT partition workers + - Create erleans_registry_sup: manages N partitions via gproc_pool + - Convert erleans_pm to router using consistent hashing for grain distribution + + **Technical Implementation:** + - Use gproc_pool hash strategy for deterministic grain-to-partition routing + - Maintain same public API for backward compatibility + - Preserve all CRDT synchronization and conflict resolution logic + - Add configurable pm_partitions setting (default: 1, recommend: 4) + + **Configuration & Dependencies:** + - Update startup order: erleans_config before erleans_registry_sup + - Add pm_partitions config to test environments + - Fix partisan peer service configuration in distributed tests + - Integrate with erleans_config API instead of direct application:get_env + + **Testing & Validation:** + - Add comprehensive registry_partition_SUITE for partition verification + - Add partition_logic_test for gproc_pool behavior analysis + - Fix dist_lifecycle_SUITE startup issues with partisan configuration + - Preserve all existing test functionality through API compatibility + + **Benefits:** + - Horizontal scaling: distribute grain load across multiple partitions + - Improved concurrency: reduce contention on single registry server + - Consistent routing: same grain always maps to same partition + - Backward compatible: existing code works without changes \ No newline at end of file diff --git a/config/sys.config.src b/config/sys.config.src index 5c3da05..3f33829 100644 --- a/config/sys.config.src +++ b/config/sys.config.src @@ -1,7 +1,8 @@ %% -*- erlang -*- [{erleans, [{providers, #{in_memory => #{module => erleans_provider_ets, args => #{}}}}, - {default_provider, in_memory} + {default_provider, in_memory}, + {pm_partitions, 4} ]}, {partisan, [ diff --git a/src/erleans.app.src b/src/erleans.app.src index 07e3d0c..668b0f2 100644 --- 
a/src/erleans.app.src +++ b/src/erleans.app.src @@ -18,7 +18,8 @@ {modules, []}, {env, [{deactivate_after, 2700000}, %% 45 minutes {refresh_interval, 5000}, - {num_partitions, 128}]}, + {num_partitions, 128}, + {pm_partitions, 4}]}, %% Number of registry partitions {licenses, ["Apache 2"]}, {links, [{"GitHub", "https://github.com/erleans/erleans"}]} diff --git a/src/erleans_app.erl b/src/erleans_app.erl index ecf1f71..8674407 100644 --- a/src/erleans_app.erl +++ b/src/erleans_app.erl @@ -53,7 +53,7 @@ setup_partisan() -> Overrides = #{ broadcast_mods => ordsets:to_list( ordsets:union( - ordsets:from_list([erleans_pm, partisan_plumtree_backend]), + ordsets:from_list([erleans_registry_partition, partisan_plumtree_backend]), ordsets:from_list(BroadcastMods) ) ) diff --git a/src/erleans_pm.bkp b/src/erleans_pm.bkp new file mode 100644 index 0000000..6e55034 --- /dev/null +++ b/src/erleans_pm.bkp @@ -0,0 +1,1461 @@ +%% ----------------------------------------------------------------------------- +%% Copyright Tristan Sloughter 2019. All Rights Reserved. +%% Copyright Leapsight 2020 - 2023. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% ----------------------------------------------------------------------------- + +-module(erleans_pm). + +-feature(maybe_expr, enable). + +-behaviour(bondy_mst_crdt). +-behaviour(partisan_gen_server). +-behaviour(partisan_plumtree_broadcast_handler). + +-include_lib("kernel/include/logger.hrl"). 
+-include_lib("partisan/include/partisan.hrl"). +-include("erleans.hrl"). + +-moduledoc #{format => "text/markdown"}. +?MODULEDOC(""" +This module implements the `erleans_pm` server process, the Erleans +grain process registry as a State-based CRDT. + +The server state consists of the following elements: +* A set of local monitor references with form +`{pid(), erleans:grain_ref(), reference()}` for every local registration. +This is stored in a protected `ets` set table managed by the +`erleans_table_owner` process to ensure the table survives this +server's crashes. +* A distributed and globally-replicated set of mappings from +`grain_key()` to a single `partisan_remote_ref:p()`. +This is stored on `bondy_mst`. + +## Controls +* A grain registers itself and can only do it using its +`erleans:grain_ref()` as name. This is ensured by this server by +calling `erleans_grain:grain_ref()` on the process calling the +function `register_name/0`. There is no provision in the API for a process to +register another process. +* A grain unregisters itself. There is no provision in the API for a process to +unregister another process. + +## Events +* A local registered grain `DOWN` signal is received. + +## Garbage Collection + +* Tombstones are not garbage collected but this is not a major problem as we +use the `grain_ref()` as key for the MST, so the size of the MST is always +bounded to the max number of grains that ever existed. In the near future we +will support tombstone removal. + +"""). + +-define(PERSISTENT_KEY, {?MODULE, tree}). +-define(TREE, persistent_term:get(?PERSISTENT_KEY)). +-define(MONITOR_TAB, erleans_pm_monitor). +-define(TIMEOUT, 15000). + +%% This server may receive a huge amount of messages. +%% We make sure that they are stored off heap to avoid excessive GCs. +-define(OPTS, [ + {channel, application:get_env(erleans, partisan_channel, undefined)}, + {spawn_opt, [{message_queue_data, off_heap}]} +]). 
+ +-record(state, { + crdt :: bondy_mst_crdt:t(), + partisan_channel :: partisan:channel(), + initial_sync = false :: boolean() +}). + +-type t() :: #state{}. +-type grain_key() :: {GrainId :: any(), ImplMod :: module()}. +-type gossip_id() :: { + Peer :: bondy_mst_crdt:node_id(), + Root :: bondy_mst:hash() + }. +%% API +-export([start_link/0]). +-export([register_name/0]). +-export([register_name/1]). +-export([unregister_name/0]). +-export([whereis_name/1]). +-export([whereis_name/2]). +-export([grain_ref/1]). +-export([to_list/0]). +-export([lookup/1]). +-export([info/0]). + +%% BONDY_MST_CRDT CALLBACKS +-export([broadcast/1]). +-export([on_merge/1]). +-export([send/2]). +-export([sync/1]). + +%% PARTISAN_PLUMTREE_BROADCAST_HANDLER CALLBACKS +-export([broadcast_data/1]). +-export([broadcast_channel/0]). +-export([exchange/1]). +-export([exchange/2]). +-export([graft/1]). +-export([is_stale/1]). +-export([merge/2]). + +%% PARTISAN_GEN_SERVER CALLBACKS +-export([init/1]). +-export([handle_continue/2]). +-export([handle_call/3]). +-export([handle_cast/2]). +-export([handle_info/2]). +-export([terminate/2]). + + +%% TEST API +-ifdef(TEST). + -export([add_/2]). + -export([remove_/2]). + -export([register_name_/2]). + -export([unregister_name_/2]). + -dialyzer({nowarn_function, register_name_/2}). +-endif. + +-dialyzer({nowarn_function, register_name/0}). + +-compile({no_auto_import, [monitor/2]}). +-compile({no_auto_import, [monitor/3]}). +-compile({no_auto_import, [demonitor/1]}). +-compile({no_auto_import, [demonitor/2]}). + + +%% ============================================================================= +%% API +%% ============================================================================= + + + +?DOC(""" +Starts the `erleans_pm` server. +"""). +start_link() -> + partisan_gen_server:start_link({local, ?MODULE}, ?MODULE, [], ?OPTS). + + +?DOC(""" +Registers the calling process with the `grain_key()` derived from its +`erleans:grain_ref()`. 
+ +The duplicate check logic is executed by the caller concurrently while the +actual registration is serialised via the `erleans_pm` server process. + +Returns an error with the following reasons: +* `badgrain` if the calling process is not an Erleans grain. +* `timeout` if there was no response from the server within the requested time +* `{already_in_use, partisan_remote_ref:p()}` if there is already a process +registered for the same `grain_key()`. +"""). +-spec register_name() -> + ok + | {error, badgrain} + | {error, timeout} + | {error, noproc} + | {error, {already_in_use, partisan_remote_ref:p()}}. + +register_name() -> + register_name(?TIMEOUT). + + +?DOC(""" +Registers the calling process with the `grain_key()` derived from its +`erleans:grain_ref()`. + +The duplicate check logic is executed by the caller concurrently while the +actual registration is serialised via the `erleans_pm` server process. + +Returns an error with the following reasons: +* `badgrain` if the calling process is not an Erleans grain. +* `timeout` if there was no response from the server within the requested time +* `{already_in_use, partisan_remote_ref:p()}` if there is already a process +registered for the same `grain_key()`. +"""). +-spec register_name(timeout()) -> + ok + | {error, badgrain} + | {error, timeout} + | {error, {already_in_use, partisan_remote_ref:p()}}. + +register_name(Timeout) -> + case erleans:grain_ref() of + undefined -> + {error, badgrain}; + + GrainRef -> + case lookup_local_pid(GrainRef) of + undefined -> + %% We get all known registrations order by location local < + %% node(), and then by node(). + Processes = lookup(GrainRef), + + case filter_alive(Processes) of + [] -> + safe_call( + ?MODULE, {register_name, GrainRef}, Timeout + ); + + [ProcRef|_] -> + %% We found at least one active grain that is + %% reachable, so we pick it. 
+ {error, {already_in_use, ProcRef}} + end; + + Pid when Pid == self() -> + %% Idempotent, although it should not happen that the grain + %% calls us twice + ok; + + Pid when is_pid(Pid) -> + %% Anotehr local pid registered this name + ProcRef = partisan_remote_ref:from_term(Pid), + {error, {already_in_use, ProcRef}} + end + end. + + +?DOC(""" +Unregisters a grain. This call fails with `badgrain` if the calling +process is not the original caller to `register_name/0`. + +This call is serialised through the `erleans_pm` server process. +"""). +-spec unregister_name() -> ok | {error, badgrain}. + +unregister_name() -> + %% Gets the calling process grain_ref + case erleans:grain_ref() of + undefined -> + {error, badgrain}; + + GrainRef -> + partisan_gen_server:call(?MODULE, {unregister_name, GrainRef}) + end. + + +?DOC(""" +Returns a process reference for `GrainRef` unless there is no reference +in which case returns `undefined`. This function calls +`erleans_pm:whereis_name/2` passing the options `[safe]`. + +Notice that as we use an eventually consistent model and temporarily support +duplicated activations for a grain reference in different locations we could +have multiple instances in the global registry. This function chooses the +first reference in the list that represents a live process. Checking for +liveness incurs in a remote call for remote processes and thus can be +expensive in the presence of multiple instantiations. If you prefer to avoid +this check you can call `erleans_pm:whereis_name/2` passing [unsafe] as +the second argument. +"""). +-spec whereis_name(GrainRef :: erleans:grain_ref()) -> + partisan_remote_ref:p() | undefined. + +whereis_name(GrainRef) -> + whereis_name(GrainRef, [safe]). + + +?DOC(""" +Returns a process reference for `GrainRef` unless there is no reference +in which case returns `undefined`. +If the option `[safe]` is used it will return the process reference only if +its process is alive. 
Checking for liveness on remote processes incurs a +remote call. If there is no connection to the node in which the +process lives, it is deemed dead. + +If Opts is `[]` or `[unsafe]` the function will not check for liveness. +"""). +-spec whereis_name(GrainRef :: erleans:grain_ref(), Opts :: [safe | unsafe]) -> + partisan_remote_ref:p() | undefined. + +whereis_name(#{placement := stateless} = GrainRef, _) -> + whereis_stateless(GrainRef); + +whereis_name(#{placement := {stateless, _}} = GrainRef, _) -> + whereis_stateless(GrainRef); + +whereis_name(GrainRef, []) -> + whereis_name(GrainRef, [safe]); + +whereis_name(GrainRef, [_|T] = L) when T =/= [] -> + case lists:member(safe, L) of + true -> + whereis_name(GrainRef, [safe]); + + false -> + whereis_name(GrainRef, [unsafe]) + end; + +whereis_name(#{id := _} = GrainRef, [Flag]) -> + case lookup(GrainRef) of + [] -> + undefined; + + ProcRefs -> + pick(ProcRefs, [Flag]) + end. + + +?DOC(""" +Lookups all the registered grains under name `GrainRef` and returns a list of +registered references for the ref. +The list sorts local references first. +"""). +-spec lookup(GrainRef :: erleans:grain_ref() | grain_key()) -> + [partisan_remote_ref:p()]. + +lookup(#{id := _} = GrainRef) -> + lookup(grain_key(GrainRef)); + +lookup({_, _} = GrainKey) -> + case bondy_mst:get(?TREE, GrainKey) of + undefined -> + []; + + AWSet -> + sort_conflicting_values(AWSet) + end. + + +?DOC(""" +Returns the `erleans:grain_ref` for a pid or Partisan process reference. +This is more efficient than `erleans_grain:grain_ref` as it doesn't call the +grain process for local grains, which might be busy handling signals, but +instead uses this module's ets table used for monitoring. + +In case of a remote reference, this incurs in an RPC to the peer node where the +grain is activated. +"""). +-spec grain_ref(partisan:any_pid()) -> + {ok, erleans:grain_ref()} | {error, timeout | any()}. 
+ +grain_ref(Pid) when is_pid(Pid) -> + %% A local grain so we use the monitor table which is faster + case monitor_lookup(Pid) of + {Pid, GrainRef, _} -> + {ok, GrainRef}; + + undefined -> + {error, not_found} + end; + +grain_ref(ProcRef) -> + %% We know this is not a pid so it must be a partisan process reference. + partisan:is_pid(ProcRef) orelse error({badarg, [ProcRef]}), + + case partisan_remote_ref:is_local(ProcRef) of + true -> + grain_ref(partisan_remote_ref:to_term(ProcRef)); + + false -> + %% We use RPC cause grain_ref uses ets directly so we avoid blocking + %% our peer process + Peer = partisan:node(ProcRef), + case partisan_rpc:call(Peer, ?MODULE, grain_ref, [ProcRef], 5000) of + {badrpc, Reason} -> + {error, Reason}; + + Result -> + Result + end + end. + + +?DOC(""" +The same as calling `to_list([safe])`. +"""). +-spec to_list() -> [{grain_key(), partisan_remote_ref:p()}]. + +to_list() -> + to_list([safe]). + + +?DOC(""" +Returns the list of all registry entries. + +## Options +* `safe` - returns only entries for grains that are known to be alive +* `unsafe` - returns all entries without checking for liveness. +"""). +-spec to_list([safe | unsafe]) -> [{grain_key(), partisan_remote_ref:p()}]. + +to_list([Flag]) -> + L = bondy_mst:fold( + ?TREE, + fun({GrainKey, Value}, Acc) -> + case sets:to_list(state_awset:query(Value)) of + [] -> + Acc; + + L -> + case pick(L, [Flag]) of + undefined -> + Acc; + + ProcRef -> + [{GrainKey, ProcRef} | Acc] + end + end + end, + [] + ), + lists:reverse(L). + + +?DOC(""" +Triggers a synchronisation exchange with a peer. +Calls `exchange/2` with an empty map as the second argument. +"""). +-spec sync(node()) -> {ok, pid()} | {error, term()}. + +sync(Peer) -> + sync(Peer, #{}). + + +?DOC(""" +Triggers a synchronisation exchange with a peer. +"""). +-spec sync(node(), map()) -> ok | {error, term()}. + +sync(Peer, Opts) -> + partisan_gen_server:call(?MODULE, {crdt_trigger, Peer, Opts}). 
+ + +info() -> + partisan_gen_server:call(?MODULE, info). + + + +%% ============================================================================= +%% BONDY_MST_CRDT CALLBACKS +%% ============================================================================= + + +?DOC(""" +Implementation of the `bondy_mst_crdt` callback. +Casts message `Message` to this server on node `Peer` using `partisan`. +"""). +send(Peer, Message) -> + partisan_gen_server:cast({?MODULE, Peer}, {crdt_message, Message}). + + +?DOC(""" +Implementation of the `bondy_mst_crdt` callback. +Broadcasts message `Gossip` to peers using Plumtree (Epidemis broadcast trees). +"""). +broadcast(Gossip) -> + partisan:broadcast(Gossip, ?MODULE). + + +?DOC(""" +Implementation of the `bondy_mst_crdt` callback. +Removes stale entries and duplicates after merge. +"""). +on_merge(Peer) -> + partisan_gen_server:cast(?MODULE, {crdt_on_merge, Peer}). + + + +%% ============================================================================= +%% PARTISAN_PLUMTREE_BROADCAST_HANDLER CALLBACKS +%% ============================================================================= + + + +?DOC(""" +Implementation of the `partisan_plumtree_backend` callback. +Returns the channel to be used when broadcasting. +"""). +-spec broadcast_channel() -> partisan:channel(). + +broadcast_channel() -> + application:get_env(erleans, partisan_broadcast_channel, undefined). + + +?DOC(""" +Implementation of the `partisan_plumtree_backend` callback. +Deconstructs a broadcast that is sent using `broadcast/2` returning the message +id and payload. + +> This function is part of the implementation of the +partisan_plumtree_broadcast_handler behaviour. +> You should never call it directly. +"""). +-spec broadcast_data(Gossip :: bondy_mst_crdt:gossip()) -> + { + MessageId :: {bondy_mst_crdt:node_id(), bondy_mst:hash()}, + Payload :: bondy_mst_crdt:gossip() + }. 
+ +broadcast_data(Gossip) -> + #{from := Peer, root := Root} = bondy_mst_crdt:gossip_data(Gossip), + {{Peer, Root}, Gossip}. + + +?DOC(""" +Implementation of the `partisan_plumtree_backend` callback. +Merges a remote copy of an object record sent via broadcast w/ the +local view for the key contained in the message id. If the remote copy is +causally older than the current data stored then `false` is returned and no +updates are merged. Otherwise, the remote copy is merged (possibly +generating siblings) and `true` is returned. + +> This function is part of the implementation of the +partisan_plumtree_broadcast_handler behaviour. +> You should never call it directly. +"""). +-spec merge(GossipId :: gossip_id(), Payload :: bondy_mst_crdt:gossip()) -> + boolean(). + +merge(_Id, Gossip) -> + partisan_gen_server:call(?MODULE, {crdt_merge, Gossip}). + + +?DOC(""" +Implementation of the `partisan_plumtree_backend` callback. +Same as merge/2 but merges the object on `Node' + +> This function is part of the implementation of the +partisan_plumtree_broadcast_handler behaviour. +> You should never call it directly. +"""). +-spec merge( + Peer :: node(), + Root :: bondy_mst:hash(), + Payload :: bondy_mst_crdt:gossip()) -> boolean(). + +merge(Peer, _Root, Gossip) -> + partisan_gen_server:call({?MODULE, Peer}, {crdt_merge, Gossip}). + + +?DOC(""" +Implementation of the `partisan_plumtree_backend` callback. +When a peer broadcasts a message it does it to the nodes in its eager-push set +only, but also simultaneously sends I_HAVE notifications to nodes in its +lazy-push set instead of the entire message. This callback is the one that +Plumtree calls when receiving an I_HAVE message. + +The main idea is: +“I have seen a broadcast message with this root. If you need it, let me know.” + +This saves bandwidth, because instead of blindly sending every neighbor the full +payload, the node sends just the root hash. 
The lazy neighbors can decide +whether they need the full message or not. + +If function returns `true` then Plumtree will do nothing. However,if it returns +`false` then Plumtree will `graft` the message from the peer and send it to us. + +> This function is part of the implementation of the +partisan_plumtree_broadcast_handler behaviour. +> You should never call it directly. +"""). +-spec is_stale(gossip_id()) -> boolean(). + +is_stale({Peer, Root}) -> + %% In our case the I_HAVE message is the root of the peer's tree, so we + %% always return `true` signaling Plumtree that we do not need the message, + %% and we send ourself a message to potentially init a merge with the peer + %% i.e. in this case we take the job of synchonising the CRDT in out hands + %% instead of relying on Plumtree. + ok = partisan_gen_server:cast(?MODULE, {crdt_maybe_merge, Peer, Root}), + true. + + +?DOC(""" +Implementation of the `partisan_plumtree_backend` callback. +In Plumtree this is used to return the object associated with the given prefixed +message id if the currently stored version has an equal context. Otherwise +returning the atom `stale`. + +Because it assumes that a grafted context can only be causally older than +the local view, a `stale` response means there is another message that +subsumes the grafted one. + +> This function is part of the implementation of the +partisan_plumtree_broadcast_handler behaviour. +> You should never call it directly. +"""). +-spec graft(gossip_id()) -> + stale | {ok, bondy_mst_crdt:gossip()} | {error, term()}. + +graft({_Peer, _Root}) -> + %% In our case, the message_id is just the peer's root hash, so in case + %% we contain the root we return a Gossip message with our root. Otherwise + %% we return 'stale'. + %% partisan_gen_server:call(?MODULE, {crdt_graft, Peer, Root}). + {error, disabled}. + + +?DOC(""" +Calls `sync/1`. +"""). +-spec exchange(node()) -> {ok, pid()} | {error, term()}. + +exchange(Peer) -> + exchange(Peer, #{}). 
+ + +?DOC(""" +Calls `sync/2`. +"""). +-spec exchange(node(), map()) -> ok | {error, term()}. + +exchange(Peer, Opts) -> + sync(Peer, Opts). + + + +%% ============================================================================= +%% PARTISAN_GEN_SERVER BEHAVIOR CALLBACKS +%% ============================================================================ + + + +-spec init(Args :: term()) -> {ok, State :: t()}. + +init(_) -> + %% Trap exists otherwise terminate/1 won't be called when shutdown by + %% supervisor. + erlang:process_flag(trap_exit, true), + + %% Create or claim ets table. + %% If this server crashes, data will be preserved. + {ok, ?MONITOR_TAB} = erleans_table_owner:add_or_claim( + ?MONITOR_TAB, + [ + set, + protected, + named_table, + {keypos, 1}, + {write_concurrency, true}, + {read_concurrency, true}, + {decentralized_counters, true} + ] + ), + + %% We monitor all nodes so that we can cleanup our view of the registry + partisan:monitor_nodes(true), + + {channel, Channel} = lists:keyfind(channel, 1, partisan_gen:get_opts()), + + %% We wrap the tree using the exchange module + Node = partisan:node(), + Opts = #{ + %% MST opts + hash_algorithm => sha256, + merger => fun mst_merge_value/3, + store => bondy_mst_ets_store, + store_opts => #{ + name => atom_to_binary(?MODULE), + persistent => true + }, + %% CRDT opts + callback_mod => ?MODULE, + max_merges => 1, + max_merges_per_root => 1, + max_versions => 10, + version_ttl => timer:seconds(30), + fwd_bcast => false, + consistency_model => eventual + }, + + %% We create an ets-based MST bound to this process. + %% The ets table will be garbage collected if this process terminates. + CRDT = bondy_mst_crdt:new(Node, Opts), + Tree = bondy_mst_crdt:tree(CRDT), + + %% ets-based trees support read_concurrency (option store_opts.persistent) + %% so we can cache and share it using persistent_term to avoid a call to + %% this process. 
+ ok = persistent_term:put(?PERSISTENT_KEY, Tree), + + State = #state{ + crdt = CRDT, + partisan_channel = Channel + }, + + {ok, State, {continue, monitor_existing}}. + + +handle_continue(monitor_existing, State0) -> + %% This prevents any grain to be registered as we are blocking the server + %% until we finish. + %% We fold the claimed ?MONITOR_TAB table to find any existing + %% registrations. In case the table is new, it would be empty. Otherwise, + %% we would iterate over registrations that were done by a previous + %% instance of this server before it crashed. + %% We re-register/monitor alive pids and remove dead ones. + Fun = fun + ({Pid, GrainRef, _OldMRef}, Acc0) -> + case erlang:is_process_alive(Pid) of + true -> + %% The process is still alive, but the monitor has died with + %% the previous instance of this gen_server, so we monitor + %% again. We use relaxed mode which allows us to update the + %% existing registration on ?MONITOR_TAB and the MST. + {_, Acc} = do_register_name(Acc0, GrainRef, Pid, relaxed), + Acc; + + false -> + %% The process has died, so we unregister. This will also + %% remove the registration from the MST. + {_, Acc} = do_unregister_name(Acc0, GrainRef, Pid), + Acc + end + end, + State = lists:foldl(Fun, State0, ets:tab2list(?MONITOR_TAB)), + + %% We should now have all existing local grains re-registered on this + %% server and broadcast messages sent to cluster peers. + {noreply, State}; + +handle_continue(_, State) -> + {noreply, State}. 
+ +handle_call({register_name, GrainRef}, {Caller, _}, State0) +when is_pid(Caller) -> + {Reply, State} = do_register_name(State0, GrainRef, Caller), + {reply, Reply, State}; + +handle_call({register_name, _}, _From, State) -> + %% A call from a remote node, not allowed + {reply, {error, not_local}, State}; + +handle_call({unregister_name, GrainRef}, {Caller, _}, State0) +when is_pid(Caller) -> + {Reply, State} = do_unregister_name(State0, GrainRef, Caller), + {reply, Reply, State}; + +handle_call({unregister_name, _}, _From, State) -> + %% A call from a remote node, now allowed + {reply, {error, not_local}, State}; + +handle_call({register_name_test, GrainRef, ProcRef}, _From, State0) -> + %% Used for testing only + {Reply, State} = do_register_name_test(State0, GrainRef, ProcRef), + {reply, Reply, State}; + +handle_call({unregister_name_test, GrainRef, ProcRef}, _From, State0) -> + %% Used for testing only + Key = grain_key(GrainRef), + {Reply, State} = do_unregister_name_test(State0, Key, ProcRef), + {reply, Reply, State}; + +handle_call({add_test, GrainRef, ProcRef}, _From, State0) -> + %% Used for testing only + Key = grain_key(GrainRef), + State = add(State0, Key, ProcRef, partisan_remote_ref:node(ProcRef)), + {reply, ok, State}; + +handle_call({remove_test, GrainRef, ProcRef}, _From, State0) -> + %% Used for testing only + Key = grain_key(GrainRef), + State = remove(State0, Key, ProcRef, partisan_remote_ref:node(ProcRef)), + {reply, ok, State}; + +handle_call({crdt_merge, Gossip}, _From, State) -> + CRDT0 = State#state.crdt, + Root0 = bondy_mst_crdt:root(CRDT0), + CRDT = bondy_mst_crdt:handle(CRDT0, Gossip), + Root = bondy_mst_crdt:root(CRDT), + + %% Required by Plumtree. + %% Merges a remote copy of an object record sent via broadcast w/ the + %% local view for the key contained in the message id. If the remote copy is + %% causally older than the current data stored then `false` is returned and + %% no updates are merged. 
Otherwise, the remote copy is merged (possibly + %% generating siblings) and `true` is returned. + %% Since we will performing a merge if required during + %% bondy_mst_crdt:handle/2 we reply `false`. + Reply = Root =/= Root0, + {reply, Reply, State#state{crdt = CRDT}}; + +handle_call({crdt_trigger, Peer, _Opts}, _From, State) -> + Reply = bondy_mst_crdt:trigger(State#state.crdt, Peer), + {reply, Reply, State}; + +handle_call(info, _From, State) -> + Reply = #{ + tree => #{ + root => bondy_mst_crdt:root(State#state.crdt) + }, + local_registry => #{ + memory => ets:info(?MONITOR_TAB, memory), + size => ets:info(?MONITOR_TAB, size) + } + }, + {reply, Reply, State}; + +handle_call(_Request, _From, State) -> + {reply, {error, unknown_call}, State}. + + +-spec handle_cast(Request :: term(), State :: t()) -> + {noreply, NewState :: t()}. + +handle_cast({crdt_maybe_merge, Peer, Root}, State) -> + Root == bondy_mst_crdt:root(State#state.crdt) + andalso bondy_mst_crdt:trigger(State#state.crdt, Peer), + {noreply, State}; + +handle_cast({crdt_on_merge, _Peer}, #state{initial_sync = false} = State0) -> + State = remove_stale(State0#state{initial_sync = true}), + ok = maybe_deactivate_local_duplicates(State), + {noreply, State}; + +handle_cast({crdt_on_merge, _Peer}, #state{initial_sync = true} = State) -> + ok = maybe_deactivate_local_duplicates(State), + {noreply, State}; + +handle_cast({crdt_message, Msg}, State) -> + %% Fwd message to bondy_mst_crdt + CRDT = bondy_mst_crdt:handle(State#state.crdt, Msg), + {noreply, State#state{crdt = CRDT}}; + +handle_cast({force_unregister_name, GrainKey, ProcRef}, State0) -> + %% Internal case to deal with inconsistencies + case partisan_remote_ref:is_local(ProcRef) of + true -> + Pid = partisan_remote_ref:to_pid(ProcRef), + {_, State} = do_unregister_name(State0, GrainKey, Pid), + {noreply, State}; + + false -> + {noreply, State0} + end; + +handle_cast(_Request, State) -> + {noreply, State}. 
+ + +-spec handle_info(Message :: term(), State :: t()) -> + {noreply, NewState :: t()}. + +handle_info({'ETS-TRANSFER', erleans_pm_monitor, _, []}, State) -> + {noreply, State}; + +handle_info({nodedown, Node}, State) -> + CRDT = bondy_mst_crdt:cancel_merge(State#state.crdt, Node), + {noreply, State#state{crdt = CRDT}}; + +handle_info({nodeup, _Node}, State) -> + {noreply, State}; + +handle_info({'DOWN', MRef, process, Pid, _Info}, State0) when is_pid(Pid) -> + %% Registered (monitored) grain exit + ?LOG_INFO("Grain down ~p", [{Pid, MRef}]), + {_, State} = do_unregister_process(State0, Pid), + {noreply, State}; + +handle_info(Event, State) -> + ?LOG_INFO("Received unknown event ~p", [Event]), + {noreply, State}. + + +-spec terminate( + Reason :: (normal | shutdown | {shutdown, term()} | term()), + State :: t()) -> ok. + +terminate(_Reason, State) -> + ok = unregister_all_local(State), + _ = persistent_term:erase(?PERSISTENT_KEY), + ok. + + + +%% ============================================================================= +%% PRIVATE +%% ============================================================================= + + +%% @private +safe_call(ServerRef, Cmd) -> + safe_call(ServerRef, Cmd, ?TIMEOUT). + + +%% @private +safe_call(ServerRef, Cmd, Timeout) -> + try + partisan_gen_server:call(ServerRef, Cmd, Timeout) + catch + _:Reason:_ -> + {error, Reason} + end. + + +%% @private +add(#state{} = State, GrainKey, Value) -> + add(#state{} = State, GrainKey, Value, partisan:node()). + + +%% @private +add(#state{crdt = CRDT0} = State, Key, Value, Node) -> + Tree = bondy_mst_crdt:tree(CRDT0), + + AWSet1 = + case bondy_mst:get(Tree, Key) of + undefined -> + state_awset:new(); + + AWSet0 -> + AWSet0 + end, + + {ok, AWSet} = state_type:mutate({add, Value}, Node, AWSet1), + CRDT = bondy_mst_crdt:put(CRDT0, Key, AWSet), + State#state{crdt = CRDT}. + + +%% @private +remove(State, GrainKey, Value) -> + remove(State, GrainKey, Value, partisan:node()). 
+ + +%% @private +remove(State, Key, Value, Node) -> + remove(State, Key, Value, Node, #{}). + + +%% @private +remove(#state{crdt = CRDT0} = State, Key, Value, Node, Opts) -> + CRDT = crdt_remove(CRDT0, Key, Value, Node, Opts), + State#state{crdt = CRDT}. + + +%% @private +crdt_remove(CRDT, Key, Value, Node, Opts) -> + Tree = bondy_mst_crdt:tree(CRDT), + AWSet1 = + case bondy_mst:get(Tree, Key) of + undefined -> + state_awset:new(); + + AWSet0 -> + AWSet0 + end, + {ok, AWSet} = state_type:mutate({rmv, Value}, Node, AWSet1), + bondy_mst_crdt:put(CRDT, Key, AWSet, Opts). + + +%% @private +awset_remove(AWSet0, Value) -> + {ok, AWSet} = state_type:mutate({rmv, Value}, partisan:node(), AWSet0), + AWSet. + + +%% @private +is_monitored(Pid) -> + monitor_lookup(Pid) =/= undefined. + + +%% @private +monitor_lookup(Pid) -> + case ets:lookup(?MONITOR_TAB, Pid) of + [Monitor] -> + Monitor; + + _ -> + undefined + end. + +%% @private +lookup_local_pid(GrainRef) -> + case ets:match_object(?MONITOR_TAB, {'_', GrainRef, '_'}) of + [{Pid, GrainRef, _}] -> + Pid; + + _ -> + undefined + end. + + +%% @private +mst_merge_value(GrainKey, AWSet1, AWSet2) -> + %% We merge de CRDTs + AWSet3 = state_awset:merge(AWSet1, AWSet2), + %% We remove local grains that have been deactivated + AWSet = remove_deactivated(AWSet3), + ?LOG_DEBUG(#{ + description => "Merged values", + key => GrainKey, + rhs => AWSet1, + lhs => AWSet2, + result => AWSet + }), + ok = maybe_deactivate_local_duplicate(GrainKey, AWSet), + AWSet. + + +%% @private +-spec remove_deactivated(state_awset:state_awset()) -> + state_awset:state_awset(). + +remove_deactivated(AWSet) -> + Fun = fun(ProcRef, Acc) -> + maybe + true ?= partisan_remote_ref:is_local(ProcRef), + Pid ?= partisan_remote_ref:to_pid(ProcRef), + undefined ?= monitor_lookup(Pid), + %% Not monitored so it has been deactivated i.e. the peer node has a + %% stale entry. We remove it from the set. 
+ ?LOG_DEBUG(#{ + message => "Removing grain from registry", + process_ref => ProcRef, + reason => deactivated + }), + awset_remove(Acc, ProcRef) + else + false -> + %% Not local, so we ignore it + Acc; + + {_Pid, _GrainRef, _Mref} -> + %% Monitored, so we ignore it + Acc + + end + end, + sets:fold(Fun, AWSet, state_awset:query(AWSet)). + + +%% This function assumes remove_deactivated/2 was called on AWSet before. +maybe_deactivate_local_duplicate(GrainKey, AWSet) -> + All = sets:to_list(state_awset:query(AWSet)), + + maybe + %% Partition based on locality + {[ProcRef], [_ | _] = Remotes} ?= + lists:partition(fun partisan_remote_ref:is_local/1, All), + %% We have duplicates, so we need to check if our local duplicate should + %% belong here. + false ?= safe_is_location_right(GrainKey, ProcRef), + %% The grain should not be here, so we will deactivate but only if + %% we can reach any of the remote duplicates + true ?= lists:any(fun ?MODULE:is_reachable/1, Remotes), + %% Since at least one remote grain is reachable, we deactivate the + %% local one + deactivate_grain(GrainKey, ProcRef) + else + _ -> + ok + end. + + +%% @private +maybe_deactivate_local_duplicates(#state{crdt = CRDT}) -> + Tree = bondy_mst_crdt:tree(CRDT), + Fun = fun({Key, AWSet}) -> maybe_deactivate_local_duplicate(Key, AWSet) end, + bondy_mst:foreach(Tree, Fun). + + +%% @private +safe_is_location_right({_, Mod}, LocalPRef) -> + try + Pid = partisan_remote_ref:to_pid(LocalPRef), + erleans_grain:is_location_right(Mod, Pid) + catch + Class:Reason:Stacktrace -> + ?LOG_WARNING(#{ + message => + "erleans_grain:is_location_right/2 failed. " + "Returning true by default", + implementing_module => Mod, + process_ref => LocalPRef, + class => Class, + reason => Reason, + stacktrace => Stacktrace + }), + true + end. + + +remove_stale(#state{crdt = CRDT} = State) -> + Tree = bondy_mst_crdt:tree(CRDT), + Fun = fun({Key, AWSet}, Acc) -> remove_stale(Acc, Key, AWSet) end, + bondy_mst:fold(Tree, Fun, State). 
+ + +%% @private +remove_stale(State, Key, AWSet) -> + Set = state_awset:query(AWSet), + Fun = fun(ProcRef, Acc) -> + maybe + true ?= partisan_remote_ref:is_local(ProcRef), + Pid ?= partisan_remote_ref:to_pid(ProcRef), + undefined ?= monitor_lookup(Pid), + %% Not monitored so it has been deactivated i.e. the peer node has a + %% stale entry. We remove it from the set. + ?LOG_DEBUG(#{ + message => "Removing grain from registry", + process_ref => ProcRef, + reason => deactivated + }), + %% We disable broadcasting + remove(Acc, Key, ProcRef, partisan:node(), #{broadcast => false}) + else + _ -> + Acc + end + end, + sets:fold(Fun, State, Set). + + +%% @private +sort_conflicting_values(AWSet) -> + Set = state_awset:query(AWSet), + lists:sort( + fun(A, B) -> + Result = { + partisan_remote_ref:is_local(A), + partisan_remote_ref:is_local(B) + }, + case Result of + {true, _} -> + true; + + {_, true} -> + false; + + {false, false} -> + A =< B + end + end, + sets:to_list(Set) + ). + + +%% @private +%% Register the calling process with GrainRef unless another local +%% registration exists. +-spec do_register_name(t(), GrainRef :: erleans:grain_ref(), Pid :: pid()) -> + {ok, t()} | {{error, {already_in_use, partisan_remote_ref:p()}}, t()}. + +do_register_name(State, GrainRef, Pid) -> + do_register_name(State, GrainRef, Pid, strict). + + +%% @private +%% Register the calling process with GrainRef unless another local +%% registration exists. +-spec do_register_name( + t(), GrainRef :: erleans:grain_ref(), Pid :: pid(), strict | relaxed) -> + {ok, t()} | {{error, {already_in_use, partisan_remote_ref:p()}}, t()}. + +do_register_name(State0, GrainRef, Pid, Mode) when is_pid(Pid) -> + case monitor(GrainRef, Pid, Mode) of + ok -> + Key = grain_key(GrainRef), + Value = partisan_remote_ref:from_term(Pid), + State = add(State0, Key, Value), + {ok, State}; + + {error, _} = Error -> + {Error, State0} + end. 
+ + +%% Used for testing only (see export of register_name/2) +do_register_name_test(State0, GrainRef, ProcRef) -> + Key = grain_key(GrainRef), + State = add(State0, Key, ProcRef, partisan:node(ProcRef)), + {ok, State}. + + +%% @private +-spec do_unregister_process(t(), Pid :: pid()) -> {ok, t()}. + +do_unregister_process(State0, Pid) when is_pid(Pid) -> + case monitor_lookup(Pid) of + {Pid, GrainRef, _} -> + Key = grain_key(GrainRef), + do_unregister_name(State0, Key, Pid); + undefined -> + {ok, State0} + end. + + +%% @private +-spec do_unregister_name(t(), GrainKey :: grain_key(), Pid :: pid()) -> + {ok, t()}. + +do_unregister_name(State0, GrainKey, Pid) when is_pid(Pid) -> + %% Demonitor + ok = demonitor(Pid), + true = ets:delete(?MONITOR_TAB, Pid), + + Value = partisan_remote_ref:from_term(Pid), + State = remove(State0, GrainKey, Value), + {ok, State}. + + +do_unregister_name_test(State0, GrainKey, ProcRef) -> + State = remove(State0, GrainKey, ProcRef, partisan:node(ProcRef)), + {ok, State}. + + +%% @private +grain_key(#{id := Id, implementing_module := Mod}) -> + {Id, Mod}. + + +%% @private +monitor(GrainRef, Pid, strict) when is_pid(Pid) -> + Mref = erlang:monitor(process, Pid), + + case ets:insert_new(?MONITOR_TAB, {Pid, GrainRef, Mref}) of + true -> + ok; + + false -> + true = erlang:demonitor(Mref, [flush]), + {OtherPid, GrainRef, _} = monitor_lookup(Pid), + {error, {already_in_use, partisan_remote_ref:from_term(OtherPid)}} + end; + +monitor(GrainRef, Pid, relaxed) when is_pid(Pid) -> + Mref = erlang:monitor(process, Pid), + true = ets:insert(?MONITOR_TAB, {Pid, GrainRef, Mref}), + ok. + + +%% @private +demonitor(Pid) -> + case ets:take(?MONITOR_TAB, Pid) of + [{Pid, _, Mref}] -> + true = erlang:demonitor(Mref, [flush]), + ok; + + [] -> + ok + end. + + +%% @private +-spec deactivate_grain(grain_key(), partisan_remote_ref:t()) -> ok. 
+ +deactivate_grain(GrainKey, ProcRef) -> + %% This call is async (uses a cast) so we are safe to do it + case erleans_grain:deactivate(ProcRef) of + ok -> + ?LOG_NOTICE(#{ + description => "Succeded to deactivate duplicate", + grain => GrainKey, + pid => ProcRef + }), + ok; + + {error, Reason} when Reason == not_found; Reason == not_active -> + ?LOG_ERROR(#{ + description => "Failed to deactivate duplicate", + grain => GrainKey, + pid => ProcRef, + reason => Reason + }), + %% This is an inconsistency, we need to cleanup. + %% We ask the peer to do it, via a private cast (peer can be us) + partisan_gen_server:cast( + {?MODULE, partisan_remote_ref:node(ProcRef)}, + {force_unregister_name, GrainKey, ProcRef} + ); + + {error, Reason} -> + ?LOG_ERROR(#{ + description => "Failed to deactivate duplicate", + grain => GrainKey, + pid => ProcRef, + reason => Reason + }), + ok + end. + + +%% @private +whereis_stateless(GrainRef) -> + case gproc_pool:pick_worker(GrainRef) of + false -> + undefined; + Pid -> + partisan_remote_ref:from_term(Pid) + end. + + +%% @private +pick([], _) -> + undefined; + +pick(L, []) -> + pick(L, [unsafe]); + +pick([H], [unsafe]) -> + H; + +pick([H | _], [unsafe]) -> + H; + +pick(List, [safe]) -> + pick_alive(List). + + +%% @private +pick_alive([H | T]) -> + try partisan:is_process_alive(H) of + true -> + H; + + false -> + pick_alive(T) + + catch + error:_ -> + pick_alive(T) + end; + +pick_alive([]) -> + undefined. + + +%% @private +%% Returns a new list where all the process references are know to be +%% reachable. If the remote check fails, it returns false. +filter_alive(undefined) -> + []; + +filter_alive(ProcRefs) when is_list(ProcRefs) -> + lists:filter( + fun(ProcRef) -> + try + partisan:is_process_alive(ProcRef) + catch + _:_ -> + false + end + end, + ProcRefs + ). + + +%% @private +is_reachable(ProcRef) -> + try + partisan:is_connected(partisan:node(ProcRef)) + catch + _:_ -> + false + end. 
+ + + +%% @private +%% Unregisters all local alive processes. +-spec unregister_all_local(t()) -> ok. + +unregister_all_local(State) -> + true = ets:safe_fixtable(?MONITOR_TAB, true), + try + unregister_local(State, ets:first(?MONITOR_TAB)) + catch + Class:Reason:Stacktrace -> + ?LOG_ERROR(#{ + message => "Unexpected error", + class => Class, + reason => Reason, + stacktrace => Stacktrace + }), + ok + after + true = ets:safe_fixtable(?MONITOR_TAB, false) + end. + + +%% @private +unregister_local(State0, Pid) when is_pid(Pid) -> + %% {Pid, GrainRef, MRef} + GrainRef = ets:lookup_element(?MONITOR_TAB, Pid, 2), + {ok, State} = do_unregister_name(State0, GrainRef, Pid), + unregister_local(State, ets:next(?MONITOR_TAB, Pid)); + +%% unregister_local(State, #{id := _} = GrainRef) -> +%% %% Ignore as we have two entries per registration +%% %% {Pid, GrainRef} and {GrainRef, Pid}, we just use the first +%% unregister_local(State, ets:next(?MONITOR_TAB, GrainRef)); + +unregister_local(_, '$end_of_table') -> + ok. + + + +%% ============================================================================= +%% TEST +%% ============================================================================= + + + +-ifdef(TEST). + + + +%% Registers the calling process with the `id` attribute of `GrainRef`. +%% This call is serialised the `erleans_pm` server process. +-spec register_name_(erleans:grain_ref(), partisan_remote_ref:p()) -> + ok + | {error, {already_in_use, partisan_remote_ref:p()}}. + +register_name_(GrainRef, ProcRef) -> + partisan_gen_server:call(?MODULE, {register_name_test, GrainRef, ProcRef}). + + + +%% It can only be called by the caller +%% This call is serialised the `erleans_pm` server process. +-spec unregister_name_(erleans:grain_ref(), partisan_remote_ref:p()) -> + ok | {error, badgrain | not_owner}. + +unregister_name_(#{id := _} = GrainRef, ProcRef) -> + partisan_gen_server:call( + ?MODULE, {unregister_name_test, GrainRef, ProcRef} + ). 
+ + +%% Registers the calling process with the `id` attribute of `GrainRef`. +%% This call is serialised the `erleans_pm` server process. +-spec add_(erleans:grain_ref(), partisan_remote_ref:p()) -> + ok + | {error, {already_in_use, partisan_remote_ref:p()}}. + +add_(GrainRef, ProcRef) -> + partisan_gen_server:call(?MODULE, {add_test, GrainRef, ProcRef}). + + +%% It can only be called by the caller +%% This call is serialised the `erleans_pm` server process. +-spec remove_(erleans:grain_ref(), partisan_remote_ref:p()) -> + ok | {error, badgrain | not_owner}. + +remove_(#{id := _} = GrainRef, ProcRef) -> + partisan_gen_server:call( + ?MODULE, {remove_test, GrainRef, ProcRef} + ). + + +-endif. + + + diff --git a/src/erleans_pm.erl b/src/erleans_pm.erl index 6e55034..33a9901 100644 --- a/src/erleans_pm.erl +++ b/src/erleans_pm.erl @@ -19,75 +19,24 @@ -feature(maybe_expr, enable). --behaviour(bondy_mst_crdt). --behaviour(partisan_gen_server). --behaviour(partisan_plumtree_broadcast_handler). - -include_lib("kernel/include/logger.hrl"). --include_lib("partisan/include/partisan.hrl"). --include("erleans.hrl"). +-include("docs.hrl"). -moduledoc #{format => "text/markdown"}. ?MODULEDOC(""" -This module implements the `erleans_pm` server process, the Erleans -grain process registry as a State-based CRDT. - -The server state consists of the following elements: -* A set of local monitor references with form -`{pid(), erleans:grain_ref(), reference()}` for every local registration. -This is stored in a protected `ets` set table managed by the -`erleans_table_owner` process to ensure the table survives this -server's crashes. -* A distributed and globally-replicated set of mappings from -`grain_key()` to a single `partisan_remote_ref:p()`. -This is stored on `bondy_mst`. - -## Controls -* A grain registers itself and can only do it using its -`erleans:grain_ref()` as name. 
This is ensured by this server by -calling `erleans_grain:grain_ref()` on the process calling the -function `register_name/0`. There is no provision in the API for a process to -register another process. -* A grain unregisters itself. There is no provision in the API for a process to -unregister another process. - -## Events -* A local registered grain `DOWN` signal is received. - -## Garbage Collection - -* Tombstones are not garbage collected but this is not a major problem as we -use the `grain_ref()` as key for the MST, so the size of the MST is always -bounded to the max number of grains that ever existed. In the near future we -will support tombstone removal. +This module implements the Erleans grain process registry router. +It maintains the same API as the original erleans_pm but distributes +grains across N partitioned bondy_mst_crdt instances for better performance. +The router uses consistent hashing via gproc_pool to select the appropriate +partition for each grain based on its grain_key(). """). --define(PERSISTENT_KEY, {?MODULE, tree}). --define(TREE, persistent_term:get(?PERSISTENT_KEY)). --define(MONITOR_TAB, erleans_pm_monitor). -define(TIMEOUT, 15000). -%% This server may receive a huge amount of messages. -%% We make sure that they are stored off heap to avoid excessive GCs. --define(OPTS, [ - {channel, application:get_env(erleans, partisan_channel, undefined)}, - {spawn_opt, [{message_queue_data, off_heap}]} -]). - --record(state, { - crdt :: bondy_mst_crdt:t(), - partisan_channel :: partisan:channel(), - initial_sync = false :: boolean() -}). +-type grain_key() :: {GrainId :: any(), ImplMod :: module()}. --type t() :: #state{}. --type grain_key() :: {GrainId :: any(), ImplMod :: module()}. --type gossip_id() :: { - Peer :: bondy_mst_crdt:node_id(), - Root :: bondy_mst:hash() - }. -%% API +%% API - Maintains exact same interface as original erleans_pm -export([start_link/0]). -export([register_name/0]). -export([register_name/1]). 
@@ -96,32 +45,14 @@ will support tombstone removal. -export([whereis_name/2]). -export([grain_ref/1]). -export([to_list/0]). +-export([to_list/1]). -export([lookup/1]). -export([info/0]). - -%% BONDY_MST_CRDT CALLBACKS --export([broadcast/1]). --export([on_merge/1]). --export([send/2]). -export([sync/1]). +-export([sync/2]). -%% PARTISAN_PLUMTREE_BROADCAST_HANDLER CALLBACKS --export([broadcast_data/1]). --export([broadcast_channel/0]). --export([exchange/1]). --export([exchange/2]). --export([graft/1]). --export([is_stale/1]). --export([merge/2]). - -%% PARTISAN_GEN_SERVER CALLBACKS --export([init/1]). --export([handle_continue/2]). --export([handle_call/3]). --export([handle_cast/2]). --export([handle_info/2]). --export([terminate/2]). - +%% Partition selection +-export([select_partition/1]). %% TEST API -ifdef(TEST). @@ -129,42 +60,24 @@ will support tombstone removal. -export([remove_/2]). -export([register_name_/2]). -export([unregister_name_/2]). - -dialyzer({nowarn_function, register_name_/2}). -endif. --dialyzer({nowarn_function, register_name/0}). - --compile({no_auto_import, [monitor/2]}). --compile({no_auto_import, [monitor/3]}). --compile({no_auto_import, [demonitor/1]}). --compile({no_auto_import, [demonitor/2]}). - - %% ============================================================================= %% API %% ============================================================================= - - ?DOC(""" -Starts the `erleans_pm` server. +Starts the registry supervisor which manages N partition processes. """). +-spec start_link() -> {ok, pid()} | {error, term()}. start_link() -> - partisan_gen_server:start_link({local, ?MODULE}, ?MODULE, [], ?OPTS). - + erleans_registry_sup:start_link(). ?DOC(""" Registers the calling process with the `grain_key()` derived from its `erleans:grain_ref()`. -The duplicate check logic is executed by the caller concurrently while the -actual registration is serialised via the `erleans_pm` server process. 
- -Returns an error with the following reasons: -* `badgrain` if the calling process is not an Erleans grain. -* `timeout` if there was no response from the server within the requested time -* `{already_in_use, partisan_remote_ref:p()}` if there is already a process -registered for the same `grain_key()`. +Routes the call to the appropriate partition based on consistent hashing. """). -spec register_name() -> ok @@ -172,1290 +85,211 @@ registered for the same `grain_key()`. | {error, timeout} | {error, noproc} | {error, {already_in_use, partisan_remote_ref:p()}}. - register_name() -> register_name(?TIMEOUT). - -?DOC(""" -Registers the calling process with the `grain_key()` derived from its -`erleans:grain_ref()`. - -The duplicate check logic is executed by the caller concurrently while the -actual registration is serialised via the `erleans_pm` server process. - -Returns an error with the following reasons: -* `badgrain` if the calling process is not an Erleans grain. -* `timeout` if there was no response from the server within the requested time -* `{already_in_use, partisan_remote_ref:p()}` if there is already a process -registered for the same `grain_key()`. -"""). -spec register_name(timeout()) -> ok | {error, badgrain} | {error, timeout} | {error, {already_in_use, partisan_remote_ref:p()}}. - -register_name(Timeout) -> +register_name(_Timeout) -> case erleans:grain_ref() of undefined -> {error, badgrain}; - GrainRef -> - case lookup_local_pid(GrainRef) of - undefined -> - %% We get all known registrations order by location local < - %% node(), and then by node(). - Processes = lookup(GrainRef), - - case filter_alive(Processes) of - [] -> - safe_call( - ?MODULE, {register_name, GrainRef}, Timeout - ); - - [ProcRef|_] -> - %% We found at least one active grain that is - %% reachable, so we pick it. 
- {error, {already_in_use, ProcRef}} - end; - - Pid when Pid == self() -> - %% Idempotent, although it should not happen that the grain - %% calls us twice - ok; - - Pid when is_pid(Pid) -> - %% Anotehr local pid registered this name - ProcRef = partisan_remote_ref:from_term(Pid), - {error, {already_in_use, ProcRef}} - end + PartitionPid = select_partition(GrainRef), + erleans_registry_partition:register_name(PartitionPid, GrainRef) end. - ?DOC(""" -Unregisters a grain. This call fails with `badgrain` if the calling -process is not the original caller to `register_name/0`. - -This call is serialised through the `erleans_pm` server process. +Unregisters a grain from the appropriate partition. """). -spec unregister_name() -> ok | {error, badgrain}. - unregister_name() -> - %% Gets the calling process grain_ref case erleans:grain_ref() of undefined -> {error, badgrain}; - GrainRef -> - partisan_gen_server:call(?MODULE, {unregister_name, GrainRef}) + PartitionPid = select_partition(GrainRef), + erleans_registry_partition:unregister_name(PartitionPid, GrainRef) end. - ?DOC(""" -Returns a process reference for `GrainRef` unless there is no reference -in which case returns `undefined`. This function calls -`erleans_pm:whereis_name/2` passing the options `[safe]`. - -Notice that as we use an eventually consistent model and temporarily support -duplicated activations for a grain reference in different locations we could -have multiple instances in the global registry. This function chooses the -first reference in the list that represents a live process. Checking for -liveness incurs in a remote call for remote processes and thus can be -expensive in the presence of multiple instantiations. If you prefer to avoid -this check you can call `erleans_pm:whereis_name/2` passing [unsafe] as -the second argument. +Returns a process reference for `GrainRef` from the appropriate partition. """). 
-spec whereis_name(GrainRef :: erleans:grain_ref()) -> partisan_remote_ref:p() | undefined. - whereis_name(GrainRef) -> whereis_name(GrainRef, [safe]). - -?DOC(""" -Returns a process reference for `GrainRef` unless there is no reference -in which case returns `undefined`. -If the option `[safe]` is used it will return the process reference only if -its process is alive. Checking for liveness on remote processes incurs a -remote call. If there is no connection to the node in which the -process lives, it is deemed dead. - -If Opts is `[]` or `[unsafe]` the function will not check for liveness. -"""). -spec whereis_name(GrainRef :: erleans:grain_ref(), Opts :: [safe | unsafe]) -> partisan_remote_ref:p() | undefined. - -whereis_name(#{placement := stateless} = GrainRef, _) -> - whereis_stateless(GrainRef); - -whereis_name(#{placement := {stateless, _}} = GrainRef, _) -> - whereis_stateless(GrainRef); - -whereis_name(GrainRef, []) -> - whereis_name(GrainRef, [safe]); - -whereis_name(GrainRef, [_|T] = L) when T =/= [] -> - case lists:member(safe, L) of - true -> - whereis_name(GrainRef, [safe]); - - false -> - whereis_name(GrainRef, [unsafe]) - end; - -whereis_name(#{id := _} = GrainRef, [Flag]) -> - case lookup(GrainRef) of - [] -> - undefined; - - ProcRefs -> - pick(ProcRefs, [Flag]) - end. - +whereis_name(GrainRef, Opts) -> + PartitionPid = select_partition(GrainRef), + erleans_registry_partition:whereis_name(PartitionPid, GrainRef, Opts). ?DOC(""" -Lookups all the registered grains under name `GrainRef` and returns a list of -registered references for the ref. -The list sorts local references first. +Lookups all registered grains under name `GrainRef` from the appropriate partition. """). --spec lookup(GrainRef :: erleans:grain_ref() | grain_key()) -> - [partisan_remote_ref:p()]. 
- -lookup(#{id := _} = GrainRef) -> - lookup(grain_key(GrainRef)); - -lookup({_, _} = GrainKey) -> - case bondy_mst:get(?TREE, GrainKey) of - undefined -> - []; - - AWSet -> - sort_conflicting_values(AWSet) - end. - +-spec lookup(GrainRef :: erleans:grain_ref()) -> [partisan_remote_ref:p()]. +lookup(GrainRef) -> + PartitionPid = select_partition(GrainRef), + erleans_registry_partition:lookup(PartitionPid, GrainRef). ?DOC(""" -Returns the `erleans:grain_ref` for a pid or Partisan process reference. -This is more efficient than `erleans_grain:grain_ref` as it doesn't call the -grain process for local grains, which might be busy handling signals, but -instead uses this module's ets table used for monitoring. - -In case of a remote reference, this incurs in an RPC to the peer node where the -grain is activated. +Returns the `erleans:grain_ref` for a pid or process reference. """). -spec grain_ref(partisan:any_pid()) -> {ok, erleans:grain_ref()} | {error, timeout | any()}. - -grain_ref(Pid) when is_pid(Pid) -> - %% A local grain so we use the monitor table which is faster - case monitor_lookup(Pid) of - {Pid, GrainRef, _} -> - {ok, GrainRef}; - - undefined -> - {error, not_found} - end; - grain_ref(ProcRef) -> - %% We know this is not a pid so it must be a partisan process reference. - partisan:is_pid(ProcRef) orelse error({badarg, [ProcRef]}), - - case partisan_remote_ref:is_local(ProcRef) of - true -> - grain_ref(partisan_remote_ref:to_term(ProcRef)); - - false -> - %% We use RPC cause grain_ref uses ets directly so we avoid blocking - %% our peer process - Peer = partisan:node(ProcRef), - case partisan_rpc:call(Peer, ?MODULE, grain_ref, [ProcRef], 5000) of - {badrpc, Reason} -> - {error, Reason}; - - Result -> - Result - end - end. - + %% For grain_ref lookup, we need to try all partitions since we don't know + %% which partition the process belongs to. We can optimize this later. 
+ try_all_partitions(fun(PartitionPid) -> + case erleans_registry_partition:grain_ref(PartitionPid, ProcRef) of + {ok, GrainRef} -> {found, GrainRef}; + {error, not_found} -> continue; + {error, Reason} -> {error, Reason} + end + end). ?DOC(""" -The same as calling `to_list([safe])`. +Returns the list of all registry entries from all partitions. """). -spec to_list() -> [{grain_key(), partisan_remote_ref:p()}]. - to_list() -> to_list([safe]). +-spec to_list([safe | unsafe]) -> [{grain_key(), partisan_remote_ref:p()}]. +to_list(Opts) -> + %% Collect results from all partitions + Workers = get_all_partition_pids(), + lists:flatten([ + erleans_registry_partition:to_list(PartitionPid, Opts) + || PartitionPid <- Workers + ]). ?DOC(""" -Returns the list of all registry entries. - -## Options -* `safe` - returns only entries for grains that are known to be alive -* `unsafe` - returns all entries without checking for liveness. +Returns information from all partitions. """). --spec to_list([safe | unsafe]) -> [{grain_key(), partisan_remote_ref:p()}]. - -to_list([Flag]) -> - L = bondy_mst:fold( - ?TREE, - fun({GrainKey, Value}, Acc) -> - case sets:to_list(state_awset:query(Value)) of - [] -> - Acc; - - L -> - case pick(L, [Flag]) of - undefined -> - Acc; - - ProcRef -> - [{GrainKey, ProcRef} | Acc] - end - end - end, - [] - ), - lists:reverse(L). - +info() -> + Workers = get_all_partition_pids(), + PartitionInfos = [ + erleans_registry_partition:info(PartitionPid) + || PartitionPid <- Workers + ], + #{ + num_partitions => length(Workers), + partitions => PartitionInfos + }. ?DOC(""" -Triggers a synchronisation exchange with a peer. -Calls `exchange/2` with an empty map as the second argument. +Triggers synchronization with a peer across all partitions. """). --spec sync(node()) -> {ok, pid()} | {error, term()}. - +-spec sync(node()) -> [ok | {error, term()}]. sync(Peer) -> sync(Peer, #{}). - -?DOC(""" -Triggers a synchronisation exchange with a peer. -"""). 
--spec sync(node(), map()) -> ok | {error, term()}. - +-spec sync(node(), map()) -> [ok | {error, term()}]. sync(Peer, Opts) -> - partisan_gen_server:call(?MODULE, {crdt_trigger, Peer, Opts}). - - -info() -> - partisan_gen_server:call(?MODULE, info). - - - -%% ============================================================================= -%% BONDY_MST_CRDT CALLBACKS -%% ============================================================================= - - -?DOC(""" -Implementation of the `bondy_mst_crdt` callback. -Casts message `Message` to this server on node `Peer` using `partisan`. -"""). -send(Peer, Message) -> - partisan_gen_server:cast({?MODULE, Peer}, {crdt_message, Message}). - - -?DOC(""" -Implementation of the `bondy_mst_crdt` callback. -Broadcasts message `Gossip` to peers using Plumtree (Epidemis broadcast trees). -"""). -broadcast(Gossip) -> - partisan:broadcast(Gossip, ?MODULE). - - -?DOC(""" -Implementation of the `bondy_mst_crdt` callback. -Removes stale entries and duplicates after merge. -"""). -on_merge(Peer) -> - partisan_gen_server:cast(?MODULE, {crdt_on_merge, Peer}). - - + Workers = get_all_partition_pids(), + [ + try + partisan_gen_server:call(PartitionPid, {crdt_trigger, Peer, Opts}) + catch + _:Reason -> {error, Reason} + end + || PartitionPid <- Workers + ]. %% ============================================================================= -%% PARTISAN_PLUMTREE_BROADCAST_HANDLER CALLBACKS +%% PARTITION SELECTION %% ============================================================================= - - -?DOC(""" -Implementation of the `partisan_plumtree_backend` callback. -Returns the channel to be used when broadcasting. -"""). --spec broadcast_channel() -> partisan:channel(). - -broadcast_channel() -> - application:get_env(erleans, partisan_broadcast_channel, undefined). - - -?DOC(""" -Implementation of the `partisan_plumtree_backend` callback. -Deconstructs a broadcast that is sent using `broadcast/2` returning the message -id and payload. 
- -> This function is part of the implementation of the -partisan_plumtree_broadcast_handler behaviour. -> You should never call it directly. -"""). --spec broadcast_data(Gossip :: bondy_mst_crdt:gossip()) -> - { - MessageId :: {bondy_mst_crdt:node_id(), bondy_mst:hash()}, - Payload :: bondy_mst_crdt:gossip() - }. - -broadcast_data(Gossip) -> - #{from := Peer, root := Root} = bondy_mst_crdt:gossip_data(Gossip), - {{Peer, Root}, Gossip}. - - ?DOC(""" -Implementation of the `partisan_plumtree_backend` callback. -Merges a remote copy of an object record sent via broadcast w/ the -local view for the key contained in the message id. If the remote copy is -causally older than the current data stored then `false` is returned and no -updates are merged. Otherwise, the remote copy is merged (possibly -generating siblings) and `true` is returned. - -> This function is part of the implementation of the -partisan_plumtree_broadcast_handler behaviour. -> You should never call it directly. -"""). --spec merge(GossipId :: gossip_id(), Payload :: bondy_mst_crdt:gossip()) -> - boolean(). - -merge(_Id, Gossip) -> - partisan_gen_server:call(?MODULE, {crdt_merge, Gossip}). - - -?DOC(""" -Implementation of the `partisan_plumtree_backend` callback. -Same as merge/2 but merges the object on `Node' - -> This function is part of the implementation of the -partisan_plumtree_broadcast_handler behaviour. -> You should never call it directly. +Selects the appropriate partition for a given GrainRef using gproc_pool. """). --spec merge( - Peer :: node(), - Root :: bondy_mst:hash(), - Payload :: bondy_mst_crdt:gossip()) -> boolean(). - -merge(Peer, _Root, Gossip) -> - partisan_gen_server:call({?MODULE, Peer}, {crdt_merge, Gossip}). - - -?DOC(""" -Implementation of the `partisan_plumtree_backend` callback. 
-When a peer broadcasts a message it does it to the nodes in its eager-push set -only, but also simultaneously sends I_HAVE notifications to nodes in its -lazy-push set instead of the entire message. This callback is the one that -Plumtree calls when receiving an I_HAVE message. - -The main idea is: -“I have seen a broadcast message with this root. If you need it, let me know.” - -This saves bandwidth, because instead of blindly sending every neighbor the full -payload, the node sends just the root hash. The lazy neighbors can decide -whether they need the full message or not. - -If function returns `true` then Plumtree will do nothing. However,if it returns -`false` then Plumtree will `graft` the message from the peer and send it to us. - -> This function is part of the implementation of the -partisan_plumtree_broadcast_handler behaviour. -> You should never call it directly. -"""). --spec is_stale(gossip_id()) -> boolean(). - -is_stale({Peer, Root}) -> - %% In our case the I_HAVE message is the root of the peer's tree, so we - %% always return `true` signaling Plumtree that we do not need the message, - %% and we send ourself a message to potentially init a merge with the peer - %% i.e. in this case we take the job of synchonising the CRDT in out hands - %% instead of relying on Plumtree. - ok = partisan_gen_server:cast(?MODULE, {crdt_maybe_merge, Peer, Root}), - true. - - -?DOC(""" -Implementation of the `partisan_plumtree_backend` callback. -In Plumtree this is used to return the object associated with the given prefixed -message id if the currently stored version has an equal context. Otherwise -returning the atom `stale`. - -Because it assumes that a grafted context can only be causally older than -the local view, a `stale` response means there is another message that -subsumes the grafted one. - -> This function is part of the implementation of the -partisan_plumtree_broadcast_handler behaviour. -> You should never call it directly. -"""). 
--spec graft(gossip_id()) -> - stale | {ok, bondy_mst_crdt:gossip()} | {error, term()}. - -graft({_Peer, _Root}) -> - %% In our case, the message_id is just the peer's root hash, so in case - %% we contain the root we return a Gossip message with our root. Otherwise - %% we return 'stale'. - %% partisan_gen_server:call(?MODULE, {crdt_graft, Peer, Root}). - {error, disabled}. - - -?DOC(""" -Calls `sync/1`. -"""). --spec exchange(node()) -> {ok, pid()} | {error, term()}. - -exchange(Peer) -> - exchange(Peer, #{}). - - -?DOC(""" -Calls `sync/2`. -"""). --spec exchange(node(), map()) -> ok | {error, term()}. - -exchange(Peer, Opts) -> - sync(Peer, Opts). - - - -%% ============================================================================= -%% PARTISAN_GEN_SERVER BEHAVIOR CALLBACKS -%% ============================================================================ - - - --spec init(Args :: term()) -> {ok, State :: t()}. - -init(_) -> - %% Trap exists otherwise terminate/1 won't be called when shutdown by - %% supervisor. - erlang:process_flag(trap_exit, true), - - %% Create or claim ets table. - %% If this server crashes, data will be preserved. 
- {ok, ?MONITOR_TAB} = erleans_table_owner:add_or_claim( - ?MONITOR_TAB, - [ - set, - protected, - named_table, - {keypos, 1}, - {write_concurrency, true}, - {read_concurrency, true}, - {decentralized_counters, true} - ] - ), - - %% We monitor all nodes so that we can cleanup our view of the registry - partisan:monitor_nodes(true), - - {channel, Channel} = lists:keyfind(channel, 1, partisan_gen:get_opts()), - - %% We wrap the tree using the exchange module - Node = partisan:node(), - Opts = #{ - %% MST opts - hash_algorithm => sha256, - merger => fun mst_merge_value/3, - store => bondy_mst_ets_store, - store_opts => #{ - name => atom_to_binary(?MODULE), - persistent => true - }, - %% CRDT opts - callback_mod => ?MODULE, - max_merges => 1, - max_merges_per_root => 1, - max_versions => 10, - version_ttl => timer:seconds(30), - fwd_bcast => false, - consistency_model => eventual - }, - - %% We create an ets-based MST bound to this process. - %% The ets table will be garbage collected if this process terminates. - CRDT = bondy_mst_crdt:new(Node, Opts), - Tree = bondy_mst_crdt:tree(CRDT), - - %% ets-based trees support read_concurrency (option store_opts.persistent) - %% so we can cache and share it using persistent_term to avoid a call to - %% this process. - ok = persistent_term:put(?PERSISTENT_KEY, Tree), - - State = #state{ - crdt = CRDT, - partisan_channel = Channel - }, - - {ok, State, {continue, monitor_existing}}. - - -handle_continue(monitor_existing, State0) -> - %% This prevents any grain to be registered as we are blocking the server - %% until we finish. - %% We fold the claimed ?MONITOR_TAB table to find any existing - %% registrations. In case the table is new, it would be empty. Otherwise, - %% we would iterate over registrations that were done by a previous - %% instance of this server before it crashed. - %% We re-register/monitor alive pids and remove dead ones. 
- Fun = fun - ({Pid, GrainRef, _OldMRef}, Acc0) -> - case erlang:is_process_alive(Pid) of - true -> - %% The process is still alive, but the monitor has died with - %% the previous instance of this gen_server, so we monitor - %% again. We use relaxed mode which allows us to update the - %% existing registration on ?MONITOR_TAB and the MST. - {_, Acc} = do_register_name(Acc0, GrainRef, Pid, relaxed), - Acc; - - false -> - %% The process has died, so we unregister. This will also - %% remove the registration from the MST. - {_, Acc} = do_unregister_name(Acc0, GrainRef, Pid), - Acc - end - end, - State = lists:foldl(Fun, State0, ets:tab2list(?MONITOR_TAB)), - - %% We should now have all existing local grains re-registered on this - %% server and broadcast messages sent to cluster peers. - {noreply, State}; - -handle_continue(_, State) -> - {noreply, State}. - -handle_call({register_name, GrainRef}, {Caller, _}, State0) -when is_pid(Caller) -> - {Reply, State} = do_register_name(State0, GrainRef, Caller), - {reply, Reply, State}; - -handle_call({register_name, _}, _From, State) -> - %% A call from a remote node, not allowed - {reply, {error, not_local}, State}; - -handle_call({unregister_name, GrainRef}, {Caller, _}, State0) -when is_pid(Caller) -> - {Reply, State} = do_unregister_name(State0, GrainRef, Caller), - {reply, Reply, State}; - -handle_call({unregister_name, _}, _From, State) -> - %% A call from a remote node, now allowed - {reply, {error, not_local}, State}; - -handle_call({register_name_test, GrainRef, ProcRef}, _From, State0) -> - %% Used for testing only - {Reply, State} = do_register_name_test(State0, GrainRef, ProcRef), - {reply, Reply, State}; - -handle_call({unregister_name_test, GrainRef, ProcRef}, _From, State0) -> - %% Used for testing only - Key = grain_key(GrainRef), - {Reply, State} = do_unregister_name_test(State0, Key, ProcRef), - {reply, Reply, State}; - -handle_call({add_test, GrainRef, ProcRef}, _From, State0) -> - %% Used for testing only 
- Key = grain_key(GrainRef), - State = add(State0, Key, ProcRef, partisan_remote_ref:node(ProcRef)), - {reply, ok, State}; - -handle_call({remove_test, GrainRef, ProcRef}, _From, State0) -> - %% Used for testing only - Key = grain_key(GrainRef), - State = remove(State0, Key, ProcRef, partisan_remote_ref:node(ProcRef)), - {reply, ok, State}; - -handle_call({crdt_merge, Gossip}, _From, State) -> - CRDT0 = State#state.crdt, - Root0 = bondy_mst_crdt:root(CRDT0), - CRDT = bondy_mst_crdt:handle(CRDT0, Gossip), - Root = bondy_mst_crdt:root(CRDT), - - %% Required by Plumtree. - %% Merges a remote copy of an object record sent via broadcast w/ the - %% local view for the key contained in the message id. If the remote copy is - %% causally older than the current data stored then `false` is returned and - %% no updates are merged. Otherwise, the remote copy is merged (possibly - %% generating siblings) and `true` is returned. - %% Since we will performing a merge if required during - %% bondy_mst_crdt:handle/2 we reply `false`. - Reply = Root =/= Root0, - {reply, Reply, State#state{crdt = CRDT}}; - -handle_call({crdt_trigger, Peer, _Opts}, _From, State) -> - Reply = bondy_mst_crdt:trigger(State#state.crdt, Peer), - {reply, Reply, State}; - -handle_call(info, _From, State) -> - Reply = #{ - tree => #{ - root => bondy_mst_crdt:root(State#state.crdt) - }, - local_registry => #{ - memory => ets:info(?MONITOR_TAB, memory), - size => ets:info(?MONITOR_TAB, size) - } - }, - {reply, Reply, State}; - -handle_call(_Request, _From, State) -> - {reply, {error, unknown_call}, State}. - - --spec handle_cast(Request :: term(), State :: t()) -> - {noreply, NewState :: t()}. 
- -handle_cast({crdt_maybe_merge, Peer, Root}, State) -> - Root == bondy_mst_crdt:root(State#state.crdt) - andalso bondy_mst_crdt:trigger(State#state.crdt, Peer), - {noreply, State}; - -handle_cast({crdt_on_merge, _Peer}, #state{initial_sync = false} = State0) -> - State = remove_stale(State0#state{initial_sync = true}), - ok = maybe_deactivate_local_duplicates(State), - {noreply, State}; - -handle_cast({crdt_on_merge, _Peer}, #state{initial_sync = true} = State) -> - ok = maybe_deactivate_local_duplicates(State), - {noreply, State}; - -handle_cast({crdt_message, Msg}, State) -> - %% Fwd message to bondy_mst_crdt - CRDT = bondy_mst_crdt:handle(State#state.crdt, Msg), - {noreply, State#state{crdt = CRDT}}; - -handle_cast({force_unregister_name, GrainKey, ProcRef}, State0) -> - %% Internal case to deal with inconsistencies - case partisan_remote_ref:is_local(ProcRef) of - true -> - Pid = partisan_remote_ref:to_pid(ProcRef), - {_, State} = do_unregister_name(State0, GrainKey, Pid), - {noreply, State}; - +-spec select_partition(erleans:grain_ref()) -> pid(). +select_partition(GrainRef) -> + GrainKey = grain_key(GrainRef), + case gproc_pool:pick_worker(erleans_registry_pool, GrainKey) of false -> - {noreply, State0} - end; - -handle_cast(_Request, State) -> - {noreply, State}. - - --spec handle_info(Message :: term(), State :: t()) -> - {noreply, NewState :: t()}. 
- -handle_info({'ETS-TRANSFER', erleans_pm_monitor, _, []}, State) -> - {noreply, State}; - -handle_info({nodedown, Node}, State) -> - CRDT = bondy_mst_crdt:cancel_merge(State#state.crdt, Node), - {noreply, State#state{crdt = CRDT}}; - -handle_info({nodeup, _Node}, State) -> - {noreply, State}; - -handle_info({'DOWN', MRef, process, Pid, _Info}, State0) when is_pid(Pid) -> - %% Registered (monitored) grain exit - ?LOG_INFO("Grain down ~p", [{Pid, MRef}]), - {_, State} = do_unregister_process(State0, Pid), - {noreply, State}; - -handle_info(Event, State) -> - ?LOG_INFO("Received unknown event ~p", [Event]), - {noreply, State}. - - --spec terminate( - Reason :: (normal | shutdown | {shutdown, term()} | term()), - State :: t()) -> ok. - -terminate(_Reason, State) -> - ok = unregister_all_local(State), - _ = persistent_term:erase(?PERSISTENT_KEY), - ok. - - + error({no_partition_available, GrainKey}); + Pid when is_pid(Pid) -> + Pid + end. %% ============================================================================= %% PRIVATE %% ============================================================================= - -%% @private -safe_call(ServerRef, Cmd) -> - safe_call(ServerRef, Cmd, ?TIMEOUT). - - -%% @private -safe_call(ServerRef, Cmd, Timeout) -> - try - partisan_gen_server:call(ServerRef, Cmd, Timeout) - catch - _:Reason:_ -> - {error, Reason} - end. - - -%% @private -add(#state{} = State, GrainKey, Value) -> - add(#state{} = State, GrainKey, Value, partisan:node()). - - -%% @private -add(#state{crdt = CRDT0} = State, Key, Value, Node) -> - Tree = bondy_mst_crdt:tree(CRDT0), - - AWSet1 = - case bondy_mst:get(Tree, Key) of - undefined -> - state_awset:new(); - - AWSet0 -> - AWSet0 - end, - - {ok, AWSet} = state_type:mutate({add, Value}, Node, AWSet1), - CRDT = bondy_mst_crdt:put(CRDT0, Key, AWSet), - State#state{crdt = CRDT}. - - -%% @private -remove(State, GrainKey, Value) -> - remove(State, GrainKey, Value, partisan:node()). 
- - -%% @private -remove(State, Key, Value, Node) -> - remove(State, Key, Value, Node, #{}). - - -%% @private -remove(#state{crdt = CRDT0} = State, Key, Value, Node, Opts) -> - CRDT = crdt_remove(CRDT0, Key, Value, Node, Opts), - State#state{crdt = CRDT}. - - -%% @private -crdt_remove(CRDT, Key, Value, Node, Opts) -> - Tree = bondy_mst_crdt:tree(CRDT), - AWSet1 = - case bondy_mst:get(Tree, Key) of - undefined -> - state_awset:new(); - - AWSet0 -> - AWSet0 - end, - {ok, AWSet} = state_type:mutate({rmv, Value}, Node, AWSet1), - bondy_mst_crdt:put(CRDT, Key, AWSet, Opts). - - -%% @private -awset_remove(AWSet0, Value) -> - {ok, AWSet} = state_type:mutate({rmv, Value}, partisan:node(), AWSet0), - AWSet. - - -%% @private -is_monitored(Pid) -> - monitor_lookup(Pid) =/= undefined. - - -%% @private -monitor_lookup(Pid) -> - case ets:lookup(?MONITOR_TAB, Pid) of - [Monitor] -> - Monitor; - - _ -> - undefined - end. - -%% @private -lookup_local_pid(GrainRef) -> - case ets:match_object(?MONITOR_TAB, {'_', GrainRef, '_'}) of - [{Pid, GrainRef, _}] -> - Pid; - - _ -> - undefined - end. - - -%% @private -mst_merge_value(GrainKey, AWSet1, AWSet2) -> - %% We merge de CRDTs - AWSet3 = state_awset:merge(AWSet1, AWSet2), - %% We remove local grains that have been deactivated - AWSet = remove_deactivated(AWSet3), - ?LOG_DEBUG(#{ - description => "Merged values", - key => GrainKey, - rhs => AWSet1, - lhs => AWSet2, - result => AWSet - }), - ok = maybe_deactivate_local_duplicate(GrainKey, AWSet), - AWSet. - - -%% @private --spec remove_deactivated(state_awset:state_awset()) -> - state_awset:state_awset(). - -remove_deactivated(AWSet) -> - Fun = fun(ProcRef, Acc) -> - maybe - true ?= partisan_remote_ref:is_local(ProcRef), - Pid ?= partisan_remote_ref:to_pid(ProcRef), - undefined ?= monitor_lookup(Pid), - %% Not monitored so it has been deactivated i.e. the peer node has a - %% stale entry. We remove it from the set. 
- ?LOG_DEBUG(#{ - message => "Removing grain from registry", - process_ref => ProcRef, - reason => deactivated - }), - awset_remove(Acc, ProcRef) - else - false -> - %% Not local, so we ignore it - Acc; - - {_Pid, _GrainRef, _Mref} -> - %% Monitored, so we ignore it - Acc - - end - end, - sets:fold(Fun, AWSet, state_awset:query(AWSet)). - - -%% This function assumes remove_deactivated/2 was called on AWSet before. -maybe_deactivate_local_duplicate(GrainKey, AWSet) -> - All = sets:to_list(state_awset:query(AWSet)), - - maybe - %% Partition based on locality - {[ProcRef], [_ | _] = Remotes} ?= - lists:partition(fun partisan_remote_ref:is_local/1, All), - %% We have duplicates, so we need to check if our local duplicate should - %% belong here. - false ?= safe_is_location_right(GrainKey, ProcRef), - %% The grain should not be here, so we will deactivate but only if - %% we can reach any of the remote duplicates - true ?= lists:any(fun ?MODULE:is_reachable/1, Remotes), - %% Since at least one remote grain is reachable, we deactivate the - %% local one - deactivate_grain(GrainKey, ProcRef) - else - _ -> - ok - end. - - -%% @private -maybe_deactivate_local_duplicates(#state{crdt = CRDT}) -> - Tree = bondy_mst_crdt:tree(CRDT), - Fun = fun({Key, AWSet}) -> maybe_deactivate_local_duplicate(Key, AWSet) end, - bondy_mst:foreach(Tree, Fun). - - -%% @private -safe_is_location_right({_, Mod}, LocalPRef) -> - try - Pid = partisan_remote_ref:to_pid(LocalPRef), - erleans_grain:is_location_right(Mod, Pid) - catch - Class:Reason:Stacktrace -> - ?LOG_WARNING(#{ - message => - "erleans_grain:is_location_right/2 failed. " - "Returning true by default", - implementing_module => Mod, - process_ref => LocalPRef, - class => Class, - reason => Reason, - stacktrace => Stacktrace - }), - true - end. - - -remove_stale(#state{crdt = CRDT} = State) -> - Tree = bondy_mst_crdt:tree(CRDT), - Fun = fun({Key, AWSet}, Acc) -> remove_stale(Acc, Key, AWSet) end, - bondy_mst:fold(Tree, Fun, State). 
- - -%% @private -remove_stale(State, Key, AWSet) -> - Set = state_awset:query(AWSet), - Fun = fun(ProcRef, Acc) -> - maybe - true ?= partisan_remote_ref:is_local(ProcRef), - Pid ?= partisan_remote_ref:to_pid(ProcRef), - undefined ?= monitor_lookup(Pid), - %% Not monitored so it has been deactivated i.e. the peer node has a - %% stale entry. We remove it from the set. - ?LOG_DEBUG(#{ - message => "Removing grain from registry", - process_ref => ProcRef, - reason => deactivated - }), - %% We disable broadcasting - remove(Acc, Key, ProcRef, partisan:node(), #{broadcast => false}) - else - _ -> - Acc - end - end, - sets:fold(Fun, State, Set). - - -%% @private -sort_conflicting_values(AWSet) -> - Set = state_awset:query(AWSet), - lists:sort( - fun(A, B) -> - Result = { - partisan_remote_ref:is_local(A), - partisan_remote_ref:is_local(B) - }, - case Result of - {true, _} -> - true; - - {_, true} -> - false; - - {false, false} -> - A =< B - end - end, - sets:to_list(Set) - ). - - -%% @private -%% Register the calling process with GrainRef unless another local -%% registration exists. --spec do_register_name(t(), GrainRef :: erleans:grain_ref(), Pid :: pid()) -> - {ok, t()} | {{error, {already_in_use, partisan_remote_ref:p()}}, t()}. - -do_register_name(State, GrainRef, Pid) -> - do_register_name(State, GrainRef, Pid, strict). - - -%% @private -%% Register the calling process with GrainRef unless another local -%% registration exists. --spec do_register_name( - t(), GrainRef :: erleans:grain_ref(), Pid :: pid(), strict | relaxed) -> - {ok, t()} | {{error, {already_in_use, partisan_remote_ref:p()}}, t()}. - -do_register_name(State0, GrainRef, Pid, Mode) when is_pid(Pid) -> - case monitor(GrainRef, Pid, Mode) of - ok -> - Key = grain_key(GrainRef), - Value = partisan_remote_ref:from_term(Pid), - State = add(State0, Key, Value), - {ok, State}; - - {error, _} = Error -> - {Error, State0} - end. 
- - -%% Used for testing only (see export of register_name/2) -do_register_name_test(State0, GrainRef, ProcRef) -> - Key = grain_key(GrainRef), - State = add(State0, Key, ProcRef, partisan:node(ProcRef)), - {ok, State}. - - -%% @private --spec do_unregister_process(t(), Pid :: pid()) -> {ok, t()}. - -do_unregister_process(State0, Pid) when is_pid(Pid) -> - case monitor_lookup(Pid) of - {Pid, GrainRef, _} -> - Key = grain_key(GrainRef), - do_unregister_name(State0, Key, Pid); - undefined -> - {ok, State0} - end. - - -%% @private --spec do_unregister_name(t(), GrainKey :: grain_key(), Pid :: pid()) -> - {ok, t()}. - -do_unregister_name(State0, GrainKey, Pid) when is_pid(Pid) -> - %% Demonitor - ok = demonitor(Pid), - true = ets:delete(?MONITOR_TAB, Pid), - - Value = partisan_remote_ref:from_term(Pid), - State = remove(State0, GrainKey, Value), - {ok, State}. - - -do_unregister_name_test(State0, GrainKey, ProcRef) -> - State = remove(State0, GrainKey, ProcRef, partisan:node(ProcRef)), - {ok, State}. - - %% @private +-spec grain_key(erleans:grain_ref()) -> {term(), module()}. grain_key(#{id := Id, implementing_module := Mod}) -> {Id, Mod}. - -%% @private -monitor(GrainRef, Pid, strict) when is_pid(Pid) -> - Mref = erlang:monitor(process, Pid), - - case ets:insert_new(?MONITOR_TAB, {Pid, GrainRef, Mref}) of - true -> - ok; - - false -> - true = erlang:demonitor(Mref, [flush]), - {OtherPid, GrainRef, _} = monitor_lookup(Pid), - {error, {already_in_use, partisan_remote_ref:from_term(OtherPid)}} - end; - -monitor(GrainRef, Pid, relaxed) when is_pid(Pid) -> - Mref = erlang:monitor(process, Pid), - true = ets:insert(?MONITOR_TAB, {Pid, GrainRef, Mref}), - ok. - - %% @private -demonitor(Pid) -> - case ets:take(?MONITOR_TAB, Pid) of - [{Pid, _, Mref}] -> - true = erlang:demonitor(Mref, [flush]), - ok; +-spec get_num_partitions() -> pos_integer(). +get_num_partitions() -> + erleans_config:get(pm_partitions, 1). +%% @private +-spec get_all_partition_pids() -> [pid()]. 
+get_all_partition_pids() -> + case gproc_pool:active_workers(erleans_registry_pool) of [] -> - ok - end. - - -%% @private --spec deactivate_grain(grain_key(), partisan_remote_ref:t()) -> ok. - -deactivate_grain(GrainKey, ProcRef) -> - %% This call is async (uses a cast) so we are safe to do it - case erleans_grain:deactivate(ProcRef) of - ok -> - ?LOG_NOTICE(#{ - description => "Succeded to deactivate duplicate", - grain => GrainKey, - pid => ProcRef - }), - ok; - - {error, Reason} when Reason == not_found; Reason == not_active -> - ?LOG_ERROR(#{ - description => "Failed to deactivate duplicate", - grain => GrainKey, - pid => ProcRef, - reason => Reason - }), - %% This is an inconsistency, we need to cleanup. - %% We ask the peer to do it, via a private cast (peer can be us) - partisan_gen_server:cast( - {?MODULE, partisan_remote_ref:node(ProcRef)}, - {force_unregister_name, GrainKey, ProcRef} - ); - - {error, Reason} -> - ?LOG_ERROR(#{ - description => "Failed to deactivate duplicate", - grain => GrainKey, - pid => ProcRef, - reason => Reason - }), - ok - end. - - -%% @private -whereis_stateless(GrainRef) -> - case gproc_pool:pick_worker(GrainRef) of - false -> - undefined; - Pid -> - partisan_remote_ref:from_term(Pid) + []; + Workers -> + [Pid || {_, Pid} <- Workers] end. - %% @private -pick([], _) -> - undefined; - -pick(L, []) -> - pick(L, [unsafe]); - -pick([H], [unsafe]) -> - H; - -pick([H | _], [unsafe]) -> - H; - -pick(List, [safe]) -> - pick_alive(List). - +-spec try_all_partitions(fun((pid()) -> continue | {found, term()} | {error, term()})) -> + {ok, term()} | {error, not_found}. +try_all_partitions(Fun) -> + Workers = get_all_partition_pids(), + try_partitions(Fun, Workers). %% @private -pick_alive([H | T]) -> - try partisan:is_process_alive(H) of - true -> - H; +try_partitions(_Fun, []) -> + {error, not_found}; - false -> - pick_alive(T) - - catch - error:_ -> - pick_alive(T) - end; - -pick_alive([]) -> - undefined. 
- - -%% @private -%% Returns a new list where all the process references are know to be -%% reachable. If the remote check fails, it returns false. -filter_alive(undefined) -> - []; - -filter_alive(ProcRefs) when is_list(ProcRefs) -> - lists:filter( - fun(ProcRef) -> - try - partisan:is_process_alive(ProcRef) - catch - _:_ -> - false - end - end, - ProcRefs - ). - - -%% @private -is_reachable(ProcRef) -> +try_partitions(Fun, [PartitionPid | Rest]) -> try - partisan:is_connected(partisan:node(ProcRef)) + case Fun(PartitionPid) of + continue -> + try_partitions(Fun, Rest); + {found, Result} -> + {ok, Result}; + {error, _} = Error -> + Error + end catch _:_ -> - false + try_partitions(Fun, Rest) end. - - -%% @private -%% Unregisters all local alive processes. --spec unregister_all_local(t()) -> ok. - -unregister_all_local(State) -> - true = ets:safe_fixtable(?MONITOR_TAB, true), - try - unregister_local(State, ets:first(?MONITOR_TAB)) - catch - Class:Reason:Stacktrace -> - ?LOG_ERROR(#{ - message => "Unexpected error", - class => Class, - reason => Reason, - stacktrace => Stacktrace - }), - ok - after - true = ets:safe_fixtable(?MONITOR_TAB, false) - end. - - -%% @private -unregister_local(State0, Pid) when is_pid(Pid) -> - %% {Pid, GrainRef, MRef} - GrainRef = ets:lookup_element(?MONITOR_TAB, Pid, 2), - {ok, State} = do_unregister_name(State0, GrainRef, Pid), - unregister_local(State, ets:next(?MONITOR_TAB, Pid)); - -%% unregister_local(State, #{id := _} = GrainRef) -> -%% %% Ignore as we have two entries per registration -%% %% {Pid, GrainRef} and {GrainRef, Pid}, we just use the first -%% unregister_local(State, ets:next(?MONITOR_TAB, GrainRef)); - -unregister_local(_, '$end_of_table') -> - ok. - - - %% ============================================================================= %% TEST %% ============================================================================= - - -ifdef(TEST). - - -%% Registers the calling process with the `id` attribute of `GrainRef`. 
-%% This call is serialised the `erleans_pm` server process. --spec register_name_(erleans:grain_ref(), partisan_remote_ref:p()) -> - ok - | {error, {already_in_use, partisan_remote_ref:p()}}. - -register_name_(GrainRef, ProcRef) -> - partisan_gen_server:call(?MODULE, {register_name_test, GrainRef, ProcRef}). - - - -%% It can only be called by the caller -%% This call is serialised the `erleans_pm` server process. --spec unregister_name_(erleans:grain_ref(), partisan_remote_ref:p()) -> - ok | {error, badgrain | not_owner}. - -unregister_name_(#{id := _} = GrainRef, ProcRef) -> - partisan_gen_server:call( - ?MODULE, {unregister_name_test, GrainRef, ProcRef} - ). - - -%% Registers the calling process with the `id` attribute of `GrainRef`. -%% This call is serialised the `erleans_pm` server process. --spec add_(erleans:grain_ref(), partisan_remote_ref:p()) -> - ok - | {error, {already_in_use, partisan_remote_ref:p()}}. - +%% For testing - route to appropriate partition add_(GrainRef, ProcRef) -> - partisan_gen_server:call(?MODULE, {add_test, GrainRef, ProcRef}). + PartitionPid = select_partition(GrainRef), + erleans_registry_partition:add_(PartitionPid, GrainRef, ProcRef). +remove_(GrainRef, ProcRef) -> + PartitionPid = select_partition(GrainRef), + erleans_registry_partition:remove_(PartitionPid, GrainRef, ProcRef). -%% It can only be called by the caller -%% This call is serialised the `erleans_pm` server process. --spec remove_(erleans:grain_ref(), partisan_remote_ref:p()) -> - ok | {error, badgrain | not_owner}. - -remove_(#{id := _} = GrainRef, ProcRef) -> - partisan_gen_server:call( - ?MODULE, {remove_test, GrainRef, ProcRef} - ). - - --endif. - +register_name_(GrainRef, ProcRef) -> + PartitionPid = select_partition(GrainRef), + erleans_registry_partition:register_name_(PartitionPid, GrainRef, ProcRef). 
+unregister_name_(GrainRef, ProcRef) -> + PartitionPid = select_partition(GrainRef), + erleans_registry_partition:unregister_name_(PartitionPid, GrainRef, ProcRef). +-endif. \ No newline at end of file diff --git a/src/erleans_registry_partition.erl b/src/erleans_registry_partition.erl new file mode 100644 index 0000000..481cf4f --- /dev/null +++ b/src/erleans_registry_partition.erl @@ -0,0 +1,975 @@ +%% ----------------------------------------------------------------------------- +%% Copyright Tristan Sloughter 2019. All Rights Reserved. +%% Copyright Leapsight 2020 - 2023. All Rights Reserved. +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% ----------------------------------------------------------------------------- + +-module(erleans_registry_partition). + +-feature(maybe_expr, enable). + +-behaviour(bondy_mst_crdt). +-behaviour(partisan_gen_server). +-behaviour(partisan_plumtree_broadcast_handler). + +-include_lib("kernel/include/logger.hrl"). +-include_lib("partisan/include/partisan.hrl"). +-include("erleans.hrl"). + +-moduledoc #{format => "text/markdown"}. +?MODULEDOC(""" +This module implements a single partition of the Erleans grain process registry. +Each partition is a State-based CRDT using bondy_mst that manages a subset of grains. + +The server state consists of the following elements: +* A set of local monitor references with form +`{pid(), erleans:grain_ref(), reference()}` for every local registration. 
+This is stored in a protected `ets` set table managed by the +`erleans_table_owner` process to ensure the table survives this +server's crashes. +* A distributed and globally-replicated set of mappings from +`grain_key()` to a single `partisan_remote_ref:p()`. +This is stored on `bondy_mst`. +"""). + +-define(PERSISTENT_KEY(PartitionId), {?MODULE, tree, PartitionId}). +-define(TREE(PartitionId), persistent_term:get(?PERSISTENT_KEY(PartitionId))). +-define(MONITOR_TAB(PartitionId), list_to_atom("erleans_registry_partition_monitor_" ++ integer_to_list(PartitionId))). +-define(TIMEOUT, 15000). + +%% This server may receive a huge amount of messages. +%% We make sure that they are stored off heap to avoid excessive GCs. +-define(OPTS, [ + {channel, application:get_env(erleans, partisan_channel, undefined)}, + {spawn_opt, [{message_queue_data, off_heap}]} +]). + +-record(state, { + partition_id :: pos_integer(), + crdt :: bondy_mst_crdt:t(), + partisan_channel :: partisan:channel(), + initial_sync = false :: boolean() +}). + +-type t() :: #state{}. +-type grain_key() :: {GrainId :: any(), ImplMod :: module()}. +-type gossip_id() :: { + Peer :: bondy_mst_crdt:node_id(), + Root :: bondy_mst:hash() + }. + +%% API - Partition-specific functions (called by erleans_pm router) +-export([start_link/2]). +-export([register_name/2]). +-export([unregister_name/2]). +-export([whereis_name/2]). +-export([whereis_name/3]). +-export([lookup/2]). +-export([grain_ref/2]). +-export([to_list/1]). +-export([to_list/2]). +-export([info/1]). + +%% BONDY_MST_CRDT CALLBACKS +-export([broadcast/1]). +-export([on_merge/1]). +-export([send/2]). +-export([sync/1]). + +%% PARTISAN_PLUMTREE_BROADCAST_HANDLER CALLBACKS +-export([broadcast_data/1]). +-export([broadcast_channel/0]). +-export([exchange/1]). +-export([exchange/2]). +-export([graft/1]). +-export([is_stale/1]). +-export([merge/2]). + +%% PARTISAN_GEN_SERVER CALLBACKS +-export([init/1]). +-export([handle_continue/2]). 
+-export([handle_call/3]). +-export([handle_cast/2]). +-export([handle_info/2]). +-export([terminate/2]). + +%% TEST API +-ifdef(TEST). + -export([add_/3]). + -export([remove_/3]). + -export([register_name_/3]). + -export([unregister_name_/3]). +-endif. + +-compile({no_auto_import, [monitor/2]}). +-compile({no_auto_import, [monitor/3]}). +-compile({no_auto_import, [demonitor/1]}). +-compile({no_auto_import, [demonitor/2]}). + + +%% ============================================================================= +%% API +%% ============================================================================= + +?DOC(""" +Starts a partition server with given ID and registers it in gproc_pool. +"""). +-spec start_link(pos_integer(), atom()) -> {ok, pid()} | {error, term()}. +start_link(PartitionId, PoolName) -> + partisan_gen_server:start_link(?MODULE, [PartitionId, PoolName], ?OPTS). + +?DOC(""" +Registers a grain in this specific partition. +"""). +-spec register_name(pid(), erleans:grain_ref()) -> + ok | {error, badgrain} | {error, timeout} | {error, {already_in_use, partisan_remote_ref:p()}}. +register_name(PartitionPid, GrainRef) -> + partisan_gen_server:call(PartitionPid, {register_name, GrainRef}, ?TIMEOUT). + +?DOC(""" +Unregisters a grain from this specific partition. +"""). +-spec unregister_name(pid(), erleans:grain_ref()) -> ok | {error, badgrain}. +unregister_name(PartitionPid, GrainRef) -> + partisan_gen_server:call(PartitionPid, {unregister_name, GrainRef}). + +?DOC(""" +Returns a process reference for GrainRef from this partition. +"""). +-spec whereis_name(pid(), erleans:grain_ref()) -> + partisan_remote_ref:p() | undefined. +whereis_name(PartitionPid, GrainRef) -> + whereis_name(PartitionPid, GrainRef, [safe]). + +?DOC(""" +Returns a process reference for GrainRef from this partition with options. +"""). +-spec whereis_name(pid(), erleans:grain_ref(), [safe | unsafe]) -> + partisan_remote_ref:p() | undefined. 
+whereis_name(PartitionPid, GrainRef, Opts) ->
+    partisan_gen_server:call(PartitionPid, {whereis_name, GrainRef, Opts}).
+
+?DOC("""
+Looks up all grains registered under the name GrainRef in this partition.
+""").
+-spec lookup(pid(), erleans:grain_ref()) -> [partisan_remote_ref:p()].
+lookup(PartitionPid, GrainRef) ->
+    partisan_gen_server:call(PartitionPid, {lookup, GrainRef}).
+
+?DOC("""
+Returns the grain_ref for a process reference from this partition.
+""").
+-spec grain_ref(pid(), partisan:any_pid()) ->
+    {ok, erleans:grain_ref()} | {error, timeout | any()}.
+grain_ref(PartitionPid, ProcRef) ->
+    partisan_gen_server:call(PartitionPid, {grain_ref, ProcRef}).
+
+?DOC("""
+Returns a list of all registry entries from this partition.
+""").
+-spec to_list(pid()) -> [{grain_key(), partisan_remote_ref:p()}].
+to_list(PartitionPid) ->
+    to_list(PartitionPid, [safe]).
+
+-spec to_list(pid(), [safe | unsafe]) -> [{grain_key(), partisan_remote_ref:p()}].
+to_list(PartitionPid, Opts) ->
+    partisan_gen_server:call(PartitionPid, {to_list, Opts}).
+
+info(PartitionPid) ->
+    partisan_gen_server:call(PartitionPid, info).
+
+%% =============================================================================
+%% BONDY_MST_CRDT CALLBACKS
+%% =============================================================================
+
+?DOC("""
+Implementation of the `bondy_mst_crdt` callback.
+Casts message `Message` to this server on node `Peer` using `partisan`.
+""").
+send(Peer, Message) ->
+    partisan_gen_server:cast({?MODULE, Peer}, {crdt_message, Message}).
+
+?DOC("""
+Implementation of the `bondy_mst_crdt` callback.
+Broadcasts message `Gossip` to peers using Plumtree.
+""").
+broadcast(Gossip) ->
+    partisan:broadcast(Gossip, ?MODULE).
+
+?DOC("""
+Implementation of the `bondy_mst_crdt` callback.
+Removes stale entries and duplicates after merge.
+""").
+on_merge(Peer) ->
+    partisan_gen_server:cast(?MODULE, {crdt_on_merge, Peer}).
+ +%% ============================================================================= +%% PARTISAN_PLUMTREE_BROADCAST_HANDLER CALLBACKS +%% ============================================================================= + +?DOC(""" +Implementation of the `partisan_plumtree_backend` callback. +Returns the channel to be used when broadcasting. +"""). +-spec broadcast_channel() -> partisan:channel(). +broadcast_channel() -> + application:get_env(erleans, partisan_broadcast_channel, undefined). + +?DOC(""" +Implementation of the `partisan_plumtree_backend` callback. +"""). +-spec broadcast_data(Gossip :: bondy_mst_crdt:gossip()) -> + {MessageId :: {bondy_mst_crdt:node_id(), bondy_mst:hash()}, Payload :: bondy_mst_crdt:gossip()}. +broadcast_data(Gossip) -> + #{from := Peer, root := Root} = bondy_mst_crdt:gossip_data(Gossip), + {{Peer, Root}, Gossip}. + +-spec merge(GossipId :: gossip_id(), Payload :: bondy_mst_crdt:gossip()) -> boolean(). +merge(_Id, Gossip) -> + partisan_gen_server:call(?MODULE, {crdt_merge, Gossip}). + +-spec merge(Peer :: node(), Root :: bondy_mst:hash(), Payload :: bondy_mst_crdt:gossip()) -> boolean(). +merge(Peer, _Root, Gossip) -> + partisan_gen_server:call({?MODULE, Peer}, {crdt_merge, Gossip}). + +-spec is_stale(gossip_id()) -> boolean(). +is_stale({Peer, Root}) -> + ok = partisan_gen_server:cast(?MODULE, {crdt_maybe_merge, Peer, Root}), + true. + +-spec graft(gossip_id()) -> stale | {ok, bondy_mst_crdt:gossip()} | {error, term()}. +graft({_Peer, _Root}) -> + {error, disabled}. + +-spec exchange(node()) -> {ok, pid()} | {error, term()}. +exchange(Peer) -> + exchange(Peer, #{}). + +-spec exchange(node(), map()) -> ok | {error, term()}. +exchange(Peer, Opts) -> + sync(Peer, Opts). + +?DOC(""" +Triggers a synchronisation exchange with a peer. +"""). +sync(Peer) -> + sync(Peer, #{}). + +sync(Peer, Opts) -> + partisan_gen_server:call(?MODULE, {crdt_trigger, Peer, Opts}). 
+
+%% =============================================================================
+%% PARTISAN_GEN_SERVER BEHAVIOR CALLBACKS
+%% =============================================================================
+
+-spec init([pos_integer() | atom()]) -> {ok, State :: t()}.
+init([PartitionId, PoolName]) ->
+    erlang:process_flag(trap_exit, true),
+
+    MonitorTab = ?MONITOR_TAB(PartitionId),
+    {ok, MonitorTab} = erleans_table_owner:add_or_claim(
+        MonitorTab,
+        [
+            set,
+            protected,
+            named_table,
+            {keypos, 1},
+            {write_concurrency, true},
+            {read_concurrency, true},
+            {decentralized_counters, true}
+        ]
+    ),
+
+    partisan:monitor_nodes(true),
+
+    {channel, Channel} = lists:keyfind(channel, 1, partisan_gen:get_opts()),
+
+    Node = partisan:node(),
+    Opts = #{
+        hash_algorithm => sha256,
+        merger => fun(GrainKey, AWSet1, AWSet2) ->
+            mst_merge_value(PartitionId, GrainKey, AWSet1, AWSet2)
+        end,
+        store => bondy_mst_ets_store,
+        store_opts => #{
+            name => list_to_binary("erleans_registry_partition_" ++ integer_to_list(PartitionId)),
+            persistent => true
+        },
+        callback_mod => ?MODULE,
+        max_merges => 1,
+        max_merges_per_root => 1,
+        max_versions => 10,
+        version_ttl => timer:seconds(30),
+        fwd_bcast => false,
+        consistency_model => eventual
+    },
+
+    CRDT = bondy_mst_crdt:new(Node, Opts),
+    Tree = bondy_mst_crdt:tree(CRDT),
+
+    ok = persistent_term:put(?PERSISTENT_KEY(PartitionId), Tree),
+
+    State = #state{
+        partition_id = PartitionId,
+        crdt = CRDT,
+        partisan_channel = Channel
+    },
+
+
+    %% Connect to gproc_pool immediately
+    case gproc_pool:connect_worker(PoolName, {partition, PartitionId}) of
+        true ->
+            ?LOG_INFO("Successfully connected partition ~p to pool ~p", [PartitionId, PoolName]);
+        Error ->
+            ?LOG_ERROR("Failed to connect partition ~p to pool ~p: ~p", [PartitionId, PoolName, Error]),
+            error({pool_connection_failed, Error})
+    end,
+
+    {ok, State, {continue, monitor_existing}}.
+ +handle_continue(monitor_existing, #state{partition_id = PartitionId} = State0) -> + MonitorTab = ?MONITOR_TAB(PartitionId), + Fun = fun + ({Pid, GrainRef, _OldMRef}, Acc0) -> + case erlang:is_process_alive(Pid) of + true -> + {_, Acc} = do_register_name(Acc0, GrainRef, Pid, relaxed), + Acc; + false -> + {_, Acc} = do_unregister_name(Acc0, GrainRef, Pid), + Acc + end + end, + State = lists:foldl(Fun, State0, ets:tab2list(MonitorTab)), + {noreply, State}; + +handle_continue(_, State) -> + {noreply, State}. + +handle_call({register_name, GrainRef}, {Caller, _}, State0) when is_pid(Caller) -> + case lookup_local_pid(State0#state.partition_id, GrainRef) of + undefined -> + Processes = do_lookup(State0, GrainRef), + case filter_alive(Processes) of + [] -> + {Reply, State} = do_register_name(State0, GrainRef, Caller), + {reply, Reply, State}; + [ProcRef|_] -> + {reply, {error, {already_in_use, ProcRef}}, State0} + end; + Pid when Pid == Caller -> + {reply, ok, State0}; + Pid when is_pid(Pid) -> + ProcRef = partisan_remote_ref:from_term(Pid), + {reply, {error, {already_in_use, ProcRef}}, State0} + end; + +handle_call({register_name, _}, _From, State) -> + {reply, {error, not_local}, State}; + +handle_call({unregister_name, GrainRef}, {Caller, _}, State0) when is_pid(Caller) -> + {Reply, State} = do_unregister_name(State0, GrainRef, Caller), + {reply, Reply, State}; + +handle_call({unregister_name, _}, _From, State) -> + {reply, {error, not_local}, State}; + +handle_call({whereis_name, #{placement := stateless} = GrainRef, _}, _From, State) -> + Reply = whereis_stateless(GrainRef), + {reply, Reply, State}; + +handle_call({whereis_name, #{placement := {stateless, _}} = GrainRef, _}, _From, State) -> + Reply = whereis_stateless(GrainRef), + {reply, Reply, State}; + +handle_call({whereis_name, GrainRef, Opts}, _From, State) -> + case do_lookup(State, GrainRef) of + [] -> + {reply, undefined, State}; + ProcRefs -> + Reply = pick(ProcRefs, Opts), + {reply, Reply, State} + 
end; + +handle_call({lookup, GrainRef}, _From, State) -> + Reply = do_lookup(State, GrainRef), + {reply, Reply, State}; + +handle_call({grain_ref, Pid}, _From, State) when is_pid(Pid) -> + Reply = case monitor_lookup(State#state.partition_id, Pid) of + {Pid, GrainRef, _} -> + {ok, GrainRef}; + undefined -> + {error, not_found} + end, + {reply, Reply, State}; + +handle_call({grain_ref, ProcRef}, _From, State) -> + Reply = case partisan_remote_ref:is_local(ProcRef) of + true -> + {_, Reply0, _} = handle_call({grain_ref, partisan_remote_ref:to_term(ProcRef)}, undefined, State), + Reply0; + false -> + Peer = partisan:node(ProcRef), + case partisan_rpc:call(Peer, ?MODULE, grain_ref, [self(), ProcRef], 5000) of + {badrpc, Reason} -> + {error, Reason}; + Result -> + Result + end + end, + {reply, Reply, State}; + +handle_call({to_list, Opts}, _From, #state{partition_id = _PartitionId, crdt = CRDT} = State) -> + Tree = bondy_mst_crdt:tree(CRDT), + L = bondy_mst:fold( + Tree, + fun({GrainKey, Value}, Acc) -> + case sets:to_list(state_awset:query(Value)) of + [] -> + Acc; + L -> + case pick(L, Opts) of + undefined -> + Acc; + ProcRef -> + [{GrainKey, ProcRef} | Acc] + end + end + end, + [] + ), + {reply, lists:reverse(L), State}; + +handle_call({crdt_merge, Gossip}, _From, State) -> + CRDT0 = State#state.crdt, + Root0 = bondy_mst_crdt:root(CRDT0), + CRDT = bondy_mst_crdt:handle(CRDT0, Gossip), + Root = bondy_mst_crdt:root(CRDT), + Reply = Root =/= Root0, + {reply, Reply, State#state{crdt = CRDT}}; + +handle_call({crdt_trigger, Peer, _Opts}, _From, State) -> + Reply = bondy_mst_crdt:trigger(State#state.crdt, Peer), + {reply, Reply, State}; + +handle_call(info, _From, #state{partition_id = PartitionId, crdt = CRDT} = State) -> + MonitorTab = ?MONITOR_TAB(PartitionId), + Reply = #{ + partition_id => PartitionId, + tree => #{ + root => bondy_mst_crdt:root(CRDT) + }, + local_registry => #{ + memory => ets:info(MonitorTab, memory), + size => ets:info(MonitorTab, size) + } + }, + 
{reply, Reply, State}; + +%% TEST API - These handle_call clauses are used by the test suite +handle_call({register_name_test, GrainRef, ProcRef}, _From, State0) -> + %% Used for testing only + {Reply, State} = do_register_name_test(State0, GrainRef, ProcRef), + {reply, Reply, State}; + +handle_call({unregister_name_test, GrainRef, ProcRef}, _From, State0) -> + %% Used for testing only + Key = grain_key(GrainRef), + {Reply, State} = do_unregister_name_test(State0, Key, ProcRef), + {reply, Reply, State}; + +handle_call({add_test, GrainRef, ProcRef}, _From, State0) -> + %% Used for testing only + Key = grain_key(GrainRef), + State = add(State0, Key, ProcRef, partisan_remote_ref:node(ProcRef)), + {reply, ok, State}; + +handle_call({remove_test, GrainRef, ProcRef}, _From, State0) -> + %% Used for testing only + Key = grain_key(GrainRef), + State = remove(State0, Key, ProcRef, partisan_remote_ref:node(ProcRef)), + {reply, ok, State}; + +handle_call(_Request, _From, State) -> + {reply, {error, unknown_call}, State}. + +-spec handle_cast(Request :: term(), State :: t()) -> {noreply, NewState :: t()}. 
+handle_cast({crdt_maybe_merge, Peer, Root}, State) -> + Root == bondy_mst_crdt:root(State#state.crdt) + andalso bondy_mst_crdt:trigger(State#state.crdt, Peer), + {noreply, State}; + +handle_cast({crdt_on_merge, _Peer}, #state{initial_sync = false} = State0) -> + State = remove_stale(State0#state{initial_sync = true}), + ok = maybe_deactivate_local_duplicates(State), + {noreply, State}; + +handle_cast({crdt_on_merge, _Peer}, #state{initial_sync = true} = State) -> + ok = maybe_deactivate_local_duplicates(State), + {noreply, State}; + +handle_cast({crdt_message, Msg}, State) -> + CRDT = bondy_mst_crdt:handle(State#state.crdt, Msg), + {noreply, State#state{crdt = CRDT}}; + +handle_cast({force_unregister_name, GrainKey, ProcRef}, State0) -> + case partisan_remote_ref:is_local(ProcRef) of + true -> + Pid = partisan_remote_ref:to_pid(ProcRef), + {_, State} = do_unregister_name_by_key(State0, GrainKey, Pid), + {noreply, State}; + false -> + {noreply, State0} + end; + +handle_cast(_Request, State) -> + {noreply, State}. + +-spec handle_info(Message :: term(), State :: t()) -> {noreply, NewState :: t()}. +handle_info({'ETS-TRANSFER', _, _, []}, State) -> + {noreply, State}; + +handle_info({nodedown, Node}, State) -> + CRDT = bondy_mst_crdt:cancel_merge(State#state.crdt, Node), + {noreply, State#state{crdt = CRDT}}; + +handle_info({nodeup, _Node}, State) -> + {noreply, State}; + +handle_info({'DOWN', MRef, process, Pid, _Info}, State0) when is_pid(Pid) -> + ?LOG_INFO("Grain down ~p", [{Pid, MRef}]), + {_, State} = do_unregister_process(State0, Pid), + {noreply, State}; + +handle_info(Event, State) -> + ?LOG_INFO("Received unknown event ~p", [Event]), + {noreply, State}. + +-spec terminate(Reason :: (normal | shutdown | {shutdown, term()} | term()), State :: t()) -> ok. +terminate(_Reason, #state{partition_id = PartitionId} = State) -> + ok = unregister_all_local(State), + _ = persistent_term:erase(?PERSISTENT_KEY(PartitionId)), + ok. 
+
+%% =============================================================================
+%% PRIVATE
+%% =============================================================================
+
+
+do_lookup(#state{} = State, #{id := _} = GrainRef) ->
+    do_lookup(State, grain_key(GrainRef));
+
+do_lookup(#state{partition_id = PartitionId}, {_, _} = GrainKey) ->
+    Tree = ?TREE(PartitionId),
+    case bondy_mst:get(Tree, GrainKey) of
+        undefined ->
+            [];
+        AWSet ->
+            sort_conflicting_values(AWSet)
+    end.
+
+add(#state{} = State, GrainKey, Value) ->
+    add(State, GrainKey, Value, partisan:node()).
+
+add(#state{crdt = CRDT0} = State, Key, Value, Node) ->
+    Tree = bondy_mst_crdt:tree(CRDT0),
+    AWSet1 = case bondy_mst:get(Tree, Key) of
+        undefined ->
+            state_awset:new();
+        AWSet0 ->
+            AWSet0
+    end,
+    {ok, AWSet} = state_type:mutate({add, Value}, Node, AWSet1),
+    CRDT = bondy_mst_crdt:put(CRDT0, Key, AWSet),
+    State#state{crdt = CRDT}.
+
+remove(State, GrainKey, Value) ->
+    remove(State, GrainKey, Value, partisan:node()).
+
+remove(State, Key, Value, Node) ->
+    remove(State, Key, Value, Node, #{}).
+
+remove(#state{crdt = CRDT0} = State, Key, Value, Node, Opts) ->
+    CRDT = crdt_remove(CRDT0, Key, Value, Node, Opts),
+    State#state{crdt = CRDT}.
+
+crdt_remove(CRDT, Key, Value, Node, Opts) ->
+    Tree = bondy_mst_crdt:tree(CRDT),
+    AWSet1 = case bondy_mst:get(Tree, Key) of
+        undefined ->
+            state_awset:new();
+        AWSet0 ->
+            AWSet0
+    end,
+    {ok, AWSet} = state_type:mutate({rmv, Value}, Node, AWSet1),
+    bondy_mst_crdt:put(CRDT, Key, AWSet, Opts).
+
+monitor_lookup(PartitionId, Pid) ->
+    MonitorTab = ?MONITOR_TAB(PartitionId),
+    case ets:lookup(MonitorTab, Pid) of
+        [Monitor] ->
+            Monitor;
+        _ ->
+            undefined
+    end.
+ +lookup_local_pid(PartitionId, GrainRef) -> + MonitorTab = ?MONITOR_TAB(PartitionId), + case ets:match_object(MonitorTab, {'_', GrainRef, '_'}) of + [{Pid, GrainRef, _}] -> + Pid; + _ -> + undefined + end. + +mst_merge_value(PartitionId, GrainKey, AWSet1, AWSet2) -> + AWSet3 = state_awset:merge(AWSet1, AWSet2), + AWSet = remove_deactivated(PartitionId, AWSet3), + ?LOG_DEBUG(#{ + description => "Merged values", + partition_id => PartitionId, + key => GrainKey, + rhs => AWSet1, + lhs => AWSet2, + result => AWSet + }), + ok = maybe_deactivate_local_duplicate(PartitionId, GrainKey, AWSet), + AWSet. + +remove_deactivated(PartitionId, AWSet) -> + Fun = fun(ProcRef, Acc) -> + maybe + true ?= partisan_remote_ref:is_local(ProcRef), + Pid ?= partisan_remote_ref:to_pid(ProcRef), + undefined ?= monitor_lookup(PartitionId, Pid), + ?LOG_DEBUG(#{ + message => "Removing grain from registry", + partition_id => PartitionId, + process_ref => ProcRef, + reason => deactivated + }), + awset_remove(Acc, ProcRef) + else + false -> + Acc; + {_Pid, _GrainRef, _Mref} -> + Acc + end + end, + sets:fold(Fun, AWSet, state_awset:query(AWSet)). + +awset_remove(AWSet0, Value) -> + {ok, AWSet} = state_type:mutate({rmv, Value}, partisan:node(), AWSet0), + AWSet. + +maybe_deactivate_local_duplicate(_PartitionId, GrainKey, AWSet) -> + All = sets:to_list(state_awset:query(AWSet)), + maybe + {[ProcRef], [_ | _] = Remotes} ?= + lists:partition(fun partisan_remote_ref:is_local/1, All), + false ?= safe_is_location_right(GrainKey, ProcRef), + true ?= lists:any(fun is_reachable/1, Remotes), + deactivate_grain(GrainKey, ProcRef) + else + _ -> + ok + end. + +maybe_deactivate_local_duplicates(#state{crdt = CRDT, partition_id = PartitionId}) -> + Tree = bondy_mst_crdt:tree(CRDT), + Fun = fun({Key, AWSet}) -> maybe_deactivate_local_duplicate(PartitionId, Key, AWSet) end, + bondy_mst:foreach(Tree, Fun). 
+ +safe_is_location_right({_, Mod}, LocalPRef) -> + try + Pid = partisan_remote_ref:to_pid(LocalPRef), + erleans_grain:is_location_right(Mod, Pid) + catch + Class:Reason:Stacktrace -> + ?LOG_WARNING(#{ + message => + "erleans_grain:is_location_right/2 failed. " + "Returning true by default", + implementing_module => Mod, + process_ref => LocalPRef, + class => Class, + reason => Reason, + stacktrace => Stacktrace + }), + true + end. + +remove_stale(#state{crdt = CRDT} = State) -> + Tree = bondy_mst_crdt:tree(CRDT), + Fun = fun({Key, AWSet}, Acc) -> remove_stale(Acc, Key, AWSet) end, + bondy_mst:fold(Tree, Fun, State). + +remove_stale(State, Key, AWSet) -> + Set = state_awset:query(AWSet), + Fun = fun(ProcRef, Acc) -> + maybe + true ?= partisan_remote_ref:is_local(ProcRef), + Pid ?= partisan_remote_ref:to_pid(ProcRef), + undefined ?= monitor_lookup(Acc#state.partition_id, Pid), + ?LOG_DEBUG(#{ + message => "Removing grain from registry", + process_ref => ProcRef, + reason => deactivated + }), + remove(Acc, Key, ProcRef, partisan:node(), #{broadcast => false}) + else + _ -> + Acc + end + end, + sets:fold(Fun, State, Set). + +sort_conflicting_values(AWSet) -> + Set = state_awset:query(AWSet), + lists:sort( + fun(A, B) -> + Result = { + partisan_remote_ref:is_local(A), + partisan_remote_ref:is_local(B) + }, + case Result of + {true, _} -> + true; + {_, true} -> + false; + {false, false} -> + A =< B + end + end, + sets:to_list(Set) + ). + +do_register_name(State, GrainRef, Pid) -> + do_register_name(State, GrainRef, Pid, strict). + +do_register_name(#state{partition_id = PartitionId} = State0, GrainRef, Pid, Mode) when is_pid(Pid) -> + case monitor(PartitionId, GrainRef, Pid, Mode) of + ok -> + Key = grain_key(GrainRef), + Value = partisan_remote_ref:from_term(Pid), + State = add(State0, Key, Value), + {ok, State}; + {error, _} = Error -> + {Error, State0} + end. 
+ +do_unregister_process(#state{partition_id = PartitionId} = State0, Pid) when is_pid(Pid) -> + case monitor_lookup(PartitionId, Pid) of + {Pid, GrainRef, _} -> + do_unregister_name(State0, GrainRef, Pid); + undefined -> + {ok, State0} + end. + +do_unregister_name(#state{partition_id = PartitionId} = State0, GrainRef, Pid) when is_pid(Pid) -> + MonitorTab = ?MONITOR_TAB(PartitionId), + ok = demonitor(MonitorTab, Pid), + true = ets:delete(MonitorTab, Pid), + Key = grain_key(GrainRef), + Value = partisan_remote_ref:from_term(Pid), + State = remove(State0, Key, Value), + {ok, State}. + +do_unregister_name_by_key(#state{partition_id = PartitionId} = State0, GrainKey, Pid) when is_pid(Pid) -> + MonitorTab = ?MONITOR_TAB(PartitionId), + ok = demonitor(MonitorTab, Pid), + true = ets:delete(MonitorTab, Pid), + Value = partisan_remote_ref:from_term(Pid), + State = remove(State0, GrainKey, Value), + {ok, State}. + +grain_key(#{id := Id, implementing_module := Mod}) -> + {Id, Mod}. + +%% @private +do_register_name_test(State0, GrainRef, ProcRef) -> + Key = grain_key(GrainRef), + State = add(State0, Key, ProcRef, partisan:node(ProcRef)), + {ok, State}. + +%% @private +do_unregister_name_test(State0, GrainKey, ProcRef) -> + State = remove(State0, GrainKey, ProcRef, partisan:node(ProcRef)), + {ok, State}. + +monitor(PartitionId, GrainRef, Pid, strict) when is_pid(Pid) -> + MonitorTab = ?MONITOR_TAB(PartitionId), + Mref = erlang:monitor(process, Pid), + case ets:insert_new(MonitorTab, {Pid, GrainRef, Mref}) of + true -> + ok; + false -> + true = erlang:demonitor(Mref, [flush]), + {OtherPid, GrainRef, _} = monitor_lookup(PartitionId, Pid), + {error, {already_in_use, partisan_remote_ref:from_term(OtherPid)}} + end; + +monitor(PartitionId, GrainRef, Pid, relaxed) when is_pid(Pid) -> + MonitorTab = ?MONITOR_TAB(PartitionId), + Mref = erlang:monitor(process, Pid), + true = ets:insert(MonitorTab, {Pid, GrainRef, Mref}), + ok. 
+ +demonitor(MonitorTab, Pid) -> + case ets:take(MonitorTab, Pid) of + [{Pid, _, Mref}] -> + true = erlang:demonitor(Mref, [flush]), + ok; + [] -> + ok + end. + +deactivate_grain(GrainKey, ProcRef) -> + case erleans_grain:deactivate(ProcRef) of + ok -> + ?LOG_NOTICE(#{ + description => "Succeeded to deactivate duplicate", + grain => GrainKey, + pid => ProcRef + }), + ok; + {error, Reason} when Reason == not_found; Reason == not_active -> + ?LOG_ERROR(#{ + description => "Failed to deactivate duplicate", + grain => GrainKey, + pid => ProcRef, + reason => Reason + }), + partisan_gen_server:cast( + {?MODULE, partisan_remote_ref:node(ProcRef)}, + {force_unregister_name, GrainKey, ProcRef} + ); + {error, Reason} -> + ?LOG_ERROR(#{ + description => "Failed to deactivate duplicate", + grain => GrainKey, + pid => ProcRef, + reason => Reason + }), + ok + end. + +whereis_stateless(GrainRef) -> + case gproc_pool:pick_worker(GrainRef) of + false -> + undefined; + Pid -> + partisan_remote_ref:from_term(Pid) + end. + +pick([], _) -> + undefined; + +pick(L, []) -> + pick(L, [unsafe]); + +pick([H], [unsafe]) -> + H; + +pick([H | _], [unsafe]) -> + H; + +pick(List, [safe]) -> + pick_alive(List). + +pick_alive([H | T]) -> + try partisan:is_process_alive(H) of + true -> + H; + false -> + pick_alive(T) + catch + error:_ -> + pick_alive(T) + end; + +pick_alive([]) -> + undefined. + +filter_alive(undefined) -> + []; + +filter_alive(ProcRefs) when is_list(ProcRefs) -> + lists:filter( + fun(ProcRef) -> + try + partisan:is_process_alive(ProcRef) + catch + _:_ -> + false + end + end, + ProcRefs + ). + +is_reachable(ProcRef) -> + try + partisan:is_connected(partisan:node(ProcRef)) + catch + _:_ -> + false + end. 
+ +unregister_all_local(#state{partition_id = PartitionId} = State) -> + MonitorTab = ?MONITOR_TAB(PartitionId), + true = ets:safe_fixtable(MonitorTab, true), + try + unregister_local(State, ets:first(MonitorTab)) + catch + Class:Reason:Stacktrace -> + ?LOG_ERROR(#{ + message => "Unexpected error", + class => Class, + reason => Reason, + stacktrace => Stacktrace + }), + ok + after + true = ets:safe_fixtable(MonitorTab, false) + end. + +unregister_local(#state{partition_id = PartitionId} = State0, Pid) when is_pid(Pid) -> + MonitorTab = ?MONITOR_TAB(PartitionId), + GrainRef = ets:lookup_element(MonitorTab, Pid, 2), + {ok, State} = do_unregister_name(State0, GrainRef, Pid), + unregister_local(State, ets:next(MonitorTab, Pid)); + +unregister_local(_, '$end_of_table') -> + ok. + +%% ============================================================================= +%% TEST +%% ============================================================================= + +-ifdef(TEST). + +register_name_(PartitionPid, GrainRef, ProcRef) -> + partisan_gen_server:call(PartitionPid, {register_name_test, GrainRef, ProcRef}). + +unregister_name_(PartitionPid, GrainRef, ProcRef) -> + partisan_gen_server:call(PartitionPid, {unregister_name_test, GrainRef, ProcRef}). + +add_(PartitionPid, GrainRef, ProcRef) -> + partisan_gen_server:call(PartitionPid, {add_test, GrainRef, ProcRef}). + +remove_(PartitionPid, GrainRef, ProcRef) -> + partisan_gen_server:call(PartitionPid, {remove_test, GrainRef, ProcRef}). + +-endif. \ No newline at end of file diff --git a/src/erleans_registry_sup.erl b/src/erleans_registry_sup.erl new file mode 100644 index 0000000..9b42414 --- /dev/null +++ b/src/erleans_registry_sup.erl @@ -0,0 +1,87 @@ +%% ----------------------------------------------------------------------------- +%% Copyright Tristan Sloughter 2019. All Rights Reserved. +%% Copyright Leapsight 2020 - 2023. All Rights Reserved. 
+%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%% ----------------------------------------------------------------------------- + +-module(erleans_registry_sup). + +-behaviour(supervisor). + +-include_lib("kernel/include/logger.hrl"). + +%% API +-export([start_link/0]). + +%% Supervisor callbacks +-export([init/1]). + +-define(POOL_NAME, erleans_registry_pool). + +%% ============================================================================= +%% API +%% ============================================================================= + +-spec start_link() -> {ok, pid()} | {error, term()}. +start_link() -> + supervisor:start_link({local, ?MODULE}, ?MODULE, []). + +%% ============================================================================= +%% SUPERVISOR CALLBACKS +%% ============================================================================= + +-spec init([]) -> {ok, {supervisor:sup_flags(), [supervisor:child_spec()]}}. 
+init([]) ->
+    %% Get number of partitions from configuration
+    N = erleans_config:get(pm_partitions, 1),
+
+    ?LOG_INFO("Starting erleans registry with ~p partitions", [N]),
+
+    %% Create the gproc_pool first
+    try
+        gproc_pool:new(?POOL_NAME, hash, [{size, N}]),
+        ?LOG_INFO("Created gproc_pool ~p with size ~p", [?POOL_NAME, N]),
+
+        %% Add workers to the pool for each partition
+        _ = [gproc_pool:add_worker(?POOL_NAME, {partition, PartitionId}, PartitionId)
+         || PartitionId <- lists:seq(1, N)],
+        ?LOG_INFO("Added ~p workers to pool ~p", [N, ?POOL_NAME])
+    catch
+        error:exists ->
+            ?LOG_INFO("gproc_pool ~p already exists", [?POOL_NAME]);
+        Class:Reason ->
+            ?LOG_ERROR("Failed to create gproc_pool ~p: ~p:~p", [?POOL_NAME, Class, Reason]),
+            error({gproc_pool_creation_failed, Class, Reason})
+    end,
+
+    %% Create child specs for N partition workers
+    Children = [
+        #{
+            id => {erleans_registry_partition, PartitionId},
+            start => {erleans_registry_partition, start_link, [PartitionId, ?POOL_NAME]},
+            restart => permanent,
+            shutdown => 5000,
+            type => worker,
+            modules => [erleans_registry_partition]
+        } || PartitionId <- lists:seq(1, N)
+    ],
+
+    %% Supervisor flags
+    SupFlags = #{
+        strategy => one_for_one,
+        intensity => 5,
+        period => 10
+    },
+
+    {ok, {SupFlags, Children}}.
\ No newline at end of file diff --git a/src/erleans_sup.erl b/src/erleans_sup.erl index e3e9983..f4a5cc7 100644 --- a/src/erleans_sup.erl +++ b/src/erleans_sup.erl @@ -43,16 +43,16 @@ init([Config]) -> restart => permanent, type => worker, shutdown => 5000}, - #{id => erleans_pm, - start => {erleans_pm, start_link, []}, - restart => permanent, - type => worker, - shutdown => 5000}, #{id => erleans_config, start => {erleans_config, start_link, [Config]}, restart => permanent, type => worker, shutdown => 5000}, + #{id => erleans_registry_sup, + start => {erleans_registry_sup, start_link, []}, + restart => permanent, + type => supervisor, + shutdown => infinity}, #{id => erleans_providers_sup, start => {erleans_providers_sup, start_link, []}, restart => permanent, diff --git a/test/dist_lifecycle_SUITE.erl b/test/dist_lifecycle_SUITE.erl index c81a426..0982df1 100644 --- a/test/dist_lifecycle_SUITE.erl +++ b/test/dist_lifecycle_SUITE.erl @@ -29,6 +29,7 @@ init_per_suite(Config) -> application:load(erleans), application:set_env(partisan, peer_port, 10200), application:set_env(partisan, pid_encoding, false), + application:set_env(partisan, partisan_peer_service_manager, partisan_pluggable_peer_service_manager), %% lower gossip interval of partisan membership so it triggers more often %% in tests application:set_env(partisan, periodic_enabled, true), @@ -240,6 +241,7 @@ start_nodes([{Node, PeerPort} | T], Acc) -> {application, set_env, [partisan, periodic_enabled, true]}, {application, set_env, [partisan, periodic_interval, 100]}, {application, set_env, [partisan, peer_port, PeerPort]}, + {application, set_env, [partisan, partisan_peer_service_manager, partisan_pluggable_peer_service_manager]}, {application, ensure_all_started, [partisan]}, {application, ensure_all_started, [bondy_mst]}, {application, ensure_all_started, [erleans]} diff --git a/test/partition_logic_test.erl b/test/partition_logic_test.erl new file mode 100644 index 0000000..7a19fe5 --- /dev/null +++ 
b/test/partition_logic_test.erl @@ -0,0 +1,158 @@ +%%%-------------------------------------------------------------------- +%%% @doc +%%% Standalone test to analyze partition selection logic +%%% @end +%%%-------------------------------------------------------------------- +-module(partition_logic_test). + +-export([test_partition_logic/0]). + +-include_lib("eunit/include/eunit.hrl"). + +test_partition_logic() -> + %% Start applications + application:ensure_all_started(gproc), + application:load(erleans), + application:set_env(erleans, pm_partitions, 4), + application:ensure_all_started(erleans), + + io:format("=== Testing Partition Selection Logic ===~n"), + + %% Test 1: Analyze gproc_pool setup + analyze_gproc_pool(), + + %% Test 2: Test grain_key generation + test_grain_key_generation(), + + %% Test 3: Test consistent routing + test_consistent_routing(), + + %% Test 4: Test distribution + test_distribution(), + + %% Clean up + application:stop(erleans), + ok. + +analyze_gproc_pool() -> + io:format("~n--- Analyzing gproc_pool setup ---~n"), + + %% Check if pool exists + PoolName = erleans_registry_pool, + + try + Workers = gproc_pool:active_workers(PoolName), + io:format("Pool ~p has ~p active workers:~n", [PoolName, length(Workers)]), + [io:format(" Worker: ~p -> PID: ~p~n", [WorkerKey, WorkerPid]) + || {WorkerKey, WorkerPid} <- Workers], + + %% Test pool strategy + io:format("Pool strategy: hash~n"), + io:format("Pool size: ~p~n", [length(Workers)]) + catch + Error:Reason -> + io:format("ERROR accessing pool: ~p:~p~n", [Error, Reason]) + end. 
+ +test_grain_key_generation() -> + io:format("~n--- Testing grain_key generation ---~n"), + + %% Create some test grain references + TestGrains = [ + #{id => <<"grain-1">>, implementing_module => test_grain, placement => prefer_local}, + #{id => <<"grain-2">>, implementing_module => test_grain, placement => prefer_local}, + #{id => <<"grain-3">>, implementing_module => test_grain, placement => prefer_local}, + #{id => 123, implementing_module => my_grain, placement => prefer_local}, + #{id => {complex, key}, implementing_module => other_grain, placement => prefer_local} + ], + + [begin + GrainKey = grain_key(Grain), + io:format("Grain ~p -> Key: ~p~n", [maps:get(id, Grain), GrainKey]) + end || Grain <- TestGrains]. + +test_consistent_routing() -> + io:format("~n--- Testing consistent routing ---~n"), + + %% Test the same grain multiple times + TestGrain = #{id => <<"consistency-test">>, implementing_module => test_grain, placement => prefer_local}, + + Results = [begin + try + Pid = erleans_pm:select_partition(TestGrain), + {ok, Pid} + catch + Error:Reason -> {error, {Error, Reason}} + end + end || _ <- lists:seq(1, 5)], + + io:format("Routing results for same grain:~n"), + [io:format(" Attempt ~p: ~p~n", [N, Result]) || {N, Result} <- lists:zip(lists:seq(1, 5), Results)], + + %% Check consistency + case Results of + [{ok, FirstPid} | _] -> + AllSame = lists:all(fun({ok, Pid}) -> Pid == FirstPid; (_) -> false end, Results), + io:format("All results consistent: ~p~n", [AllSame]); + _ -> + io:format("ERROR: No successful routing~n") + end. 
+ +test_distribution() -> + io:format("~n--- Testing distribution across partitions ---~n"), + + %% Create many grains to test distribution + NumGrains = 20, + TestGrains = [#{id => list_to_binary("grain-" ++ integer_to_list(N)), + implementing_module => test_grain, + placement => prefer_local} + || N <- lists:seq(1, NumGrains)], + + %% Get partition assignments + Assignments = [begin + try + Pid = erleans_pm:select_partition(Grain), + GrainKey = grain_key(Grain), + + %% Get partition info to find partition ID + Info = partisan_gen_server:call(Pid, info), + PartitionId = maps:get(partition_id, Info), + + {GrainKey, PartitionId, Pid} + catch + Error:Reason -> + {grain_key(Grain), error, {Error, Reason}} + end + end || Grain <- TestGrains], + + io:format("Grain distribution:~n"), + [io:format(" ~p -> Partition ~p (PID: ~p)~n", [GrainKey, PartitionId, Pid]) + || {GrainKey, PartitionId, Pid} <- Assignments, PartitionId =/= error], + + %% Count distribution + PartitionCounts = lists:foldl( + fun({_, PartitionId, _}, Acc) when is_integer(PartitionId) -> + maps:update_with(PartitionId, fun(X) -> X + 1 end, 1, Acc); + (_, Acc) -> Acc + end, + #{}, + Assignments + ), + + io:format("Partition counts: ~p~n", [PartitionCounts]), + + %% Check if distribution is reasonable + Counts = maps:values(PartitionCounts), + case Counts of + [] -> + io:format("ERROR: No successful assignments~n"); + _ -> + MinCount = lists:min(Counts), + MaxCount = lists:max(Counts), + UsedPartitions = length(Counts), + io:format("Used partitions: ~p/4, Min: ~p, Max: ~p~n", [UsedPartitions, MinCount, MaxCount]) + end. + +%% Helper function (copied from erleans_pm) +grain_key(#{id := Id, implementing_module := Mod}) -> + {Id, Mod}. 
\ No newline at end of file diff --git a/test/registry_partition_SUITE.erl b/test/registry_partition_SUITE.erl new file mode 100644 index 0000000..8834df9 --- /dev/null +++ b/test/registry_partition_SUITE.erl @@ -0,0 +1,385 @@ +%%%-------------------------------------------------------------------- +%%% @author Alejandro Miguez +%%% @doc +%%% Test suite for partitioned registry implementation. +%%% Verifies that grains are consistently routed to the same partition +%%% and that the partitioned system works correctly. +%%% @end +%%%-------------------------------------------------------------------- +-module(registry_partition_SUITE). + +-compile(export_all). + +-include_lib("eunit/include/eunit.hrl"). +-include_lib("common_test/include/ct.hrl"). + +-include("test_utils.hrl"). + +%% Test cases +all() -> + [ + partition_consistency, + partition_distribution, + cross_partition_lookup, + partition_isolation, + router_aggregation, + grain_ref_lookup, + partition_info + ]. + +init_per_suite(Config) -> + %% Ensure we're using partitioned configuration + application:load(erleans), + application:set_env(erleans, pm_partitions, 4), + Config. + +end_per_suite(_Config) -> + application:unload(erleans), + ok. + +init_per_testcase(_TestCase, Config) -> + %% Start erleans fresh for each test + {ok, _} = application:ensure_all_started(erleans), + Config. + +end_per_testcase(_TestCase, _Config) -> + application:stop(erleans), + ok. 
+ +%%==================================================================== +%% Test Cases +%%==================================================================== + +partition_consistency(_Config) -> + ct:log("Testing that the same grain always goes to the same partition"), + + %% First, let's verify our pool setup + verify_pool_setup(), + + %% Create multiple grain references with same ID + GrainRef1 = erleans:get_grain(test_grain, <<"consistent-grain-1">>), + GrainRef2 = erleans:get_grain(test_grain, <<"consistent-grain-2">>), + GrainRef3 = erleans:get_grain(test_grain, <<"consistent-grain-3">>), + + %% Test the underlying gproc_pool logic directly + test_gproc_pool_consistency(GrainRef1), + test_gproc_pool_consistency(GrainRef2), + test_gproc_pool_consistency(GrainRef3), + + %% Get partition assignments multiple times + Partition1_A = get_grain_partition(GrainRef1), + Partition1_B = get_grain_partition(GrainRef1), % Should be same as A + Partition1_C = get_grain_partition(GrainRef1), % Should be same as A + + Partition2_A = get_grain_partition(GrainRef2), + Partition2_B = get_grain_partition(GrainRef2), % Should be same as A + + Partition3_A = get_grain_partition(GrainRef3), + Partition3_B = get_grain_partition(GrainRef3), % Should be same as A + + %% Verify consistency - same grain always goes to same partition + ?assertEqual(Partition1_A, Partition1_B), + ?assertEqual(Partition1_B, Partition1_C), + ?assertEqual(Partition2_A, Partition2_B), + ?assertEqual(Partition3_A, Partition3_B), + + ct:log("Grain1 consistently assigned to partition ~p", [Partition1_A]), + ct:log("Grain2 consistently assigned to partition ~p", [Partition2_A]), + ct:log("Grain3 consistently assigned to partition ~p", [Partition3_A]), + + ok. 
%% Creating many grains must spread them over several partitions, and
%% the spread must not be pathologically skewed.
partition_distribution(_Config) ->
    ct:log("Testing that grains are distributed across multiple partitions"),

    %% Build 100 grains with distinct ids.
    NumGrains = 100,
    Names = [list_to_binary("grain-" ++ integer_to_list(N))
             || N <- lists:seq(1, NumGrains)],
    Grains = [erleans:get_grain(test_grain, Name) || Name <- Names],

    %% Resolve the partition each grain routes to and tally per
    %% partition.
    Partitions = [get_grain_partition(Grain) || Grain <- Grains],
    PartitionCounts = count_partitions(Partitions),

    ct:log("Partition distribution: ~p", [PartitionCounts]),

    %% At least 2 of the 4 configured partitions must receive grains.
    NumUsedPartitions = length(PartitionCounts),
    ?assert(NumUsedPartitions >= 2),

    %% NOTE(review): only partitions that received at least one grain
    %% appear in PartitionCounts, so this checks skew among used
    %% partitions rather than proving no partition is empty.
    Counts = [Count || {_Partition, Count} <- PartitionCounts],
    MaxCount = lists:max(Counts),
    MinCount = lists:min(Counts),

    %% Distribution shouldn't be too skewed (max no more than 3x min).
    ?assert(MaxCount =< (MinCount * 3)),

    ok.
%% whereis_name/1 and lookup/1 must work for every grain no matter
%% which partition it was routed to.
cross_partition_lookup(_Config) ->
    ct:log("Testing lookup operations across partitions"),

    %% Grains with distinct ids, likely landing on different partitions.
    Ids = [<<"cross-lookup-1">>, <<"cross-lookup-2">>,
           <<"cross-lookup-3">>, <<"cross-lookup-4">>],
    Grains = [erleans:get_grain(test_grain, Id) || Id <- Ids],

    %% Activating a grain registers it with the process manager.
    lists:foreach(
        fun(Grain) ->
            ?assertEqual({ok, 1}, test_grain:activated_counter(Grain))
        end,
        Grains
    ),

    timer:sleep(100), % Allow registration to complete

    %% whereis_name must resolve every grain regardless of partition.
    lists:foreach(
        fun(Grain) ->
            ?assert(erleans_pm:whereis_name(Grain) =/= undefined)
        end,
        Grains
    ),

    %% lookup must return at least one entry for every grain.
    lists:foreach(
        fun(Grain) ->
            ?assert(erleans_pm:lookup(Grain) =/= [])
        end,
        Grains
    ),

    ct:log("Successfully looked up grains across partitions"),
    ok.
%% Every partition must contain exactly the grains assigned to it and
%% none of the grains assigned elsewhere.
partition_isolation(_Config) ->
    ct:log("Testing that each partition manages its own grains"),

    %% Three grains; their partition placement is decided by hashing.
    TestGrains = [erleans:get_grain(test_grain, Id)
                  || Id <- [<<"isolation-test-1">>,
                            <<"isolation-test-2">>,
                            <<"isolation-test-3">>]],

    %% Activation registers each grain with its partition.
    lists:foreach(
        fun(Grain) ->
            ?assertEqual({ok, 1}, test_grain:activated_counter(Grain))
        end,
        TestGrains
    ),
    timer:sleep(100),

    %% Record which partition each grain resolved to.
    PartitionAssignments = [{Grain, get_grain_partition(Grain)}
                            || Grain <- TestGrains],
    ct:log("Partition assignments: ~p", [PartitionAssignments]),

    PartitionGroups = group_by_partition(PartitionAssignments),
    ct:log("Grains grouped by partition: ~p", [PartitionGroups]),

    %% Each partition must answer only for its own grains.
    lists:foreach(
        fun({Partition, Grains}) ->
            verify_partition_isolation(Partition, Grains, TestGrains)
        end,
        PartitionGroups
    ),

    ok.
%% The router-level to_list/0 and info/0 must aggregate data from every
%% partition, not just the one serving the calling grain.
router_aggregation(_Config) ->
    ct:log("Testing that router correctly aggregates data from all partitions"),

    TestGrains = [erleans:get_grain(test_grain, Id)
                  || Id <- [<<"aggregation-1">>, <<"aggregation-2">>,
                            <<"aggregation-3">>, <<"aggregation-4">>]],

    %% Activation registers each grain with its partition.
    lists:foreach(
        fun(Grain) ->
            ?assertEqual({ok, 1}, test_grain:activated_counter(Grain))
        end,
        TestGrains
    ),
    timer:sleep(100),

    %% to_list/0 must contain an entry for every activated grain.
    AllEntries = erleans_pm:to_list(),
    lists:foreach(
        fun(Grain) ->
            ?assert(lists:keymember(grain_key(Grain), 1, AllEntries))
        end,
        TestGrains
    ),

    %% info/0 must aggregate metadata from every partition.
    Info = erleans_pm:info(),
    ?assert(maps:is_key(num_partitions, Info)),
    ?assert(maps:is_key(partitions, Info)),

    NumPartitions = maps:get(num_partitions, Info),
    PartitionInfos = maps:get(partitions, Info),

    ?assertEqual(4, NumPartitions), % We configured 4 partitions
    ?assertEqual(4, length(PartitionInfos)), % Should have info for all 4

    ct:log("Successfully aggregated data from ~p partitions", [NumPartitions]),
    ok.

%% grain_ref/1 performs the reverse lookup (process ref -> grain ref)
%% and must succeed whichever partition holds the registration.
grain_ref_lookup(_Config) ->
    ct:log("Testing grain_ref lookup across partitions"),

    Grain = erleans:get_grain(test_grain, <<"grain-ref-test">>),
    ?assertEqual({ok, 1}, test_grain:activated_counter(Grain)),
    timer:sleep(100),

    ProcRef = erleans_pm:whereis_name(Grain),
    ?assert(ProcRef =/= undefined),

    %% This searches all partitions for the owning grain.
    {ok, FoundGrain} = erleans_pm:grain_ref(ProcRef),
    ?assertEqual(Grain, FoundGrain),

    ct:log("Successfully found grain ~p from process ~p", [FoundGrain, ProcRef]),
    ok.
%% The aggregated info map must report the configured number of
%% partitions, and every per-partition entry must carry the expected
%% fields.
partition_info(_Config) ->
    ct:log("Testing partition-specific info functions"),

    GlobalInfo = erleans_pm:info(),

    %% Top-level structure.
    ?assert(maps:is_key(num_partitions, GlobalInfo)),
    ?assert(maps:is_key(partitions, GlobalInfo)),

    NumPartitions = maps:get(num_partitions, GlobalInfo),
    PartitionInfos = maps:get(partitions, GlobalInfo),

    ?assertEqual(4, NumPartitions),
    ?assertEqual(4, length(PartitionInfos)),

    %% Per-partition structure: every entry carries the required fields.
    lists:foreach(
        fun(PartInfo) ->
            ?assert(maps:is_key(partition_id, PartInfo)),
            ?assert(maps:is_key(tree, PartInfo)),
            ?assert(maps:is_key(local_registry, PartInfo))
        end,
        PartitionInfos
    ),

    ct:log("Partition info structure verified"),
    ok.

%%====================================================================
%% Helper Functions
%%====================================================================

%% Resolve which partition currently serves GrainRef by asking the
%% selected partition worker for its info map.
get_grain_partition(GrainRef) ->
    PartitionPid = erleans_pm:select_partition(GrainRef),
    Info = partisan_gen_server:call(PartitionPid, info),
    maps:get(partition_id, Info).

%% The registry key for a grain ref: {Id, ImplementingModule}.
grain_key(#{id := Id, implementing_module := Mod}) ->
    {Id, Mod}.

%% Find the pid of the pool worker registered under {partition, Id},
%% or undefined when the pool is empty or the worker is missing.
%% (lists:keyfind/3 on an empty worker list returns false, covering
%% the empty-pool case as well.)
get_partition_pid_by_id(PartitionId) ->
    Workers = gproc_pool:active_workers(erleans_registry_pool),
    case lists:keyfind({partition, PartitionId}, 1, Workers) of
        {{partition, PartitionId}, Pid} ->
            Pid;
        false ->
            undefined
    end.

%% Tally how many grains landed on each partition.
%% Returns an (unordered) [{Partition, Count}] list.
count_partitions(Partitions) ->
    Tally = lists:foldl(
        fun(Partition, Acc) ->
            maps:update_with(Partition, fun(N) -> N + 1 end, 1, Acc)
        end,
        #{},
        Partitions
    ),
    maps:to_list(Tally).
%% Group [{Grain, Partition}] pairs into [{Partition, Grains}].
%% Grains within a group appear in reverse input order because the
%% fold prepends to the group's list.
group_by_partition(PartitionAssignments) ->
    Grouped = lists:foldl(
        fun({Grain, Partition}, Acc) ->
            maps:update_with(
                Partition,
                fun(Grains) -> [Grain | Grains] end,
                [Grain],
                Acc
            )
        end,
        #{},
        PartitionAssignments
    ),
    maps:to_list(Grouped).

%% Assert that partition PartitionId answers lookups for exactly the
%% grains in PartitionGrains and for none of the other test grains.
verify_partition_isolation(PartitionId, PartitionGrains, AllTestGrains) ->
    ct:log("Verifying partition ~p isolation", [PartitionId]),

    %% Resolve the partition worker via gproc_pool.
    PartitionPid = get_partition_pid_by_id(PartitionId),
    ?assert(PartitionPid =/= undefined),

    lists:foreach(
        fun(Grain) ->
            Result = erleans_registry_partition:lookup(PartitionPid, Grain),
            case lists:member(Grain, PartitionGrains) of
                true ->
                    %% Grains assigned here must be visible here.
                    ?assert(Result =/= [],
                            {should_find_grain, Grain, in_partition, PartitionId});
                false ->
                    %% Grains assigned elsewhere must not leak in.
                    ?assertEqual([], Result,
                                 {should_not_find_grain, Grain, in_partition, PartitionId})
            end
        end,
        AllTestGrains
    ),

    ct:log("Partition ~p isolation verified", [PartitionId]),
    ok.
%% Sanity-check the gproc_pool configuration: exactly four active
%% workers, registered under the keys {partition, 1} .. {partition, 4}.
verify_pool_setup() ->
    PoolName = erleans_registry_pool,
    case gproc_pool:active_workers(PoolName) of
        [] ->
            ct:fail("No active workers in pool ~p", [PoolName]);
        Workers ->
            ct:log("Pool ~p has ~p workers: ~p",
                   [PoolName, length(Workers), Workers]),

            %% Verify we have 4 workers as configured.
            ?assertEqual(4, length(Workers)),

            %% Verify worker keys are the expected partition tuples.
            SortedExpected = lists:sort([{partition, I} || I <- lists:seq(1, 4)]),
            SortedActual = lists:sort([Key || {Key, _Pid} <- Workers]),
            ?assertEqual(SortedExpected, SortedActual),

            ct:log("Pool setup verified correctly")
    end.

%% A grain key must map to the same pool worker on every pick; probe
%% gproc_pool:pick_worker/2 five times and require identical answers.
test_gproc_pool_consistency(GrainRef) ->
    GrainKey = grain_key(GrainRef),
    PoolName = erleans_registry_pool,

    Results = [gproc_pool:pick_worker(PoolName, GrainKey)
               || _ <- lists:seq(1, 5)],

    ct:log("gproc_pool consistency for ~p: ~p", [GrainKey, Results]),

    [FirstResult | RestResults] = Results,
    %% false would mean no worker could be selected at all.
    ?assert(FirstResult =/= false),
    lists:foreach(
        fun(Result) -> ?assertEqual(FirstResult, Result) end,
        RestResults
    ),

    ok.
\ No newline at end of file diff --git a/test/sys.config b/test/sys.config index 3696fc4..76ee57d 100644 --- a/test/sys.config +++ b/test/sys.config @@ -4,7 +4,9 @@ {providers, #{in_memory => #{module => erleans_provider_ets, args => #{}}}}, - {default_provider, in_memory}]}, + {default_provider, in_memory}, + + {pm_partitions, 4}]}, {plumtree, [{broadcast_exchange_timer, 60000}, {broadcast_mods, [lasp_plumtree_backend]}]}, From 7e35eccf3b9da8ead11c67159aa5aab8a38b73a7 Mon Sep 17 00:00:00 2001 From: Alejandro Miguez Date: Tue, 19 Aug 2025 19:24:39 -0300 Subject: [PATCH 02/14] Upgrade partisan to v5.0.3 version --- rebar.config | 2 +- rebar.lock | 37 ++++++++++--------------------------- 2 files changed, 11 insertions(+), 28 deletions(-) diff --git a/rebar.config b/rebar.config index a2cc4ae..58b4e15 100644 --- a/rebar.config +++ b/rebar.config @@ -20,7 +20,7 @@ {partisan, { git, "http://github.com/lasp-lang/partisan.git", - {tag, "v5.0.2"} + {tag, "v5.0.3"} }}, {bondy_mst,{ git, diff --git a/rebar.lock b/rebar.lock index 8296cbb..b8ac54f 100644 --- a/rebar.lock +++ b/rebar.lock @@ -13,15 +13,10 @@ 1}, {<<"bondy_mst">>, {git,"https://github.com/bondy-io/bondy_mst.git", - {ref,"171ca4ebb16f239481f17a5b87562fa5afe82e92"}}, + {ref,"4a0da7062ac8435a8e3f59d3d58c1e5476ec1ec6"}}, 0}, {<<"cf">>,{pkg,<<"cf">>,<<"0.3.1">>},1}, - {<<"eqwalizer_support">>, - {git_subdir,"https://github.com/whatsapp/eqwalizer.git", - {ref,"4ffc91b44a54afd9606dd9160e78cbac55e13c08"}, - "eqwalizer_support"}, - 2}, - {<<"erlware_commons">>,{pkg,<<"erlware_commons">>,<<"1.7.0">>},0}, + {<<"erlware_commons">>,{pkg,<<"erlware_commons">>,<<"1.8.1">>},0}, {<<"gproc">>,{pkg,<<"gproc">>,<<"1.0.0">>},0}, {<<"key_value">>, {git,"https://github.com/leapsight/key_value.git", @@ -31,21 +26,13 @@ {git,"https://github.com/leapsight/ksuid.git", {ref,"315e954846ec5f8d300bec52b88aca60015a9bc2"}}, 2}, - {<<"leveled">>, - {git,"https://github.com/martinsumner/leveled.git", - 
{ref,"c642575caa470c37078907339a5643a5fa5377e7"}}, - 1}, - {<<"lz4">>, - {git,"https://github.com/nhs-riak/erlang-lz4", - {ref,"ca24fd820269047a764fc38272665b6a918dd407"}}, - 2}, {<<"maps_utils">>, {git,"https://github.com/Leapsight/maps_utils.git", {ref,"afa2da62e0e691ce33d1008cc424c994fff339bf"}}, 2}, {<<"memory">>, {git,"https://github.com/Leapsight/memory.git", - {ref,"53ec8d53454912015346d712f4ece5eaeb162b0c"}}, + {ref,"09fac9e5aab8411380adc8b8c88a23bb5a7aef22"}}, 1}, {<<"opentelemetry_api">>, {git,"https://github.com/open-telemetry/opentelemetry-erlang-api.git", @@ -53,10 +40,10 @@ 0}, {<<"partisan">>, {git,"http://github.com/lasp-lang/partisan.git", - {ref,"990cfc6aa9c0ff6d4f451714125cd095e8f26fbe"}}, + {ref,"c5309d479b2beddfdd32f1c00338a8a771a745be"}}, 0}, {<<"quickrand">>,{pkg,<<"quickrand">>,<<"2.0.7">>},1}, - {<<"resulto">>,{pkg,<<"resulto">>,<<"0.2.3">>},0}, + {<<"resulto">>,{pkg,<<"resulto">>,<<"0.2.5">>},0}, {<<"sbroker">>, {git,"https://github.com/Leapsight/sbroker.git", {ref,"c17ffb198dcba9719bba42072c50743902a618f2"}}, @@ -67,27 +54,23 @@ {git,"https://github.com/leapsight/utils.git", {ref,"cfd05ab4a02c2bfbd0ab16ae233de099a5710e21"}}, 1}, - {<<"uuid">>,{pkg,<<"uuid_erl">>,<<"2.0.7">>},0}, - {<<"zstd">>, - {git,"https://github.com/nhs-riak/zstd-erlang", - {ref,"08935690642d838d228623720b520d53db978def"}}, - 2}]}. + {<<"uuid">>,{pkg,<<"uuid_erl">>,<<"2.0.7">>},0}]}. 
[ {pkg_hash,[ {<<"cf">>, <<"5CB902239476E141EA70A740340233782D363A31EEA8AD37049561542E6CD641">>}, - {<<"erlware_commons">>, <<"00F336AE1AB5880253F64217A3988998A54EFFDEB6EF2BFF1A1F8AE667075983">>}, + {<<"erlware_commons">>, <<"444DC0301777CEAE70E1D2AA3C08A511CC08BEA7AE2E7E7F31ADB4C8C3C1BCA8">>}, {<<"gproc">>, <<"AA9EC57F6C9FF065B16D96924168D7C7157CD1FD457680EFE4B1274F456FA500">>}, {<<"quickrand">>, <<"D2BD76676A446E6A058D678444B7FDA1387B813710D1AF6D6E29BB92186C8820">>}, - {<<"resulto">>, <<"876057F56E77B337EB0D0040F272ECAB8FBEB72DDB2FB8C01FF972647E8F19E7">>}, + {<<"resulto">>, <<"FE36BF7A4A0334C35256A0305CE8780BA0368337A99B3E750EA6FB632C5D5A31">>}, {<<"telemetry">>, <<"FEDEBBAE410D715CF8E7062C96A1EF32EC22E764197F70CDA73D82778D61E7A2">>}, {<<"types">>, <<"5782B67231E8C174FE2835395E71E669FE0121076779D2A09F1C0D58EE0E2F13">>}, {<<"uuid">>, <<"B2078D2CC814F53AFA52D36C91E08962C7E7373585C623F4C0EA6DFB04B2AF94">>}]}, {pkg_hash_ext,[ {<<"cf">>, <<"315E8D447D3A4B02BCDBFA397AD03BBB988A6E0AA6F44D3ADD0F4E3C3BF97672">>}, - {<<"erlware_commons">>, <<"C0850EF4FFC031BA61393392316B512EBADF731C66F6E1E3FB014F35F877B33B">>}, + {<<"erlware_commons">>, <<"58CF939C7F173FFEC383A251EE74DCBD13FB27014B1721D224C7583A275965F9">>}, {<<"gproc">>, <<"109F253C2787DE8A371A51179D4973230CBEC6239EE673FA12216A5CE7E4F902">>}, {<<"quickrand">>, <<"B8ACBF89A224BC217C3070CA8BEBC6EB236DBE7F9767993B274084EA044D35F0">>}, - {<<"resulto">>, <<"FE11CF48F53D54B280BC2EBC3975E7E7718B5D1FB30DB848B4A9407806969025">>}, + {<<"resulto">>, <<"4A2168C35FFFC6257BD8D566BED26560D053506DF80196B643E2B10F01A88A76">>}, {<<"telemetry">>, <<"7015FC8919DBE63764F4B4B87A95B7C0996BD539E0D499BE6EC9D7F3875B79E6">>}, {<<"types">>, <<"04285239F4954C5EDE56F78ED7778EDE24E3F2E997F7B16402A167AF0CC2658A">>}, {<<"uuid">>, <<"4E4C5CA3461DC47C5E157ED42AA3981A053B7A186792AF972A27B14A9489324E">>}]} From 21a0482d870dd4c4d0ea4faab9a145c8cba92f6d Mon Sep 17 00:00:00 2001 From: Alejandro Miguez Date: Tue, 19 Aug 2025 22:02:14 -0300 Subject: [PATCH 03/14] 
All these functions (on_merge/1, merge/2, is_stale/1, and sync/2) are CRDT callback functions that should operate on the current partition process instance, not try to send messages to a non-existent globally registered process. --- src/erleans_registry_partition.erl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/erleans_registry_partition.erl b/src/erleans_registry_partition.erl index 481cf4f..3515bab 100644 --- a/src/erleans_registry_partition.erl +++ b/src/erleans_registry_partition.erl @@ -212,7 +212,7 @@ Implementation of the `bondy_mst_crdt` callback. Removes stale entries and duplicates after merge. """). on_merge(Peer) -> - partisan_gen_server:cast(?MODULE, {crdt_on_merge, Peer}). + partisan_gen_server:cast(self(), {crdt_on_merge, Peer}). %% ============================================================================= %% PARTISAN_PLUMTREE_BROADCAST_HANDLER CALLBACKS @@ -237,7 +237,7 @@ broadcast_data(Gossip) -> -spec merge(GossipId :: gossip_id(), Payload :: bondy_mst_crdt:gossip()) -> boolean(). merge(_Id, Gossip) -> - partisan_gen_server:call(?MODULE, {crdt_merge, Gossip}). + partisan_gen_server:call(self(), {crdt_merge, Gossip}). -spec merge(Peer :: node(), Root :: bondy_mst:hash(), Payload :: bondy_mst_crdt:gossip()) -> boolean(). merge(Peer, _Root, Gossip) -> @@ -245,7 +245,7 @@ merge(Peer, _Root, Gossip) -> -spec is_stale(gossip_id()) -> boolean(). is_stale({Peer, Root}) -> - ok = partisan_gen_server:cast(?MODULE, {crdt_maybe_merge, Peer, Root}), + ok = partisan_gen_server:cast(self(), {crdt_maybe_merge, Peer, Root}), true. -spec graft(gossip_id()) -> stale | {ok, bondy_mst_crdt:gossip()} | {error, term()}. @@ -267,7 +267,7 @@ sync(Peer) -> sync(Peer, #{}). sync(Peer, Opts) -> - partisan_gen_server:call(?MODULE, {crdt_trigger, Peer, Opts}). + partisan_gen_server:call(self(), {crdt_trigger, Peer, Opts}). 
%% ============================================================================= %% PARTISAN_GEN_SERVER BEHAVIOR CALLBACKS From cdbfb221037e5f2c32198bd56d8d685467da9c16 Mon Sep 17 00:00:00 2001 From: Alejandro Miguez Date: Tue, 19 Aug 2025 22:32:33 -0300 Subject: [PATCH 04/14] Each partition registers itself with a unique name: {erleans_registry_partition, PartitionId} --- src/erleans_registry_partition.erl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/erleans_registry_partition.erl b/src/erleans_registry_partition.erl index 3515bab..04f321d 100644 --- a/src/erleans_registry_partition.erl +++ b/src/erleans_registry_partition.erl @@ -127,7 +127,8 @@ Starts a partition server with given ID and registers it in gproc_pool. """). -spec start_link(pos_integer(), atom()) -> {ok, pid()} | {error, term()}. start_link(PartitionId, PoolName) -> - partisan_gen_server:start_link(?MODULE, [PartitionId, PoolName], ?OPTS). + Name = {?MODULE, PartitionId}, + partisan_gen_server:start_link({local, Name}, ?MODULE, [PartitionId, PoolName], ?OPTS). ?DOC(""" Registers a grain in this specific partition. From 02e1e7aa8254b334ef510c677df402e91264047f Mon Sep 17 00:00:00 2001 From: Alejandro Miguez Date: Tue, 19 Aug 2025 22:42:07 -0300 Subject: [PATCH 05/14] Each partition registers itself with a unique name: list_to_atom("erleans_registry_partition_" ++ integer_to_list(PartitionId)) --- src/erleans_registry_partition.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/erleans_registry_partition.erl b/src/erleans_registry_partition.erl index 04f321d..9024156 100644 --- a/src/erleans_registry_partition.erl +++ b/src/erleans_registry_partition.erl @@ -127,7 +127,7 @@ Starts a partition server with given ID and registers it in gproc_pool. """). -spec start_link(pos_integer(), atom()) -> {ok, pid()} | {error, term()}. 
start_link(PartitionId, PoolName) -> - Name = {?MODULE, PartitionId}, + Name = list_to_atom("erleans_registry_partition_" ++ integer_to_list(PartitionId)), partisan_gen_server:start_link({local, Name}, ?MODULE, [PartitionId, PoolName], ?OPTS). ?DOC(""" From b11be63116e792a6ee4ef782e09b0ccd220b5ae2 Mon Sep 17 00:00:00 2001 From: Alejandro Miguez Date: Tue, 19 Aug 2025 22:53:08 -0300 Subject: [PATCH 06/14] Fix an inconsistency error between the registry supervisor and partitions workers with the registered names --- src/erleans_registry_partition.erl | 12 ++++++++++-- src/erleans_registry_sup.erl | 2 +- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/erleans_registry_partition.erl b/src/erleans_registry_partition.erl index 9024156..bb96318 100644 --- a/src/erleans_registry_partition.erl +++ b/src/erleans_registry_partition.erl @@ -71,6 +71,7 @@ This is stored on `bondy_mst`. %% API - Partition-specific functions (called by erleans_pm router) -export([start_link/2]). +-export([partition_name/1]). -export([register_name/2]). -export([unregister_name/2]). -export([whereis_name/2]). @@ -122,12 +123,19 @@ This is stored on `bondy_mst`. %% API %% ============================================================================= +?DOC(""" +Returns the registered name for a partition ID. +"""). +-spec partition_name(pos_integer()) -> atom(). +partition_name(PartitionId) -> + list_to_atom("erleans_registry_partition_" ++ integer_to_list(PartitionId)). + ?DOC(""" Starts a partition server with given ID and registers it in gproc_pool. """). -spec start_link(pos_integer(), atom()) -> {ok, pid()} | {error, term()}. start_link(PartitionId, PoolName) -> - Name = list_to_atom("erleans_registry_partition_" ++ integer_to_list(PartitionId)), + Name = partition_name(PartitionId), partisan_gen_server:start_link({local, Name}, ?MODULE, [PartitionId, PoolName], ?OPTS). 
?DOC(""" @@ -304,7 +312,7 @@ init([PartitionId, PoolName]) -> end, store => bondy_mst_ets_store, store_opts => #{ - name => atom_to_binary(list_to_atom("erleans_registry_partition_" ++ integer_to_list(PartitionId))), + name => atom_to_binary(partition_name(PartitionId)), persistent => true }, callback_mod => ?MODULE, diff --git a/src/erleans_registry_sup.erl b/src/erleans_registry_sup.erl index 9b42414..c51178c 100644 --- a/src/erleans_registry_sup.erl +++ b/src/erleans_registry_sup.erl @@ -68,7 +68,7 @@ init([]) -> %% Create child specs for N partition workers Children = [ #{ - id => {erleans_registry_partition, PartitionId}, + id => erleans_registry_partition:partition_name(PartitionId), start => {erleans_registry_partition, start_link, [PartitionId, ?POOL_NAME]}, restart => permanent, shutdown => 5000, From 146c09d36d23911738089903ea46239a7cd960ac Mon Sep 17 00:00:00 2001 From: Alejandro Miguez Date: Tue, 19 Aug 2025 23:26:09 -0300 Subject: [PATCH 07/14] CRDT callbacks - All broadcast to all partitions for global coordination --- src/erleans_registry_partition.erl | 36 +++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/src/erleans_registry_partition.erl b/src/erleans_registry_partition.erl index bb96318..378ca1f 100644 --- a/src/erleans_registry_partition.erl +++ b/src/erleans_registry_partition.erl @@ -207,13 +207,24 @@ Implementation of the `bondy_mst_crdt` callback. Casts message `Message` to this server on node `Peer` using `partisan`. """). send(Peer, Message) -> - partisan_gen_server:cast({?MODULE, Peer}, {crdt_message, Message}). + %% Send to all partitions since this is CRDT coordination + Pids = erleans_pm:get_all_partition_pids(), + [partisan_gen_server:cast(Pid, {crdt_message, Message}) || Pid <- Pids], + ok. ?DOC(""" Implementation of the `bondy_mst_crdt` callback. Broadcasts message `Gossip` to peers using Plumtree. """). broadcast(Gossip) -> + %% For broadcast, we can either: + %% 1. 
Route based on gossip data peer, or + %% 2. Broadcast to all partitions + %% Let's use all partitions since broadcast should be global + Pids = erleans_pm:get_all_partition_pids(), + lists:foreach(fun(Pid) -> + partisan_gen_server:cast(Pid, {crdt_broadcast, Gossip}) + end, Pids), partisan:broadcast(Gossip, ?MODULE). ?DOC(""" @@ -221,7 +232,9 @@ Implementation of the `bondy_mst_crdt` callback. Removes stale entries and duplicates after merge. """). on_merge(Peer) -> - partisan_gen_server:cast(self(), {crdt_on_merge, Peer}). + %% Notify all partitions about merge completion + Pids = erleans_pm:get_all_partition_pids(), + [partisan_gen_server:cast(Pid, {crdt_on_merge, Peer}) || Pid <- Pids]. %% ============================================================================= %% PARTISAN_PLUMTREE_BROADCAST_HANDLER CALLBACKS @@ -246,7 +259,11 @@ broadcast_data(Gossip) -> -spec merge(GossipId :: gossip_id(), Payload :: bondy_mst_crdt:gossip()) -> boolean(). merge(_Id, Gossip) -> - partisan_gen_server:call(self(), {crdt_merge, Gossip}). + %% Merge on all partitions since each has its own CRDT + Pids = erleans_pm:get_all_partition_pids(), + Results = [partisan_gen_server:call(Pid, {crdt_merge, Gossip}) || Pid <- Pids], + %% Return true if any partition handled it successfully + lists:any(fun(R) -> R =:= true end, Results). -spec merge(Peer :: node(), Root :: bondy_mst:hash(), Payload :: bondy_mst_crdt:gossip()) -> boolean(). merge(Peer, _Root, Gossip) -> @@ -254,7 +271,9 @@ merge(Peer, _Root, Gossip) -> -spec is_stale(gossip_id()) -> boolean(). is_stale({Peer, Root}) -> - ok = partisan_gen_server:cast(self(), {crdt_maybe_merge, Peer, Root}), + %% Check staleness across all partitions + Pids = erleans_pm:get_all_partition_pids(), + [partisan_gen_server:cast(Pid, {crdt_maybe_merge, Peer, Root}) || Pid <- Pids], true. -spec graft(gossip_id()) -> stale | {ok, bondy_mst_crdt:gossip()} | {error, term()}. @@ -276,7 +295,14 @@ sync(Peer) -> sync(Peer, #{}). 
sync(Peer, Opts) -> - partisan_gen_server:call(self(), {crdt_trigger, Peer, Opts}). + %% Trigger sync on all partitions + Pids = erleans_pm:get_all_partition_pids(), + Results = [partisan_gen_server:call(Pid, {crdt_trigger, Peer, Opts}) || Pid <- Pids], + %% Return the first successful result + case lists:dropwhile(fun({error, _}) -> true; (_) -> false end, Results) of + [First | _] -> First; + [] -> {error, no_partitions_available} + end. %% ============================================================================= %% PARTISAN_GEN_SERVER BEHAVIOR CALLBACKS From afb4eea5e76f313be7514cf900433d30fd21c961 Mon Sep 17 00:00:00 2001 From: Alejandro Miguez Date: Wed, 20 Aug 2025 13:41:05 -0300 Subject: [PATCH 08/14] Upgrade bondy_mst version supporting MFA config used in erleans_registry_partition --- rebar.config | 2 +- rebar.lock | 2 +- src/erleans_pm.erl | 74 ++++--- src/erleans_registry_partition.erl | 306 +++++++++++++++++++++++++---- 4 files changed, 318 insertions(+), 66 deletions(-) diff --git a/rebar.config b/rebar.config index 58b4e15..090ef69 100644 --- a/rebar.config +++ b/rebar.config @@ -25,7 +25,7 @@ {bondy_mst,{ git, "https://github.com/bondy-io/bondy_mst.git", - {branch, "without-rocksdb"} + {branch, "feature/alejandro-miguez.add_mfa_support"} }}, {types, "0.1.8"}, {sbroker, { diff --git a/rebar.lock b/rebar.lock index b8ac54f..ea86672 100644 --- a/rebar.lock +++ b/rebar.lock @@ -13,7 +13,7 @@ 1}, {<<"bondy_mst">>, {git,"https://github.com/bondy-io/bondy_mst.git", - {ref,"4a0da7062ac8435a8e3f59d3d58c1e5476ec1ec6"}}, + {ref,"2c9419bb7424e8b175bcbb057784b8088ea222d8"}}, 0}, {<<"cf">>,{pkg,<<"cf">>,<<"0.3.1">>},1}, {<<"erlware_commons">>,{pkg,<<"erlware_commons">>,<<"1.8.1">>},0}, diff --git a/src/erleans_pm.erl b/src/erleans_pm.erl index 33a9901..40ca5d5 100644 --- a/src/erleans_pm.erl +++ b/src/erleans_pm.erl @@ -48,11 +48,10 @@ partition for each grain based on its grain_key(). -export([to_list/1]). -export([lookup/1]). -export([info/0]). 
--export([sync/1]). --export([sync/2]). %% Partition selection -export([select_partition/1]). +-export([get_all_partition_pids/0]). %% TEST API -ifdef(TEST). @@ -62,21 +61,36 @@ partition for each grain based on its grain_key(). -export([unregister_name_/2]). -endif. + + %% ============================================================================= %% API %% ============================================================================= + + ?DOC(""" Starts the registry supervisor which manages N partition processes. """). -spec start_link() -> {ok, pid()} | {error, term()}. + start_link() -> erleans_registry_sup:start_link(). + ?DOC(""" Registers the calling process with the `grain_key()` derived from its `erleans:grain_ref()`. +The duplicate check logic is executed by the caller concurrently while the +actual registration is serialised via the `erleans_pm` server process. + +Returns an error with the following reasons: +* `badgrain` if the calling process is not an Erleans grain. +* `timeout` if there was no response from the server within the requested time +* `{already_in_use, partisan_remote_ref:p()}` if there is already a process +registered for the same `grain_key()`. + Routes the call to the appropriate partition based on consistent hashing. """). -spec register_name() -> @@ -85,14 +99,17 @@ Routes the call to the appropriate partition based on consistent hashing. | {error, timeout} | {error, noproc} | {error, {already_in_use, partisan_remote_ref:p()}}. + register_name() -> register_name(?TIMEOUT). + -spec register_name(timeout()) -> ok | {error, badgrain} | {error, timeout} | {error, {already_in_use, partisan_remote_ref:p()}}. + register_name(_Timeout) -> case erleans:grain_ref() of undefined -> @@ -102,10 +119,12 @@ register_name(_Timeout) -> erleans_registry_partition:register_name(PartitionPid, GrainRef) end. + ?DOC(""" Unregisters a grain from the appropriate partition. """). -spec unregister_name() -> ok | {error, badgrain}. 
+ unregister_name() -> case erleans:grain_ref() of undefined -> @@ -115,33 +134,41 @@ unregister_name() -> erleans_registry_partition:unregister_name(PartitionPid, GrainRef) end. + ?DOC(""" Returns a process reference for `GrainRef` from the appropriate partition. """). -spec whereis_name(GrainRef :: erleans:grain_ref()) -> partisan_remote_ref:p() | undefined. + whereis_name(GrainRef) -> whereis_name(GrainRef, [safe]). + -spec whereis_name(GrainRef :: erleans:grain_ref(), Opts :: [safe | unsafe]) -> partisan_remote_ref:p() | undefined. + whereis_name(GrainRef, Opts) -> PartitionPid = select_partition(GrainRef), erleans_registry_partition:whereis_name(PartitionPid, GrainRef, Opts). + ?DOC(""" Lookups all registered grains under name `GrainRef` from the appropriate partition. """). -spec lookup(GrainRef :: erleans:grain_ref()) -> [partisan_remote_ref:p()]. + lookup(GrainRef) -> PartitionPid = select_partition(GrainRef), erleans_registry_partition:lookup(PartitionPid, GrainRef). + ?DOC(""" Returns the `erleans:grain_ref` for a pid or process reference. """). -spec grain_ref(partisan:any_pid()) -> {ok, erleans:grain_ref()} | {error, timeout | any()}. + grain_ref(ProcRef) -> %% For grain_ref lookup, we need to try all partitions since we don't know %% which partition the process belongs to. We can optimize this later. @@ -153,14 +180,18 @@ grain_ref(ProcRef) -> end end). + ?DOC(""" Returns the list of all registry entries from all partitions. """). -spec to_list() -> [{grain_key(), partisan_remote_ref:p()}]. + to_list() -> to_list([safe]). + -spec to_list([safe | unsafe]) -> [{grain_key(), partisan_remote_ref:p()}]. + to_list(Opts) -> %% Collect results from all partitions Workers = get_all_partition_pids(), @@ -169,6 +200,7 @@ to_list(Opts) -> || PartitionPid <- Workers ]). + ?DOC(""" Returns information from all partitions. """). @@ -183,33 +215,18 @@ info() -> partitions => PartitionInfos }. -?DOC(""" -Triggers synchronization with a peer across all partitions. 
-"""). --spec sync(node()) -> [ok | {error, term()}]. -sync(Peer) -> - sync(Peer, #{}). - --spec sync(node(), map()) -> [ok | {error, term()}]. -sync(Peer, Opts) -> - Workers = get_all_partition_pids(), - [ - try - partisan_gen_server:call(PartitionPid, {crdt_trigger, Peer, Opts}) - catch - _:Reason -> {error, Reason} - end - || PartitionPid <- Workers - ]. %% ============================================================================= %% PARTITION SELECTION %% ============================================================================= + + ?DOC(""" Selects the appropriate partition for a given GrainRef using gproc_pool. """). -spec select_partition(erleans:grain_ref()) -> pid(). + select_partition(GrainRef) -> GrainKey = grain_key(GrainRef), case gproc_pool:pick_worker(erleans_registry_pool, GrainKey) of @@ -219,22 +236,24 @@ select_partition(GrainRef) -> Pid end. + + %% ============================================================================= %% PRIVATE %% ============================================================================= + + %% @private -spec grain_key(erleans:grain_ref()) -> {term(), module()}. + grain_key(#{id := Id, implementing_module := Mod}) -> {Id, Mod}. -%% @private --spec get_num_partitions() -> pos_integer(). -get_num_partitions() -> - erleans_config:get(pm_partitions, 1). %% @private -spec get_all_partition_pids() -> [pid()]. + get_all_partition_pids() -> case gproc_pool:active_workers(erleans_registry_pool) of [] -> @@ -243,13 +262,16 @@ get_all_partition_pids() -> [Pid || {_, Pid} <- Workers] end. + %% @private -spec try_all_partitions(fun((pid()) -> continue | {found, term()} | {error, term()})) -> {ok, term()} | {error, not_found}. + try_all_partitions(Fun) -> Workers = get_all_partition_pids(), try_partitions(Fun, Workers). + %% @private try_partitions(_Fun, []) -> {error, not_found}; @@ -269,10 +291,14 @@ try_partitions(Fun, [PartitionPid | Rest]) -> try_partitions(Fun, Rest) end. 
+ + %% ============================================================================= %% TEST %% ============================================================================= + + -ifdef(TEST). %% For testing - route to appropriate partition diff --git a/src/erleans_registry_partition.erl b/src/erleans_registry_partition.erl index 378ca1f..abb437c 100644 --- a/src/erleans_registry_partition.erl +++ b/src/erleans_registry_partition.erl @@ -84,6 +84,7 @@ This is stored on `bondy_mst`. %% BONDY_MST_CRDT CALLBACKS -export([broadcast/1]). +-export([crdt_callback/3]). -export([on_merge/1]). -export([send/2]). -export([sync/1]). @@ -119,197 +120,357 @@ This is stored on `bondy_mst`. -compile({no_auto_import, [demonitor/2]}). + %% ============================================================================= %% API %% ============================================================================= + + ?DOC(""" Returns the registered name for a partition ID. """). -spec partition_name(pos_integer()) -> atom(). + partition_name(PartitionId) -> list_to_atom("erleans_registry_partition_" ++ integer_to_list(PartitionId)). + ?DOC(""" Starts a partition server with given ID and registers it in gproc_pool. """). -spec start_link(pos_integer(), atom()) -> {ok, pid()} | {error, term()}. + start_link(PartitionId, PoolName) -> Name = partition_name(PartitionId), partisan_gen_server:start_link({local, Name}, ?MODULE, [PartitionId, PoolName], ?OPTS). + ?DOC(""" Registers a grain in this specific partition. """). -spec register_name(pid(), erleans:grain_ref()) -> - ok | {error, badgrain} | {error, timeout} | {error, {already_in_use, partisan_remote_ref:p()}}. + ok + | {error, badgrain} + | {error, timeout} + | {error, {already_in_use, partisan_remote_ref:p()}}. + register_name(PartitionPid, GrainRef) -> partisan_gen_server:call(PartitionPid, {register_name, GrainRef}, ?TIMEOUT). + ?DOC(""" Unregisters a grain from this specific partition. """). 
-spec unregister_name(pid(), erleans:grain_ref()) -> ok | {error, badgrain}. + unregister_name(PartitionPid, GrainRef) -> partisan_gen_server:call(PartitionPid, {unregister_name, GrainRef}). + ?DOC(""" Returns a process reference for GrainRef from this partition. """). -spec whereis_name(pid(), erleans:grain_ref()) -> partisan_remote_ref:p() | undefined. + whereis_name(PartitionPid, GrainRef) -> whereis_name(PartitionPid, GrainRef, [safe]). + ?DOC(""" Returns a process reference for GrainRef from this partition with options. """). -spec whereis_name(pid(), erleans:grain_ref(), [safe | unsafe]) -> partisan_remote_ref:p() | undefined. + whereis_name(PartitionPid, GrainRef, Opts) -> partisan_gen_server:call(PartitionPid, {whereis_name, GrainRef, Opts}). + ?DOC(""" Lookups all registered grains under name GrainRef in this partition. """). -spec lookup(pid(), erleans:grain_ref()) -> [partisan_remote_ref:p()]. + lookup(PartitionPid, GrainRef) -> partisan_gen_server:call(PartitionPid, {lookup, GrainRef}). + ?DOC(""" Returns the grain_ref for a process reference from this partition. """). -spec grain_ref(pid(), partisan:any_pid()) -> {ok, erleans:grain_ref()} | {error, timeout | any()}. + grain_ref(PartitionPid, ProcRef) -> partisan_gen_server:call(PartitionPid, {grain_ref, ProcRef}). + ?DOC(""" Returns list of all registry entries from this partition. """). -spec to_list(pid()) -> [{grain_key(), partisan_remote_ref:p()}]. + to_list(PartitionPid) -> to_list(PartitionPid, [safe]). -spec to_list(pid(), [safe | unsafe]) -> [{grain_key(), partisan_remote_ref:p()}]. + to_list(PartitionPid, Opts) -> partisan_gen_server:call(PartitionPid, {to_list, Opts}). + info(PartitionPid) -> partisan_gen_server:call(PartitionPid, info). 
+ + %% ============================================================================= %% BONDY_MST_CRDT CALLBACKS %% ============================================================================= + + +?DOC(""" +CRDT callback router that routes to specific partition by name. +This function is called by bondy_mst_crdt with callback_mfa pattern. +The callback_mfa calls this function as: crdt_callback(PartitionName, Function, Args) +where Args is the list of arguments passed to the callback. +"""). +%% CRDT callback router - handles callback_mfa pattern correctly +crdt_callback(PartitionName, Function, Args) -> + ?LOG_DEBUG("CRDT callback: partition=~p, function=~p, args=~p", [PartitionName, Function, Args]), + + case Function of + send -> + [Peer, Message] = Args, + send_to_partition(PartitionName, Peer, Message); + broadcast -> + [Gossip] = Args, + broadcast_gossip(Gossip); + on_merge -> + [Peer] = Args, + on_merge_partition(PartitionName, Peer); + _ -> + error({unknown_crdt_callback, Function, Args}) + end. + + ?DOC(""" Implementation of the `bondy_mst_crdt` callback. Casts message `Message` to this server on node `Peer` using `partisan`. """). -send(Peer, Message) -> - %% Send to all partitions since this is CRDT coordination - Pids = erleans_pm:get_all_partition_pids(), - [partisan_gen_server:cast(Pid, {crdt_message, Message}) || Pid <- Pids], - ok. +send_to_partition(PartitionName, Peer, Message) -> + partisan_gen_server:cast({PartitionName, Peer}, {crdt_message, Message}). + ?DOC(""" Implementation of the `bondy_mst_crdt` callback. -Broadcasts message `Gossip` to peers using Plumtree. +Broadcasts message `Gossip` to peers using Plumtree (Epidemic broadcast trees). """). -broadcast(Gossip) -> - %% For broadcast, we can either: - %% 1. Route based on gossip data peer, or - %% 2. 
Broadcast to all partitions - %% Let's use all partitions since broadcast should be global - Pids = erleans_pm:get_all_partition_pids(), - lists:foreach(fun(Pid) -> - partisan_gen_server:cast(Pid, {crdt_broadcast, Gossip}) - end, Pids), +broadcast_gossip(Gossip) -> partisan:broadcast(Gossip, ?MODULE). + ?DOC(""" Implementation of the `bondy_mst_crdt` callback. Removes stale entries and duplicates after merge. """). +on_merge_partition(PartitionName, Peer) -> + partisan_gen_server:cast(PartitionName, {crdt_on_merge, Peer}). + + +%% Legacy callback functions - kept for backward compatibility +send(Peer, Message) -> + partisan_gen_server:cast({?MODULE, Peer}, {crdt_message, Message}). + +broadcast(Gossip) -> + partisan:broadcast(Gossip, ?MODULE). + on_merge(Peer) -> - %% Notify all partitions about merge completion - Pids = erleans_pm:get_all_partition_pids(), - [partisan_gen_server:cast(Pid, {crdt_on_merge, Peer}) || Pid <- Pids]. + partisan_gen_server:cast(?MODULE, {crdt_on_merge, Peer}). + + %% ============================================================================= %% PARTISAN_PLUMTREE_BROADCAST_HANDLER CALLBACKS %% ============================================================================= + + ?DOC(""" Implementation of the `partisan_plumtree_backend` callback. Returns the channel to be used when broadcasting. """). -spec broadcast_channel() -> partisan:channel(). + broadcast_channel() -> application:get_env(erleans, partisan_broadcast_channel, undefined). + ?DOC(""" Implementation of the `partisan_plumtree_backend` callback. +Deconstructs a broadcast that is sent using `broadcast/2` returning the message +id and payload. + +> This function is part of the implementation of the +partisan_plumtree_broadcast_handler behaviour. +> You should never call it directly. """). -spec broadcast_data(Gossip :: bondy_mst_crdt:gossip()) -> - {MessageId :: {bondy_mst_crdt:node_id(), bondy_mst:hash()}, Payload :: bondy_mst_crdt:gossip()}. 
+ { + MessageId :: {bondy_mst_crdt:node_id(), bondy_mst:hash()}, + Payload :: bondy_mst_crdt:gossip() + }. + broadcast_data(Gossip) -> #{from := Peer, root := Root} = bondy_mst_crdt:gossip_data(Gossip), {{Peer, Root}, Gossip}. --spec merge(GossipId :: gossip_id(), Payload :: bondy_mst_crdt:gossip()) -> boolean(). + +?DOC(""" +Implementation of the `partisan_plumtree_backend` callback. +Merges a remote copy of an object record sent via broadcast w/ the +local view for the key contained in the message id. If the remote copy is +causally older than the current data stored then `false` is returned and no +updates are merged. Otherwise, the remote copy is merged (possibly +generating siblings) and `true` is returned. + +> This function is part of the implementation of the +partisan_plumtree_broadcast_handler behaviour. +> You should never call it directly. +"""). +-spec merge(GossipId :: gossip_id(), Payload :: bondy_mst_crdt:gossip()) -> + boolean(). + merge(_Id, Gossip) -> - %% Merge on all partitions since each has its own CRDT - Pids = erleans_pm:get_all_partition_pids(), - Results = [partisan_gen_server:call(Pid, {crdt_merge, Gossip}) || Pid <- Pids], - %% Return true if any partition handled it successfully - lists:any(fun(R) -> R =:= true end, Results). + partisan_gen_server:call(?MODULE, {crdt_merge, Gossip}). + + +?DOC(""" +Implementation of the `partisan_plumtree_backend` callback. +Same as merge/2 but merges the object on `Node' + +> This function is part of the implementation of the +partisan_plumtree_broadcast_handler behaviour. +> You should never call it directly. +"""). +-spec merge( + Peer :: node(), + Root :: bondy_mst:hash(), + Payload :: bondy_mst_crdt:gossip()) -> boolean(). --spec merge(Peer :: node(), Root :: bondy_mst:hash(), Payload :: bondy_mst_crdt:gossip()) -> boolean(). merge(Peer, _Root, Gossip) -> partisan_gen_server:call({?MODULE, Peer}, {crdt_merge, Gossip}). + +?DOC(""" +Implementation of the `partisan_plumtree_backend` callback. 
+When a peer broadcasts a message it does it to the nodes in its eager-push set +only, but also simultaneously sends I_HAVE notifications to nodes in its +lazy-push set instead of the entire message. This callback is the one that +Plumtree calls when receiving an I_HAVE message. + +The main idea is: +“I have seen a broadcast message with this root. If you need it, let me know.” + +This saves bandwidth, because instead of blindly sending every neighbor the full +payload, the node sends just the root hash. The lazy neighbors can decide +whether they need the full message or not. + +If function returns `true` then Plumtree will do nothing. However,if it returns +`false` then Plumtree will `graft` the message from the peer and send it to us. + +> This function is part of the implementation of the +partisan_plumtree_broadcast_handler behaviour. +> You should never call it directly. +"""). -spec is_stale(gossip_id()) -> boolean(). + is_stale({Peer, Root}) -> - %% Check staleness across all partitions - Pids = erleans_pm:get_all_partition_pids(), - [partisan_gen_server:cast(Pid, {crdt_maybe_merge, Peer, Root}) || Pid <- Pids], + %% In our case the I_HAVE message is the root of the peer's tree, so we + %% always return `true` signaling Plumtree that we do not need the message, + %% and we send ourself a message to potentially init a merge with the peer + %% i.e. in this case we take the job of synchonising the CRDT in out hands + %% instead of relying on Plumtree. + ok = partisan_gen_server:cast(?MODULE, {crdt_maybe_merge, Peer, Root}), true. --spec graft(gossip_id()) -> stale | {ok, bondy_mst_crdt:gossip()} | {error, term()}. + +?DOC(""" +Implementation of the `partisan_plumtree_backend` callback. +In Plumtree this is used to return the object associated with the given prefixed +message id if the currently stored version has an equal context. Otherwise +returning the atom `stale`. 
+ +Because it assumes that a grafted context can only be causally older than +the local view, a `stale` response means there is another message that +subsumes the grafted one. + +> This function is part of the implementation of the +partisan_plumtree_broadcast_handler behaviour. +> You should never call it directly. +"""). +-spec graft(gossip_id()) -> + stale | {ok, bondy_mst_crdt:gossip()} | {error, term()}. + graft({_Peer, _Root}) -> + %% In our case, the message_id is just the peer's root hash, so in case + %% we contain the root we return a Gossip message with our root. Otherwise + %% we return 'stale'. + %% partisan_gen_server:call(?MODULE, {crdt_graft, Peer, Root}). {error, disabled}. + +?DOC(""" +Calls `sync/1`. +"""). -spec exchange(node()) -> {ok, pid()} | {error, term()}. + exchange(Peer) -> exchange(Peer, #{}). + +?DOC(""" +Calls `sync/2`. +"""). -spec exchange(node(), map()) -> ok | {error, term()}. + exchange(Peer, Opts) -> sync(Peer, Opts). ?DOC(""" Triggers a synchronisation exchange with a peer. +Calls `exchange/2` with an empty map as the second argument. """). +-spec sync(node()) -> {ok, pid()} | {error, term()}. + sync(Peer) -> sync(Peer, #{}). + +?DOC(""" +Triggers a synchronisation exchange with a peer. +"""). +-spec sync(node(), map()) -> ok | {error, term()}. + sync(Peer, Opts) -> - %% Trigger sync on all partitions - Pids = erleans_pm:get_all_partition_pids(), - Results = [partisan_gen_server:call(Pid, {crdt_trigger, Peer, Opts}) || Pid <- Pids], - %% Return the first successful result - case lists:dropwhile(fun({error, _}) -> true; (_) -> false end, Results) of - [First | _] -> First; - [] -> {error, no_partitions_available} - end. + partisan_gen_server:call(?MODULE, {crdt_trigger, Peer, Opts}). 
+ + %% ============================================================================= %% PARTISAN_GEN_SERVER BEHAVIOR CALLBACKS %% ============================================================================= + + -spec init([pos_integer() | atom()]) -> {ok, State :: t()}. + init([PartitionId, PoolName]) -> + %% Trap exists otherwise terminate/1 won't be called when shutdown by + %% supervisor. erlang:process_flag(trap_exit, true), MonitorTab = ?MONITOR_TAB(PartitionId), @@ -326,10 +487,12 @@ init([PartitionId, PoolName]) -> ] ), + %% We monitor all nodes so that we can cleanup our view of the registry partisan:monitor_nodes(true), {channel, Channel} = lists:keyfind(channel, 1, partisan_gen:get_opts()), + %% We wrap the tree using the exchange module Node = partisan:node(), Opts = #{ hash_algorithm => sha256, @@ -341,7 +504,9 @@ init([PartitionId, PoolName]) -> name => atom_to_binary(partition_name(PartitionId)), persistent => true }, - callback_mod => ?MODULE, + %% CRDT opts + %% Use callback_mfa to route calls to this specific partition instance + callback_mfa => {?MODULE, crdt_callback, [partition_name(PartitionId)]}, max_merges => 1, max_merges_per_root => 1, max_versions => 10, @@ -350,9 +515,14 @@ init([PartitionId, PoolName]) -> consistency_model => eventual }, + %% We create an ets-based MST bound to this process. + %% The ets table will be garbage collected if this process terminates. CRDT = bondy_mst_crdt:new(Node, Opts), Tree = bondy_mst_crdt:tree(CRDT), + %% ets-based trees support read_concurrency (option store_opts.persistent) + %% so we can cache and share it using persistent_term to avoid a call to + %% this process. ok = persistent_term:put(?PERSISTENT_KEY(PartitionId), Tree), State = #state{ @@ -373,25 +543,43 @@ init([PartitionId, PoolName]) -> {ok, State, {continue, monitor_existing}}. 
+ handle_continue(monitor_existing, #state{partition_id = PartitionId} = State0) -> MonitorTab = ?MONITOR_TAB(PartitionId), + %% This prevents any grain to be registered as we are blocking the server + %% until we finish. + %% We fold the claimed ?MONITOR_TAB table to find any existing + %% registrations. In case the table is new, it would be empty. Otherwise, + %% we would iterate over registrations that were done by a previous + %% instance of this server before it crashed. + %% We re-register/monitor alive pids and remove dead ones. Fun = fun ({Pid, GrainRef, _OldMRef}, Acc0) -> case erlang:is_process_alive(Pid) of true -> + %% The process is still alive, but the monitor has died with + %% the previous instance of this gen_server, so we monitor + %% again. We use relaxed mode which allows us to update the + %% existing registration on ?MONITOR_TAB and the MST. {_, Acc} = do_register_name(Acc0, GrainRef, Pid, relaxed), Acc; false -> + %% The process has died, so we unregister. This will also + %% remove the registration from the MST. {_, Acc} = do_unregister_name(Acc0, GrainRef, Pid), Acc end end, State = lists:foldl(Fun, State0, ets:tab2list(MonitorTab)), + + %% We should now have all existing local grains re-registered on this + %% server and broadcast messages sent to cluster peers. {noreply, State}; handle_continue(_, State) -> {noreply, State}. + handle_call({register_name, GrainRef}, {Caller, _}, State0) when is_pid(Caller) -> case lookup_local_pid(State0#state.partition_id, GrainRef) of undefined -> @@ -492,6 +680,15 @@ handle_call({crdt_merge, Gossip}, _From, State) -> Root0 = bondy_mst_crdt:root(CRDT0), CRDT = bondy_mst_crdt:handle(CRDT0, Gossip), Root = bondy_mst_crdt:root(CRDT), + + %% Required by Plumtree. + %% Merges a remote copy of an object record sent via broadcast w/ the + %% local view for the key contained in the message id. 
 If the remote copy is + %% causally older than the current data stored then `false` is returned and + %% no updates are merged. Otherwise, the remote copy is merged (possibly + %% generating siblings) and `true` is returned. + %% Since we will be performing a merge if required during + %% bondy_mst_crdt:handle/2 we reply `false`. Reply = Root =/= Root0, {reply, Reply, State#state{crdt = CRDT}}; @@ -598,11 +795,14 @@ terminate(_Reason, #state{partition_id = PartitionId} = State) -> _ = persistent_term:erase(?PERSISTENT_KEY(PartitionId)), ok. + + %% ============================================================================= %% PRIVATE %% ============================================================================= + do_lookup(#state{partition_id = PartitionId, crdt = CRDT}, #{id := _} = GrainRef) -> do_lookup(#state{partition_id = PartitionId, crdt = CRDT}, grain_key(GrainRef)); @@ -670,7 +870,9 @@ lookup_local_pid(PartitionId, GrainRef) -> end. mst_merge_value(PartitionId, GrainKey, AWSet1, AWSet2) -> + %% We merge the CRDTs AWSet3 = state_awset:merge(AWSet1, AWSet2), + %% We remove local grains that have been deactivated AWSet = remove_deactivated(PartitionId, AWSet3), ?LOG_DEBUG(#{ description => "Merged values", @@ -689,6 +891,8 @@ remove_deactivated(PartitionId, AWSet) -> true ?= partisan_remote_ref:is_local(ProcRef), Pid ?= partisan_remote_ref:to_pid(ProcRef), undefined ?= monitor_lookup(PartitionId, Pid), + %% Not monitored so it has been deactivated i.e. the peer node has a + %% stale entry. We remove it from the set. ?LOG_DEBUG(#{ message => "Removing grain from registry", partition_id => PartitionId, @@ -698,8 +902,10 @@ remove_deactivated(PartitionId, AWSet) -> awset_remove(Acc, ProcRef) else false -> + %% Not local, so we ignore it Acc; {_Pid, _GrainRef, _Mref} -> + %% Monitored, so we ignore it Acc end end, @@ -709,13 +915,21 @@ awset_remove(AWSet0, Value) -> {ok, AWSet} = state_type:mutate({rmv, Value}, partisan:node(), AWSet0), AWSet. 
+%% This function assumes remove_deactivated/2 was called on AWSet before. maybe_deactivate_local_duplicate(_PartitionId, GrainKey, AWSet) -> All = sets:to_list(state_awset:query(AWSet)), maybe + %% Partition based on locality {[ProcRef], [_ | _] = Remotes} ?= lists:partition(fun partisan_remote_ref:is_local/1, All), + %% We have duplicates, so we need to check if our local duplicate should + %% belong here. false ?= safe_is_location_right(GrainKey, ProcRef), + %% The grain should not be here, so we will deactivate but only if + %% we can reach any of the remote duplicates true ?= lists:any(fun is_reachable/1, Remotes), + %% Since at least one remote grain is reachable, we deactivate the + %% local one deactivate_grain(GrainKey, ProcRef) else _ -> @@ -758,6 +972,8 @@ remove_stale(State, Key, AWSet) -> true ?= partisan_remote_ref:is_local(ProcRef), Pid ?= partisan_remote_ref:to_pid(ProcRef), undefined ?= monitor_lookup(Acc#state.partition_id, Pid), + %% Not monitored so it has been deactivated i.e. the peer node has a + %% stale entry. We remove it from the set. ?LOG_DEBUG(#{ message => "Removing grain from registry", process_ref => ProcRef, @@ -887,6 +1103,8 @@ deactivate_grain(GrainKey, ProcRef) -> pid => ProcRef, reason => Reason }), + %% This is an inconsistency, we need to cleanup. + %% We ask the peer to do it, via a private cast (peer can be us) partisan_gen_server:cast( {?MODULE, partisan_remote_ref:node(ProcRef)}, {force_unregister_name, GrainKey, ProcRef} @@ -938,6 +1156,10 @@ pick_alive([H | T]) -> pick_alive([]) -> undefined. + +%% @private +%% Returns a new list where all the process references are known to be +%% reachable. If the remote check fails, it returns false. filter_alive(undefined) -> []; @@ -989,10 +1211,14 @@ unregister_local(#state{partition_id = PartitionId} = State0, Pid) when is_pid(P unregister_local(_, '$end_of_table') -> ok. 
+ + %% ============================================================================= %% TEST %% ============================================================================= + + -ifdef(TEST). register_name_(PartitionPid, GrainRef, ProcRef) -> From e9b4384b7c362782d33d1afd09a7827b57328f8c Mon Sep 17 00:00:00 2001 From: Alejandro Miguez Date: Wed, 20 Aug 2025 15:38:01 -0300 Subject: [PATCH 09/14] Fixes in bondy_mst_crdt & partisan_plumtree callbacks --- src/erleans_pm.erl | 5 +- src/erleans_registry_partition.erl | 75 ++++++++++++++++++++++++++++-- 2 files changed, 75 insertions(+), 5 deletions(-) diff --git a/src/erleans_pm.erl b/src/erleans_pm.erl index 40ca5d5..282ec19 100644 --- a/src/erleans_pm.erl +++ b/src/erleans_pm.erl @@ -52,6 +52,7 @@ partition for each grain based on its grain_key(). %% Partition selection -export([select_partition/1]). -export([get_all_partition_pids/0]). +-export([grain_key/1]). %% TEST API -ifdef(TEST). @@ -244,7 +245,9 @@ select_partition(GrainRef) -> -%% @private +?DOC(""" +Extracts the grain key from a grain reference for consistent hashing. +"""). -spec grain_key(erleans:grain_ref()) -> {term(), module()}. grain_key(#{id := Id, implementing_module := Mod}) -> diff --git a/src/erleans_registry_partition.erl b/src/erleans_registry_partition.erl index abb437c..214f23f 100644 --- a/src/erleans_registry_partition.erl +++ b/src/erleans_registry_partition.erl @@ -345,7 +345,8 @@ partisan_plumtree_broadcast_handler behaviour. boolean(). merge(_Id, Gossip) -> - partisan_gen_server:call(?MODULE, {crdt_merge, Gossip}). + PartitionPid = select_partition_for_gossip(Gossip), + partisan_gen_server:call(PartitionPid, {crdt_merge, Gossip}). ?DOC(""" @@ -362,7 +363,18 @@ partisan_plumtree_broadcast_handler behaviour. Payload :: bondy_mst_crdt:gossip()) -> boolean(). merge(Peer, _Root, Gossip) -> - partisan_gen_server:call({?MODULE, Peer}, {crdt_merge, Gossip}). 
+ %% For remote calls, we need the registered name, not the PID + PartitionPid = select_partition_for_gossip(Gossip), + case erlang:process_info(PartitionPid, registered_name) of + {registered_name, PartitionName} -> + try + partisan_gen_server:call({PartitionName, Peer}, {crdt_merge, Gossip}) + catch + _:_ -> false % Return false if the call fails + end; + undefined -> + false % Return false if partition not registered + end. ?DOC(""" @@ -394,7 +406,22 @@ is_stale({Peer, Root}) -> %% and we send ourself a message to potentially init a merge with the peer %% i.e. in this case we take the job of synchonising the CRDT in out hands %% instead of relying on Plumtree. - ok = partisan_gen_server:cast(?MODULE, {crdt_maybe_merge, Peer, Root}), + %% + %% Since we don't have gossip context here, we broadcast to all partitions + %% Each partition will check if it's stale for this peer/root combination + + + %% TODO: it could be wrong!!!! + %% Option 1: Route to All Partitions (Current - but inefficient) + %% Option 2: Always Return true (Skip Plumtree optimization) + %% Option 3: Use a Single Coordinator Partition + %% Option 4: Hash the Peer Node for Distribution + %% Option 5: initiate a merge with the same partition on ther Peer node + + %% Determine which partition this is by looking at the calling process + {registered_name, PartitionName} = erlang:process_info(self(), registered_name), + %% Cast to the same partition on the peer node + partisan_gen_server:cast({PartitionName, Peer}, {crdt_maybe_merge, partisan:node(), Root}), true. @@ -456,7 +483,24 @@ Triggers a synchronisation exchange with a peer. -spec sync(node(), map()) -> ok | {error, term()}. sync(Peer, Opts) -> - partisan_gen_server:call(?MODULE, {crdt_trigger, Peer, Opts}). 
+ %% Option 1: Parallel async calls (most efficient) + %% Option 2: Parallel calls with timeout + %% Option 3: Sequential calls (least efficient) + %% Trigger sync on all partitions since sync is a global operation + Workers = erleans_pm:get_all_partition_pids(), + [partisan_gen_server:cast(Pid, {crdt_trigger, Peer, Opts}) || Pid <- Workers], + ok. + + % Results = [partisan_gen_server:call(Pid, {crdt_trigger, Peer, Opts}) || Pid <- Workers], + % %% Return ok if any partition succeeded, otherwise return the first error + % case lists:any(fun(Result) -> Result =:= ok end, Results) of + % true -> ok; + % false -> + % case lists:keyfind(error, 1, Results) of + % false -> ok; + % Error -> Error + % end + % end. @@ -1212,6 +1256,29 @@ unregister_local(_, '$end_of_table') -> ok. +%% @private +%% Selects the appropriate partition PID for a gossip message based on the grain key +select_partition_for_gossip(Gossip) -> + #{key := Key} = bondy_mst_crdt:gossip_data(Gossip), + case Key of + undefined -> + %% For sync messages without specific key, use the first partition + [FirstPid | _] = erleans_pm:get_all_partition_pids(), + FirstPid; + {Id, Mod} -> + %% Key is already in grain_key format, create grain_ref and route + GrainRef = #{id => Id, implementing_module => Mod}, + erleans_pm:select_partition(GrainRef); + GrainKey -> + %% Fallback for other key formats - log to understand usage + ?LOG_WARNING("Unexpected grain key format in gossip: ~p", [GrainKey]), + GrainRef = #{id => GrainKey, implementing_module => undefined}, + erleans_pm:select_partition(GrainRef) + end. 
+ + + + %% ============================================================================= %% TEST From bac55bd8653146794630a654d33e1a9c600ac686 Mon Sep 17 00:00:00 2001 From: Alejandro Miguez Date: Wed, 20 Aug 2025 17:24:25 -0300 Subject: [PATCH 10/14] Removing erleans_registry_partition from partisan broadcast_mods --- src/erleans_app.erl | 2 +- src/erleans_registry_partition.erl | 45 ++++++++++++++++++++++++------ src/erleans_registry_sup.erl | 14 +++++----- 3 files changed, 45 insertions(+), 16 deletions(-) diff --git a/src/erleans_app.erl b/src/erleans_app.erl index 8674407..fa60a64 100644 --- a/src/erleans_app.erl +++ b/src/erleans_app.erl @@ -53,7 +53,7 @@ setup_partisan() -> Overrides = #{ broadcast_mods => ordsets:to_list( ordsets:union( - ordsets:from_list([erleans_registry_partition, partisan_plumtree_backend]), + ordsets:from_list([partisan_plumtree_backend]), ordsets:from_list(BroadcastMods) ) ) diff --git a/src/erleans_registry_partition.erl b/src/erleans_registry_partition.erl index 214f23f..cdd0e47 100644 --- a/src/erleans_registry_partition.erl +++ b/src/erleans_registry_partition.erl @@ -240,8 +240,13 @@ where Args is the list of arguments passed to the callback. """). 
%% CRDT callback router - handles callback_mfa pattern correctly crdt_callback(PartitionName, Function, Args) -> - ?LOG_DEBUG("CRDT callback: partition=~p, function=~p, args=~p", [PartitionName, Function, Args]), - + ?LOG_DEBUG(#{ + message => "CRDT callback", + partition => PartitionName, + function => Function, + args => Args + }), + case Function of send -> [Peer, Message] = Args, @@ -532,6 +537,7 @@ init([PartitionId, PoolName]) -> ), %% We monitor all nodes so that we can cleanup our view of the registry + %% Just start monitoring - partisan handles multiple calls gracefully partisan:monitor_nodes(true), {channel, Channel} = lists:keyfind(channel, 1, partisan_gen:get_opts()), @@ -579,9 +585,18 @@ init([PartitionId, PoolName]) -> %% Connect to gproc_pool immediately case gproc_pool:connect_worker(PoolName, {partition, PartitionId}) of true -> - ?LOG_INFO("Successfully connected partition ~p to pool ~p", [PartitionId, PoolName]); + ?LOG_INFO(#{ + message => "Successfully connected partition", + partition => PartitionId, + pool => PoolName + }); Error -> - ?LOG_ERROR("Failed to connect partition ~p to pool ~p: ~p", [PartitionId, PoolName, Error]), + ?LOG_ERROR(#{ + message => "Failed to connect partition", + partition => PartitionId, + pool => PoolName, + error => Error + }), error({pool_connection_failed, Error}) end, @@ -825,16 +840,27 @@ handle_info({nodeup, _Node}, State) -> {noreply, State}; handle_info({'DOWN', MRef, process, Pid, _Info}, State0) when is_pid(Pid) -> - ?LOG_INFO("Grain down ~p", [{Pid, MRef}]), + ?LOG_INFO(#{message => "Grain down", pid => Pid, mref => MRef}), {_, State} = do_unregister_process(State0, Pid), {noreply, State}; handle_info(Event, State) -> - ?LOG_INFO("Received unknown event ~p", [Event]), + ?LOG_INFO(#{message => "Received unknown event", event => Event}), {noreply, State}. -spec terminate(Reason :: (normal | shutdown | {shutdown, term()} | term()), State :: t()) -> ok. 
terminate(_Reason, #state{partition_id = PartitionId} = State) -> + %% Only stop monitoring if we're the last partition + try + case length(erleans_pm:get_all_partition_pids()) =< 1 of + true -> + partisan:monitor_nodes(false); + false -> + ok % Other partitions still need monitoring + end + catch + _:_ -> ok % Ignore errors during shutdown + end, ok = unregister_all_local(State), _ = persistent_term:erase(?PERSISTENT_KEY(PartitionId)), ok. @@ -1236,7 +1262,7 @@ unregister_all_local(#state{partition_id = PartitionId} = State) -> catch Class:Reason:Stacktrace -> ?LOG_ERROR(#{ - message => "Unexpected error", + message => "Unexpected error unregistering local grains", class => Class, reason => Reason, stacktrace => Stacktrace @@ -1271,7 +1297,10 @@ select_partition_for_gossip(Gossip) -> erleans_pm:select_partition(GrainRef); GrainKey -> %% Fallback for other key formats - log to understand usage - ?LOG_WARNING("Unexpected grain key format in gossip: ~p", [GrainKey]), + ?LOG_WARNING(#{ + message => "Unexpected grain key format in gossip", + grain_key => GrainKey + }), GrainRef = #{id => GrainKey, implementing_module => undefined}, erleans_pm:select_partition(GrainRef) end. 
diff --git a/src/erleans_registry_sup.erl b/src/erleans_registry_sup.erl index c51178c..79a079b 100644 --- a/src/erleans_registry_sup.erl +++ b/src/erleans_registry_sup.erl @@ -45,23 +45,23 @@ start_link() -> init([]) -> %% Get number of partitions from configuration N = erleans_config:get(pm_partitions, 1), - - ?LOG_INFO("Starting erleans registry with ~p partitions", [N]), - + + ?LOG_INFO(#{message => "Starting erleans registry with partitions", partitions => N}), + %% Create the gproc_pool first try gproc_pool:new(?POOL_NAME, hash, [{size, N}]), - ?LOG_INFO("Created gproc_pool ~p with size ~p", [?POOL_NAME, N]), + ?LOG_INFO(#{message => "Created gproc_pool", pool => ?POOL_NAME, size => N}), %% Add workers to the pool for each partition [gproc_pool:add_worker(?POOL_NAME, {partition, PartitionId}, PartitionId) || PartitionId <- lists:seq(1, N)], - ?LOG_INFO("Added ~p workers to pool ~p", [N, ?POOL_NAME]) + ?LOG_INFO(#{message => "Added workers to pool", count => N, pool => ?POOL_NAME}) catch error:exists -> - ?LOG_INFO("gproc_pool ~p already exists", [?POOL_NAME]); + ?LOG_INFO(#{message => "gproc_pool already exists", pool => ?POOL_NAME}); Error:Reason -> - ?LOG_ERROR("Failed to create gproc_pool ~p: ~p:~p", [?POOL_NAME, Error, Reason]), + ?LOG_ERROR(#{message => "Failed to create gproc_pool", pool => ?POOL_NAME, error => {Error, Reason}}), error({gproc_pool_creation_failed, Error, Reason}) end, From 63aac6ca172ce7ec854ec2aa4a35ce376710f41a Mon Sep 17 00:00:00 2001 From: Alejandro Miguez Date: Wed, 20 Aug 2025 17:42:27 -0300 Subject: [PATCH 11/14] Returning true as default in is_stale/2 plumtree_brodcast_handler callback --- src/erleans_registry_partition.erl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/erleans_registry_partition.erl b/src/erleans_registry_partition.erl index cdd0e47..cfc7f49 100644 --- a/src/erleans_registry_partition.erl +++ b/src/erleans_registry_partition.erl @@ -405,7 +405,7 @@ 
partisan_plumtree_broadcast_handler behaviour. """). -spec is_stale(gossip_id()) -> boolean(). -is_stale({Peer, Root}) -> +is_stale({_Peer, _Root}) -> %% In our case the I_HAVE message is the root of the peer's tree, so we %% always return `true` signaling Plumtree that we do not need the message, %% and we send ourself a message to potentially init a merge with the peer @@ -424,9 +424,9 @@ is_stale({Peer, Root}) -> %% Option 5: initiate a merge with the same partition on ther Peer node %% Determine which partition this is by looking at the calling process - {registered_name, PartitionName} = erlang:process_info(self(), registered_name), + % {registered_name, PartitionName} = erlang:process_info(self(), registered_name), %% Cast to the same partition on the peer node - partisan_gen_server:cast({PartitionName, Peer}, {crdt_maybe_merge, partisan:node(), Root}), + % partisan_gen_server:cast({PartitionName, Peer}, {crdt_maybe_merge, partisan:node(), Root}), true. From 8c62249ab5663db8c2610cbde584cfd87929ae06 Mon Sep 17 00:00:00 2001 From: Alejandro Miguez Date: Thu, 21 Aug 2025 11:48:55 -0300 Subject: [PATCH 12/14] Added to be able to perform a GC in partition registry CRDT based on configurable interval --- rebar.lock | 2 +- src/erleans_registry_partition.erl | 75 +++++++++++++++++++++++++++--- 2 files changed, 69 insertions(+), 8 deletions(-) diff --git a/rebar.lock b/rebar.lock index ea86672..f7b27b1 100644 --- a/rebar.lock +++ b/rebar.lock @@ -13,7 +13,7 @@ 1}, {<<"bondy_mst">>, {git,"https://github.com/bondy-io/bondy_mst.git", - {ref,"2c9419bb7424e8b175bcbb057784b8088ea222d8"}}, + {ref,"e983ae504d1a0e9c11652eb7c163916ee0d150a0"}}, 0}, {<<"cf">>,{pkg,<<"cf">>,<<"0.3.1">>},1}, {<<"erlware_commons">>,{pkg,<<"erlware_commons">>,<<"1.8.1">>},0}, diff --git a/src/erleans_registry_partition.erl b/src/erleans_registry_partition.erl index cfc7f49..5e0008b 100644 --- a/src/erleans_registry_partition.erl +++ b/src/erleans_registry_partition.erl @@ -47,6 +47,7 @@ This is 
stored on `bondy_mst`. -define(TREE(PartitionId), persistent_term:get(?PERSISTENT_KEY(PartitionId))). -define(MONITOR_TAB(PartitionId), list_to_atom("erleans_registry_partition_monitor_" ++ integer_to_list(PartitionId))). -define(TIMEOUT, 15000). +-define(CRDT_GC_INTERVAL, erleans_config:get(crdt_gc_interval, undefined)). %% This server may receive a huge amount of messages. %% We make sure that they are stored off heap to avoid excessive GCs. @@ -59,7 +60,8 @@ This is stored on `bondy_mst`. partition_id :: pos_integer(), crdt :: bondy_mst_crdt:t(), partisan_channel :: partisan:channel(), - initial_sync = false :: boolean() + initial_sync = false :: boolean(), + crdt_gc_tref :: undefined | reference() }). -type t() :: #state{}. @@ -629,7 +631,15 @@ handle_continue(monitor_existing, #state{partition_id = PartitionId} = State0) - Acc end end, - State = lists:foldl(Fun, State0, ets:tab2list(MonitorTab)), + State1 = lists:foldl(Fun, State0, ets:tab2list(MonitorTab)), + + State = case ?CRDT_GC_INTERVAL of + undefined -> + State1; + Interval -> + TRef = erlang:send_after(Interval, self(), {crdt_gc, erlang:monotonic_time()}), + State1#state{crdt_gc_tref = TRef} + end, %% We should now have all existing local grains re-registered on this %% server and broadcast messages sent to cluster peers. @@ -832,6 +842,10 @@ handle_cast(_Request, State) -> handle_info({'ETS-TRANSFER', _, _, []}, State) -> {noreply, State}; +handle_info({crdt_gc, Epoch}, State0) -> + State = do_gc(State0, Epoch), + {noreply, State}; + handle_info({nodedown, Node}, State) -> CRDT = bondy_mst_crdt:cancel_merge(State#state.crdt, Node), {noreply, State#state{crdt = CRDT}}; @@ -873,6 +887,22 @@ terminate(_Reason, #state{partition_id = PartitionId} = State) -> +%% ----------------------------------------------------------------------------- +%% @private +%% @doc Performs garbage collection on the CRDT. +%% @param State The current state of the partition. +%% @param Epoch The current epoch. 
+%% @return The new state of the partition after garbage collection. +%% ----------------------------------------------------------------------------- +-spec do_gc(State :: t(), Epoch :: pos_integer()) -> t(). + +do_gc(State, Epoch) -> + %% Meta = #{name => T#?MODULE.name, freed_count => Num, freed_bytes => Bytes}, + CRDT = bondy_mst_crdt:gc(State#state.crdt, Epoch), + TRef = erlang:send_after(?CRDT_GC_INTERVAL, self(), {crdt_gc, erlang:monotonic_time()}), + State#state{crdt = CRDT, crdt_gc_tref = TRef}. + + do_lookup(#state{partition_id = PartitionId, crdt = CRDT}, #{id := _} = GrainRef) -> do_lookup(#state{partition_id = PartitionId, crdt = CRDT}, grain_key(GrainRef)); @@ -1116,6 +1146,18 @@ do_unregister_name_by_key(#state{partition_id = PartitionId} = State0, GrainKey, State = remove(State0, GrainKey, Value), {ok, State}. +%% ----------------------------------------------------------------------------- +%% @private +%% @doc +%% Converts a grain reference to a grain key. +%% The grain key is a tuple of the grain id and the implementing module. +%% This is used to store the grain in the registry and the CRDT. +%% @param GrainRef The grain reference to convert. +%% @return The grain key as a tuple {Id, ImplementingModule}. +%% ----------------------------------------------------------------------------- +-spec grain_key(GrainRef :: #{id := pos_integer(), implementing_module := atom()}) -> + grain_key(). + grain_key(#{id := Id, implementing_module := Mod}) -> {Id, Mod}. @@ -1175,10 +1217,23 @@ deactivate_grain(GrainKey, ProcRef) -> }), %% This is an inconsistency, we need to cleanup. 
%% We ask the peer to do it, via a private cast (peer can be us) - partisan_gen_server:cast( - {?MODULE, partisan_remote_ref:node(ProcRef)}, - {force_unregister_name, GrainKey, ProcRef} - ); + case GrainKey of + {Id, Mod} -> + GrainRef = #{id => Id, implementing_module => Mod}, + PartitionPid = erleans_pm:select_partition(GrainRef), + {registered_name, PartitionName} = erlang:process_info(PartitionPid, registered_name), + partisan_gen_server:cast( + {PartitionName, partisan_remote_ref:node(ProcRef)}, + {force_unregister_name, GrainKey, ProcRef} + ); + _ -> + ?LOG_ERROR(#{ + description => "Invalid grain key format", + grain => GrainKey, + pid => ProcRef + }), + ok + end; {error, Reason} -> ?LOG_ERROR(#{ description => "Failed to deactivate duplicate", @@ -1282,8 +1337,14 @@ unregister_local(_, '$end_of_table') -> ok. +%% ------------------------------------------------------------------------------- %% @private -%% Selects the appropriate partition PID for a gossip message based on the grain key +%% @doc Selects the appropriate partition PID for a gossip message +%% based on the grain key +%% @end +%% -------------------------------------------------------------------------------- +-spec select_partition_for_gossip(Gossip :: term()) -> pid(). 
+ select_partition_for_gossip(Gossip) -> #{key := Key} = bondy_mst_crdt:gossip_data(Gossip), case Key of From 112216422e9de68eafd51976d5a57d64a1bf289c Mon Sep 17 00:00:00 2001 From: Alejandro Miguez Date: Sat, 23 Aug 2025 15:48:21 -0300 Subject: [PATCH 13/14] Upgrade bondy_mst version using the feature of callback_args instead of previous callback_mfa (removed) --- rebar.lock | 2 +- src/erleans_registry_partition.erl | 68 +++++++++++------------------- 2 files changed, 25 insertions(+), 45 deletions(-) diff --git a/rebar.lock b/rebar.lock index f7b27b1..5e153b4 100644 --- a/rebar.lock +++ b/rebar.lock @@ -13,7 +13,7 @@ 1}, {<<"bondy_mst">>, {git,"https://github.com/bondy-io/bondy_mst.git", - {ref,"e983ae504d1a0e9c11652eb7c163916ee0d150a0"}}, + {ref,"3ff177084842bf01509e05442f505d218759b7e7"}}, 0}, {<<"cf">>,{pkg,<<"cf">>,<<"0.3.1">>},1}, {<<"erlware_commons">>,{pkg,<<"erlware_commons">>,<<"1.8.1">>},0}, diff --git a/src/erleans_registry_partition.erl b/src/erleans_registry_partition.erl index 5e0008b..113f647 100644 --- a/src/erleans_registry_partition.erl +++ b/src/erleans_registry_partition.erl @@ -86,9 +86,11 @@ This is stored on `bondy_mst`. %% BONDY_MST_CRDT CALLBACKS -export([broadcast/1]). --export([crdt_callback/3]). +-export([broadcast/2]). -export([on_merge/1]). +-export([on_merge/2]). -export([send/2]). +-export([send/3]). -export([sync/1]). %% PARTISAN_PLUMTREE_BROADCAST_HANDLER CALLBACKS @@ -234,69 +236,45 @@ info(PartitionPid) -> -?DOC(""" -CRDT callback router that routes to specific partition by name. -This function is called by bondy_mst_crdt with callback_mfa pattern. -The callback_mfa calls this function as: crdt_callback(PartitionName, Function, Args) -where Args is the list of arguments passed to the callback. -"""). 
-%% CRDT callback router - handles callback_mfa pattern correctly -crdt_callback(PartitionName, Function, Args) -> - ?LOG_DEBUG(#{ - message => "CRDT callback", - partition => PartitionName, - function => Function, - args => Args - }), - - case Function of - send -> - [Peer, Message] = Args, - send_to_partition(PartitionName, Peer, Message); - broadcast -> - [Gossip] = Args, - broadcast_gossip(Gossip); - on_merge -> - [Peer] = Args, - on_merge_partition(PartitionName, Peer); - _ -> - error({unknown_crdt_callback, Function, Args}) - end. - - ?DOC(""" Implementation of the `bondy_mst_crdt` callback. Casts message `Message` to this server on node `Peer` using `partisan`. +The partition name is passed as the first argument from callback_args. """). -send_to_partition(PartitionName, Peer, Message) -> +send(PartitionName, Peer, Message) -> partisan_gen_server:cast({PartitionName, Peer}, {crdt_message, Message}). ?DOC(""" Implementation of the `bondy_mst_crdt` callback. Broadcasts message `Gossip` to peers using Plumtree (Epidemis broadcast trees). +The partition name is passed as the first argument from callback_args. """). -broadcast_gossip(Gossip) -> +broadcast(_PartitionName, Gossip) -> partisan:broadcast(Gossip, ?MODULE). ?DOC(""" Implementation of the `bondy_mst_crdt` callback. Removes stale entries and duplicates after merge. +The partition name is passed as the first argument from callback_args. """). -on_merge_partition(PartitionName, Peer) -> +on_merge(PartitionName, Peer) -> partisan_gen_server:cast(PartitionName, {crdt_on_merge, Peer}). -%% Legacy callback functions - kept for backward compatibility -send(Peer, Message) -> - partisan_gen_server:cast({?MODULE, Peer}, {crdt_message, Message}). +%% Legacy callback functions - required for behavior completeness but not used with callback_args +send(_Peer, _Message) -> + %% Not called when using callback_args, but required by behavior + ok. -broadcast(Gossip) -> - partisan:broadcast(Gossip, ?MODULE). 
+broadcast(_Gossip) -> + %% Not called when using callback_args, but required by behavior + ok. -on_merge(Peer) -> - partisan_gen_server:cast(?MODULE, {crdt_on_merge, Peer}). +on_merge(_Peer) -> + %% Not called when using callback_args, but required by behavior + ok. @@ -546,6 +524,7 @@ init([PartitionId, PoolName]) -> %% We wrap the tree using the exchange module Node = partisan:node(), + PartitionName = partition_name(PartitionId), Opts = #{ hash_algorithm => sha256, merger => fun(GrainKey, AWSet1, AWSet2) -> @@ -553,12 +532,13 @@ init([PartitionId, PoolName]) -> end, store => bondy_mst_ets_store, store_opts => #{ - name => atom_to_binary(partition_name(PartitionId)), + name => atom_to_binary(PartitionName), persistent => true }, %% CRDT opts - %% Use callback_mfa to route calls to this specific partition instance - callback_mfa => {?MODULE, crdt_callback, [partition_name(PartitionId)]}, + %% Use callback_mod and callback_args to route calls to this specific partition + callback_mod => ?MODULE, + callback_args => [PartitionName], max_merges => 1, max_merges_per_root => 1, max_versions => 10, From 92a095a620179453381a2729bf8eb16448f5dc9b Mon Sep 17 00:00:00 2001 From: Alejandro Miguez Date: Wed, 3 Sep 2025 18:12:36 -0300 Subject: [PATCH 14/14] Fix partisan dependency to https://github.com/lasp-lang/partisan.git instead of http --- rebar.config | 2 +- rebar.lock | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/rebar.config b/rebar.config index 090ef69..f95ea26 100644 --- a/rebar.config +++ b/rebar.config @@ -19,7 +19,7 @@ resulto, {partisan, { git, - "http://github.com/lasp-lang/partisan.git", + "https://github.com/lasp-lang/partisan.git", {tag, "v5.0.3"} }}, {bondy_mst,{ diff --git a/rebar.lock b/rebar.lock index 5e153b4..2a15b4d 100644 --- a/rebar.lock +++ b/rebar.lock @@ -39,7 +39,7 @@ {ref,"41cb700d60a7dcfdb4165aca73634bbe1c07a33f"}}, 0}, {<<"partisan">>, - {git,"http://github.com/lasp-lang/partisan.git", + 
{git,"https://github.com/lasp-lang/partisan.git", {ref,"c5309d479b2beddfdd32f1c00338a8a771a745be"}}, 0}, {<<"quickrand">>,{pkg,<<"quickrand">>,<<"2.0.7">>},1},