-
Notifications
You must be signed in to change notification settings - Fork 2.5k
Service Layer changes for Recommission API #4320
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
8c38c53
40d739b
32d1499
79561e1
f3b8dd8
ace11fe
3988e2d
fe3f01a
7ffb650
bf0af0a
1972243
2584d51
e9d50a6
4c6273b
0b56861
95c41ff
6e2ae8f
eaeefa6
8ca49e7
97c4d32
2d83698
e7946d8
98b4ac8
fcf36cf
4f6543b
02a96ba
b2bfb31
f3582e3
fe8f3b1
d52d48e
93ee423
45b98d3
56eca8f
4d04395
c7c9f35
0cf8def
e203a57
7c14326
f281473
f22307f
22a1d5e
5227538
b0cfef5
c16ac3a
1b3471b
b1cf7a5
22cd77c
b3930c5
5228119
4f58513
224dad4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -14,6 +14,7 @@ | |
| import org.opensearch.OpenSearchTimeoutException; | ||
| import org.opensearch.action.ActionListener; | ||
| import org.opensearch.action.admin.cluster.decommission.awareness.put.DecommissionResponse; | ||
| import org.opensearch.action.support.master.AcknowledgedResponse; | ||
| import org.opensearch.cluster.ClusterState; | ||
| import org.opensearch.cluster.ClusterStateObserver; | ||
| import org.opensearch.cluster.ClusterStateUpdateTask; | ||
|
|
@@ -481,4 +482,53 @@ public void onFailure(Exception e) { | |
| } | ||
| }; | ||
| } | ||
|
|
||
| public void startRecommissionAction(final ActionListener<AcknowledgedResponse> listener) { | ||
| /* | ||
| * Starts recommissioning: first clears the voting configuration exclusions and, only if that succeeds, | ||
| * removes the decommission state via deleteDecommissionState(listener). A failure while clearing the | ||
| * exclusions is logged and forwarded to listener.onFailure; deleteDecommissionState is not called in that case. | ||
| * | ||
| * Why the exclusions are always cleared: for abandoned requests, we might not really know whether the | ||
| * exclusion list was actually restored, and we can end up in cases where exclusions are still set even | ||
| * after recommission (which is unexpected). | ||
| * By definition of OpenSearch, clusters should have no voting configuration exclusions in normal operation; | ||
| * once the excluded nodes have stopped, the voting configuration exclusions are cleared with | ||
| * DELETE /_cluster/voting_config_exclusions. | ||
| * Hence it is safe to remove the exclusions, if any. Users should make a conscious choice before | ||
| * decommissioning an awareness attribute. | ||
| * | ||
| * NOTE(review): the meaning of the trailing `false` argument is defined by clearVotingConfigExclusion, | ||
| * which is not visible here - confirm against DecommissionController. | ||
| */ | ||
| decommissionController.clearVotingConfigExclusion(new ActionListener<Void>() { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. should we provide an option to not force clear voting as a flag to the API?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Cluster might get into an unexpected state if we don't clear the exclusion and recommissioned the zone. When the cluster decommissioned one zone, it was expected that no further exclusion will be set as we already decommissioned one master. Any exclusion after decommissioning, would just be making things worse for a cluster already under stress. Not sure, if the user would need any exclusion set when the recommissioning the zone back. Also as per discussion in #4084 PR, the DELETE API should try to restore the system back for multiple failure cases Let me know if you see a case where recommissioning won't expect the exclusion to be wiped off |
||
| @Override | ||
| public void onResponse(Void unused) { | ||
| logger.info("successfully cleared voting config exclusion for deleting the decommission."); | ||
| deleteDecommissionState(listener); | ||
| } | ||
|
|
||
| @Override | ||
| public void onFailure(Exception e) { | ||
| logger.error("Failure in clearing voting config during delete_decommission request.", e); | ||
| listener.onFailure(e); | ||
| } | ||
| }, false); | ||
| } | ||
|
|
||
| void deleteDecommissionState(ActionListener<AcknowledgedResponse> listener) { | ||
| clusterService.submitStateUpdateTask("delete_decommission_state", new ClusterStateUpdateTask(Priority.URGENT) { | ||
| @Override | ||
| public ClusterState execute(ClusterState currentState) { | ||
| logger.info("Deleting the decommission attribute from the cluster state"); | ||
| Metadata metadata = currentState.metadata(); | ||
| Metadata.Builder mdBuilder = Metadata.builder(metadata); | ||
| mdBuilder.removeCustom(DecommissionAttributeMetadata.TYPE); | ||
| return ClusterState.builder(currentState).metadata(mdBuilder).build(); | ||
| } | ||
|
|
||
| @Override | ||
| public void onFailure(String source, Exception e) { | ||
| logger.error(() -> new ParameterizedMessage("Failed to clear decommission attribute. [{}]", source), e); | ||
| listener.onFailure(e); | ||
| } | ||
|
|
||
| @Override | ||
| public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) { | ||
| // Cluster state processed for deleting the decommission attribute. | ||
| assert newState.metadata().decommissionAttributeMetadata() == null; | ||
| listener.onResponse(new AcknowledgedResponse(true)); | ||
| } | ||
| }); | ||
| } | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.