From 6992fd876c4fe565e61df11b4648bca1fcd2bd81 Mon Sep 17 00:00:00 2001 From: sarvekshayr Date: Fri, 20 Mar 2026 20:42:37 +0530 Subject: [PATCH 1/4] HDDS-14870. Allow balancing of CLOSED containers with non-closed replicas --- .../ContainerBalancerConfiguration.java | 35 +++++++++++- .../src/main/proto/hdds.proto | 1 + .../ContainerBalancerSelectionCriteria.java | 26 ++++++--- ...estContainerBalancerSelectionCriteria.java | 53 +++++++++++++++++-- 4 files changed, 104 insertions(+), 11 deletions(-) diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerConfiguration.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerConfiguration.java index 8ff3b6e5e20..13c75a63d09 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerConfiguration.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerConfiguration.java @@ -146,6 +146,13 @@ public final class ContainerBalancerConfiguration { "data node is very high") private boolean triggerDuEnable = false; + @Config(key = "hdds.container.balancer.include.closed.container.with.non.closed.replicas", type = ConfigType.BOOLEAN, + defaultValue = "false", tags = {ConfigTag.BALANCER}, + description = "Whether to include CLOSED containers for balancing " + + "that have the minimum required closed replicas, even if additional " + + "replicas are in a non-closed state") + private boolean includeClosedContainerWithNonClosedReplicas = false; + /** * Gets the threshold value for Container Balancer. * @@ -432,6 +439,24 @@ public void setExcludeNodes(String excludeNodes) { this.excludeNodes = excludeNodes; } + /** + * Get the includeClosedContainerWithNonClosedReplicas value for Container Balancer. + * + * @return the boolean value of includeClosedContainerWithNonClosedReplicas + */ + public Boolean getIncludeClosedContainerWithNonClosedReplicas() { + return includeClosedContainerWithNonClosedReplicas; + } + + /** + * Set the includeClosedContainerWithNonClosedReplicas value for Container Balancer. + * + * @param enable the boolean value to be set to includeClosedContainerWithNonClosedReplicas + */ + public void setIncludeClosedContainerWithNonClosedReplicas(boolean enable) { + includeClosedContainerWithNonClosedReplicas = enable; + } + @Override public String toString() { return String.format("Container Balancer Configuration values:%n" + @@ -478,7 +503,9 @@ public String toString() { "Datanodes Specified to be Balanced", includeNodes.equals("") ? "None" : includeNodes, "Datanodes Excluded from Balancing", - excludeNodes.equals("") ? "None" : excludeNodes); + excludeNodes.equals("") ? "None" : excludeNodes, + "Whether to include CLOSED containers with non-closed replicas for balancing", + includeClosedContainerWithNonClosedReplicas); } public ContainerBalancerConfigurationProto.Builder toProtobufBuilder() { @@ -500,7 +527,8 @@ public ContainerBalancerConfigurationProto.Builder toProtobufBuilder() { .setExcludeDatanodes(excludeNodes) .setMoveNetworkTopologyEnable(networkTopologyEnable) .setTriggerDuBeforeMoveEnable(triggerDuEnable) - .setMoveReplicationTimeout(moveReplicationTimeout); + .setMoveReplicationTimeout(moveReplicationTimeout) + .setIncludeClosedContainerWithNonClosedReplicas(includeClosedContainerWithNonClosedReplicas); return builder; } @@ -555,6 +583,9 @@ static ContainerBalancerConfiguration fromProtobuf( if (proto.hasMoveReplicationTimeout()) { config.setMoveReplicationTimeout(proto.getMoveReplicationTimeout()); } + if (proto.hasIncludeClosedContainerWithNonClosedReplicas()) { + config.setIncludeClosedContainerWithNonClosedReplicas(proto.getIncludeClosedContainerWithNonClosedReplicas()); + } return config; } } diff --git a/hadoop-hdds/interface-client/src/main/proto/hdds.proto b/hadoop-hdds/interface-client/src/main/proto/hdds.proto index 2f20fde3e09..ed4ca1b81a1 100644 --- a/hadoop-hdds/interface-client/src/main/proto/hdds.proto +++ b/hadoop-hdds/interface-client/src/main/proto/hdds.proto @@ -504,6 +504,7 @@ message ContainerBalancerConfigurationProto { optional int32 nextIterationIndex = 19; optional int64 moveReplicationTimeout = 20; optional string includeContainers = 21; + optional bool includeClosedContainerWithNonClosedReplicas = 22; } message TransferLeadershipRequestProto { diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerSelectionCriteria.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerSelectionCriteria.java index 52f34347c9a..f3c76a16c67 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerSelectionCriteria.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerSelectionCriteria.java @@ -199,6 +199,8 @@ public boolean shouldBeExcluded(ContainerID containerID, * Checks whether specified container is closed. Also checks if the replica * on the specified datanode is CLOSED. Assumes that there will only be one * replica of a container on a particular Datanode. + * If hdds.container.balancer.include.closed.container.with.non.closed.replicas config is set to true, + * check for minimum required closed replicas, even if additional replicas are in a non-closed state. * @param container container to check * @param datanodeDetails datanode on which a replica of the container is * present @@ -213,12 +215,24 @@ private boolean isContainerClosed(ContainerInfo container, return false; } - for (ContainerReplica replica : replicas) { - if (replica.getDatanodeDetails().equals(datanodeDetails)) { - // don't consider replica if it's not closed - // assumption: there's only one replica of this container on this DN - return replica.getState().equals(ContainerReplicaProto.State.CLOSED); - } + ContainerReplica targetReplica = replicas.stream() + .filter(r -> r.getDatanodeDetails().equals(datanodeDetails)) + .findFirst() + .orElse(null); + + if (targetReplica == null) { + return false; + } + if (targetReplica.getState().equals(ContainerReplicaProto.State.CLOSED)) { + return true; + } + // Replica is NOT closed - only allow if config is enabled AND we have enough CLOSED replicas + if (balancerConfiguration.getIncludeClosedContainerWithNonClosedReplicas()) { + long numReplicasClosed = replicas.stream() + .filter(r -> r.getState() == ContainerReplicaProto.State.CLOSED) + .count(); + int minimumRequiredNodes = container.getReplicationConfig().getRequiredNodes(); + return numReplicasClosed >= minimumRequiredNodes; } return false; diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestContainerBalancerSelectionCriteria.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestContainerBalancerSelectionCriteria.java index 60e74eb183c..b4238058bb0 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestContainerBalancerSelectionCriteria.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestContainerBalancerSelectionCriteria.java @@ -20,6 +20,7 @@ import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_SERVICE; import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor.THREE; import static org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.CLOSED; +import static org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.QUASI_CLOSED; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.ArgumentMatchers.any; @@ -57,8 +58,11 @@ public class TestContainerBalancerSelectionCriteria { private ContainerBalancerSelectionCriteria criteria; + private ContainerBalancerConfiguration balancerConfiguration; private ContainerManager containerManager; private ReplicationManager replicationManager; + private NodeManager nodeManager; + private FindSourceStrategy findSourceStrategy; private DatanodeDetails source; private ContainerInfo containerInfo; private ContainerID containerID; @@ -66,13 +70,13 @@ public class TestContainerBalancerSelectionCriteria { @BeforeEach public void setup() throws Exception { OzoneConfiguration conf = new OzoneConfiguration(); - ContainerBalancerConfiguration balancerConfiguration = conf.getObject(ContainerBalancerConfiguration.class); + balancerConfiguration = conf.getObject(ContainerBalancerConfiguration.class); balancerConfiguration.setMaxSizeToMovePerIteration(100 * OzoneConsts.GB); - NodeManager nodeManager = mock(NodeManager.class); + nodeManager = mock(NodeManager.class); containerManager = mock(ContainerManager.class); replicationManager = mock(ReplicationManager.class); - FindSourceStrategy findSourceStrategy = mock(FindSourceStrategy.class); + findSourceStrategy = mock(FindSourceStrategy.class); source = MockDatanodeDetails.randomDatanodeDetails(); containerInfo = ReplicationTestUtil.createContainerInfo(RatisReplicationConfig.getInstance(THREE), 1L, @@ -135,4 +139,47 @@ public void shouldExcludeReplicatingContainer() verify(replicationManager, times(1)).getContainerReplicationHealth( eq(containerInfo), anySet()); } + + @Test + public void shouldIncludeClosedContainerWithNonClosedReplicas() throws Exception { + DatanodeDetails dn2 = MockDatanodeDetails.randomDatanodeDetails(); + DatanodeDetails dn3 = MockDatanodeDetails.randomDatanodeDetails(); + DatanodeDetails dn4 = MockDatanodeDetails.randomDatanodeDetails(); + + // Create replicas: 3 CLOSED replicas (minimum required) + 1 QUASI_CLOSED replica + Set replicas = new HashSet<>(); + replicas.add(ReplicationTestUtil.createContainerReplica(containerID, 0, IN_SERVICE, + CLOSED, 1L, OzoneConsts.GB, source, source.getID())); + replicas.add(ReplicationTestUtil.createContainerReplica(containerID, 0, IN_SERVICE, + CLOSED, 1L, OzoneConsts.GB, dn2, dn2.getID())); + replicas.add(ReplicationTestUtil.createContainerReplica(containerID, 0, IN_SERVICE, + CLOSED, 1L, OzoneConsts.GB, dn3, dn3.getID())); + replicas.add(ReplicationTestUtil.createContainerReplica(containerID, 0, IN_SERVICE, + QUASI_CLOSED, 1L, OzoneConsts.GB, dn4, dn4.getID())); + + // Update the mock to return the new replica set + when(containerManager.getContainerReplicas(containerID)).thenReturn(replicas); + + balancerConfiguration.setIncludeClosedContainerWithNonClosedReplicas(true); + ContainerBalancerSelectionCriteria configEnabled = new ContainerBalancerSelectionCriteria( + balancerConfiguration, nodeManager, replicationManager, + containerManager, findSourceStrategy, new HashMap<>()); + + // With config enabled, even the QUASI_CLOSED replica can be moved + // because the cluster has enough CLOSED replicas + assertFalse(configEnabled.shouldBeExcluded(containerID, source, 0L)); + assertFalse(configEnabled.shouldBeExcluded(containerID, dn4, 0L)); + + // Verify behavior when config is disabled (default) + balancerConfiguration.setIncludeClosedContainerWithNonClosedReplicas(false); + ContainerBalancerSelectionCriteria criteriaDisabled = new ContainerBalancerSelectionCriteria( + balancerConfiguration, nodeManager, replicationManager, + containerManager, findSourceStrategy, new HashMap<>()); + + // With config disabled, the CLOSED replica can still be moved + assertFalse(criteriaDisabled.shouldBeExcluded(containerID, source, 0L)); + + // But the QUASI_CLOSED replica should be excluded when config is disabled + assertTrue(criteriaDisabled.shouldBeExcluded(containerID, dn4, 0L)); + } } From 77557d26c1bcc8927d946904c2834ea9018b9049 Mon Sep 17 00:00:00 2001 From: sarvekshayr Date: Sat, 21 Mar 2026 12:58:17 +0530 Subject: [PATCH 2/4] Allow balancer to pick over replicated and quasi closed containers --- .../ContainerBalancerConfiguration.java | 36 ++++---- .../src/main/proto/hdds.proto | 2 +- .../ContainerBalancerSelectionCriteria.java | 83 +++++++++++++------ ...estContainerBalancerSelectionCriteria.java | 29 ++++--- 4 files changed, 93 insertions(+), 57 deletions(-) diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerConfiguration.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerConfiguration.java index 13c75a63d09..6c1d1cd0c8b 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerConfiguration.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerConfiguration.java @@ -146,12 +146,12 @@ public final class ContainerBalancerConfiguration { "data node is very high") private boolean triggerDuEnable = false; - @Config(key = "hdds.container.balancer.include.closed.container.with.non.closed.replicas", type = ConfigType.BOOLEAN, + @Config(key = "hdds.container.balancer.include.non.standard.containers", type = ConfigType.BOOLEAN, defaultValue = "false", tags = {ConfigTag.BALANCER}, - description = "Whether to include CLOSED containers for balancing " + - "that have the minimum required closed replicas, even if additional " + - "replicas are in a non-closed state") - private boolean includeClosedContainerWithNonClosedReplicas = false; + description = "Whether to include containers in non-standard states, such as " + + "over-replicated CLOSED containers with additional QUASI_CLOSED replicas " + + "or consistent QUASI_CLOSED containers.") + private boolean includeNonStandardContainers = false; /** * Gets the threshold value for Container Balancer. @@ -440,21 +440,21 @@ public void setExcludeNodes(String excludeNodes) { } /** - * Get the includeClosedContainerWithNonClosedReplicas value for Container Balancer. + * Get the includeNonStandardContainers value for Container Balancer. * - * @return the boolean value of includeClosedContainerWithNonClosedReplicas + * @return the boolean value of includeNonStandardContainers */ - public Boolean getIncludeClosedContainerWithNonClosedReplicas() { - return includeClosedContainerWithNonClosedReplicas; + public Boolean getIncludeNonStandardContainers() { + return includeNonStandardContainers; } /** - * Set the includeClosedContainerWithNonClosedReplicas value for Container Balancer. + * Set the includeNonStandardContainers value for Container Balancer. * - * @param enable the boolean value to be set to includeClosedContainerWithNonClosedReplicas + * @param enable the boolean value to be set to includeNonStandardContainers */ - public void setIncludeClosedContainerWithNonClosedReplicas(boolean enable) { - includeClosedContainerWithNonClosedReplicas = enable; + public void setIncludeNonStandardContainers(boolean enable) { + includeNonStandardContainers = enable; } @Override @@ -504,8 +504,8 @@ public String toString() { includeNodes.equals("") ? "None" : includeNodes, "Datanodes Excluded from Balancing", excludeNodes.equals("") ? "None" : excludeNodes, - "Whether to include CLOSED containers with non-closed replicas for balancing", - includeClosedContainerWithNonClosedReplicas); + "Whether to include non-standard containers (over-replicated, quasi-closed) for balancing", + includeNonStandardContainers); } public ContainerBalancerConfigurationProto.Builder toProtobufBuilder() { @@ -528,7 +528,7 @@ public ContainerBalancerConfigurationProto.Builder toProtobufBuilder() { .setMoveNetworkTopologyEnable(networkTopologyEnable) .setTriggerDuBeforeMoveEnable(triggerDuEnable) .setMoveReplicationTimeout(moveReplicationTimeout) - .setIncludeClosedContainerWithNonClosedReplicas(includeClosedContainerWithNonClosedReplicas); + .setIncludeNonStandardContainers(includeNonStandardContainers); return builder; } @@ -583,8 +583,8 @@ static ContainerBalancerConfiguration fromProtobuf( if (proto.hasMoveReplicationTimeout()) { config.setMoveReplicationTimeout(proto.getMoveReplicationTimeout()); } - if (proto.hasIncludeClosedContainerWithNonClosedReplicas()) { - config.setIncludeClosedContainerWithNonClosedReplicas(proto.getIncludeClosedContainerWithNonClosedReplicas()); + if (proto.hasIncludeNonStandardContainers()) { + config.setIncludeNonStandardContainers(proto.getIncludeNonStandardContainers()); } return config; } diff --git a/hadoop-hdds/interface-client/src/main/proto/hdds.proto b/hadoop-hdds/interface-client/src/main/proto/hdds.proto index ed4ca1b81a1..3d13b156848 100644 --- a/hadoop-hdds/interface-client/src/main/proto/hdds.proto +++ b/hadoop-hdds/interface-client/src/main/proto/hdds.proto @@ -504,7 +504,7 @@ message ContainerBalancerConfigurationProto { optional int32 nextIterationIndex = 19; optional int64 moveReplicationTimeout = 20; optional string includeContainers = 21; - optional bool includeClosedContainerWithNonClosedReplicas = 22; + optional bool includeNonStandardContainers = 22; } message TransferLeadershipRequestProto { diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerSelectionCriteria.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerSelectionCriteria.java index f3c76a16c67..09919c1c495 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerSelectionCriteria.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerSelectionCriteria.java @@ -199,48 +199,71 @@ public boolean shouldBeExcluded(ContainerID containerID, * Checks whether specified container is closed. Also checks if the replica * on the specified datanode is CLOSED. Assumes that there will only be one * replica of a container on a particular Datanode. - * If hdds.container.balancer.include.closed.container.with.non.closed.replicas config is set to true, - * check for minimum required closed replicas, even if additional replicas are in a non-closed state. + *

+ * With hdds.container.balancer.include.non.standard.containers set to true: + * - CLOSED containers: Allows moving QUASI_CLOSED replicas if minimum CLOSED replicas exist + * - QUASI_CLOSED containers: Allows moving if all replicas are QUASI_CLOSED + * * @param container container to check - * @param datanodeDetails datanode on which a replica of the container is - * present - * @return true if container LifeCycleState is - * {@link HddsProtos.LifeCycleState#CLOSED} and its replica on the - * specified datanode is CLOSED, else false + * @param datanodeDetails datanode on which a replica of the container is present + * @param replicas all replicas of the container + * @return true if container and replica are eligible for balancing, else false */ private boolean isContainerClosed(ContainerInfo container, DatanodeDetails datanodeDetails, Set replicas) { - if (!container.getState().equals(HddsProtos.LifeCycleState.CLOSED)) { - return false; - } - + HddsProtos.LifeCycleState containerState = container.getState(); + // Find the specific replica on this datanode ContainerReplica targetReplica = replicas.stream() .filter(r -> r.getDatanodeDetails().equals(datanodeDetails)) .findFirst() .orElse(null); - if (targetReplica == null) { return false; } - if (targetReplica.getState().equals(ContainerReplicaProto.State.CLOSED)) { - return true; + ContainerReplicaProto.State replicaState = targetReplica.getState(); + + // Case 1: Container is CLOSED + if (containerState == HddsProtos.LifeCycleState.CLOSED) { + if (replicaState == ContainerReplicaProto.State.CLOSED) { + return true; + } + + // With config enabled: Also allow QUASI_CLOSED replicas if we have minimum required CLOSED replicas + if (balancerConfiguration.getIncludeNonStandardContainers()) { + long numClosedReplicas = replicas.stream() + .filter(r -> r.getState() == ContainerReplicaProto.State.CLOSED) + .count(); + int minRequiredReplicas = container.getReplicationConfig().getRequiredNodes(); + + if (numClosedReplicas >= minRequiredReplicas) { + return replicaState == ContainerReplicaProto.State.QUASI_CLOSED; + } + } + return false; } - // Replica is NOT closed - only allow if config is enabled AND we have enough CLOSED replicas - if (balancerConfiguration.getIncludeClosedContainerWithNonClosedReplicas()) { - long numReplicasClosed = replicas.stream() - .filter(r -> r.getState() == ContainerReplicaProto.State.CLOSED) - .count(); - int minimumRequiredNodes = container.getReplicationConfig().getRequiredNodes(); - return numReplicasClosed >= minimumRequiredNodes; + + // Case 2: Container is QUASI_CLOSED + if (containerState == HddsProtos.LifeCycleState.QUASI_CLOSED) { + if (!balancerConfiguration.getIncludeNonStandardContainers()) { + return false; + } + + // Only allow if ALL replicas are QUASI_CLOSED + // This ensures we don't move containers with mixed replica states + return replicas.stream() + .allMatch(r -> r.getState() == ContainerReplicaProto.State.QUASI_CLOSED); } - return false; } /** * This asks replication manager whether a container is under/over/mis replicated. The intention is the same as * isContainerReplicatingOrDeleting but the check is done in a different way to be doubly sure. + *

+ * With hdds.container.balancer.include.non.standard.containers set to true: + * - OVER_REPLICATED containers are allowed + * * @param container container to check * @param replicas the container's replicas * @return false if it should not be moved, true otherwise @@ -248,12 +271,20 @@ private boolean isContainerClosed(ContainerInfo container, private boolean isContainerHealthyForMove(ContainerInfo container, Set replicas) { ContainerHealthResult.HealthState state = replicationManager.getContainerReplicationHealth(container, replicas).getHealthState(); - if (state != ContainerHealthResult.HealthState.HEALTHY) { - LOG.debug("Excluding container {} with replicas {} as its health is {}.", container, replicas, state); - return false; + if (state == ContainerHealthResult.HealthState.HEALTHY) { + return true; } - return true; + // OVER_REPLICATED containers allowed when config is enabled + if (state == ContainerHealthResult.HealthState.OVER_REPLICATED && + balancerConfiguration.getIncludeNonStandardContainers()) { + LOG.debug("Container {} is over-replicated but allowed for balancing if " + + "includeNonStandardContainers config is true", container); + return true; + } + + LOG.debug("Excluding container {} with replicas {} as its health is {}.", container, replicas, state); + return false; } private boolean breaksMaxSizeToMoveLimit(ContainerID containerID, diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestContainerBalancerSelectionCriteria.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestContainerBalancerSelectionCriteria.java index b4238058bb0..ff77a2059f0 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestContainerBalancerSelectionCriteria.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestContainerBalancerSelectionCriteria.java @@ -141,12 +141,12 @@ public void shouldExcludeReplicatingContainer() } @Test - public void shouldIncludeClosedContainerWithNonClosedReplicas() throws Exception { + public void shouldIncludeNonStandardContainers() throws Exception { DatanodeDetails dn2 = MockDatanodeDetails.randomDatanodeDetails(); DatanodeDetails dn3 = MockDatanodeDetails.randomDatanodeDetails(); DatanodeDetails dn4 = MockDatanodeDetails.randomDatanodeDetails(); - // Create replicas: 3 CLOSED replicas (minimum required) + 1 QUASI_CLOSED replica + // Over-replicated CLOSED container with 3 CLOSED + 1 QUASI_CLOSED replica Set replicas = new HashSet<>(); replicas.add(ReplicationTestUtil.createContainerReplica(containerID, 0, IN_SERVICE, CLOSED, 1L, OzoneConsts.GB, source, source.getID())); @@ -157,29 +157,34 @@ public void shouldIncludeClosedContainerWithNonClosedReplicas() throws Exception replicas.add(ReplicationTestUtil.createContainerReplica(containerID, 0, IN_SERVICE, QUASI_CLOSED, 1L, OzoneConsts.GB, dn4, dn4.getID())); - // Update the mock to return the new replica set + // Update mocks: container is over-replicated when(containerManager.getContainerReplicas(containerID)).thenReturn(replicas); + when(replicationManager.getContainerReplicationHealth(eq(containerInfo), anySet())) + .thenReturn(new ContainerHealthResult.OverReplicatedHealthResult(containerInfo, 1, false)); - balancerConfiguration.setIncludeClosedContainerWithNonClosedReplicas(true); + // Test 1: Config ENABLED - should allow balancing of over-replicated + quasi-closed + balancerConfiguration.setIncludeNonStandardContainers(true); ContainerBalancerSelectionCriteria configEnabled = new ContainerBalancerSelectionCriteria( balancerConfiguration, nodeManager, replicationManager, containerManager, findSourceStrategy, new HashMap<>()); - // With config enabled, even the QUASI_CLOSED replica can be moved - // because the cluster has enough CLOSED replicas + // All replicas (including QUASI_CLOSED) can be moved because: + // - Config is enabled + // - Container has minimum 3 CLOSED replicas + // - Container is over-replicated (allowed by config) assertFalse(configEnabled.shouldBeExcluded(containerID, source, 0L)); + assertFalse(configEnabled.shouldBeExcluded(containerID, dn2, 0L)); + assertFalse(configEnabled.shouldBeExcluded(containerID, dn3, 0L)); assertFalse(configEnabled.shouldBeExcluded(containerID, dn4, 0L)); - // Verify behavior when config is disabled (default) - balancerConfiguration.setIncludeClosedContainerWithNonClosedReplicas(false); + // Test 2: Config DISABLED (default) - should exclude over-replicated containers + balancerConfiguration.setIncludeNonStandardContainers(false); ContainerBalancerSelectionCriteria criteriaDisabled = new ContainerBalancerSelectionCriteria( balancerConfiguration, nodeManager, replicationManager, containerManager, findSourceStrategy, new HashMap<>()); - // With config disabled, the CLOSED replica can still be moved - assertFalse(criteriaDisabled.shouldBeExcluded(containerID, source, 0L)); - - // But the QUASI_CLOSED replica should be excluded when config is disabled + // Over-replicated containers are excluded when config is disabled + assertTrue(criteriaDisabled.shouldBeExcluded(containerID, source, 0L)); assertTrue(criteriaDisabled.shouldBeExcluded(containerID, dn4, 0L)); } } From 068a032bfc13e884142dd8632fd96ea3d6cfef9d Mon Sep 17 00:00:00 2001 From: sarvekshayr Date: Mon, 23 Mar 2026 17:42:13 +0530 Subject: [PATCH 3/4] Handle the case in MoveManager as well --- .../balancer/ContainerBalancerTask.java | 2 + .../scm/container/balancer/MoveManager.java | 63 ++++++++--- ...estContainerBalancerSelectionCriteria.java | 52 ++++++++- .../container/balancer/TestMoveManager.java | 106 ++++++++++++++++++ 4 files changed, 209 insertions(+), 14 deletions(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerTask.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerTask.java index 3726269edc8..9b4f11d8c31 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerTask.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerTask.java @@ -144,6 +144,8 @@ public ContainerBalancerTask(StorageContainerManager scm, this.moveManager.setMoveTimeout(config.getMoveTimeout().toMillis()); this.moveManager.setReplicationTimeout( config.getMoveReplicationTimeout().toMillis()); + this.moveManager.setIncludeNonStandardContainers( + config.getIncludeNonStandardContainers()); this.delayStart = delayStart; this.ozoneConfiguration = scm.getConfiguration(); this.containerBalancer = containerBalancer; diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/MoveManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/MoveManager.java index e04688ab2cc..a9598d837ca 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/MoveManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/MoveManager.java @@ -27,6 +27,7 @@ import java.util.concurrent.ConcurrentHashMap; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos; import org.apache.hadoop.hdds.scm.container.ContainerID; import org.apache.hadoop.hdds.scm.container.ContainerInfo; import org.apache.hadoop.hdds.scm.container.ContainerManager; @@ -63,6 +64,7 @@ public final class MoveManager implements */ private long moveTimeout = 1000 * 65 * 60; private long replicationTimeout = 1000 * 50 * 60; + private boolean includeNonStandardContainers = false; private final ReplicationManager replicationManager; private final ContainerManager containerManager; @@ -195,14 +197,22 @@ CompletableFuture move( If the container is under, over, or mis replicated, we should let replication manager solve these issues first. Fail move for such a container. + + If includeNonStandardContainers is enabled, allow OVER_REPLICATED + containers to be moved by the balancer. */ ContainerHealthResult healthBeforeMove = replicationManager.getContainerReplicationHealth(containerInfo, currentReplicas); - if (healthBeforeMove.getHealthState() != - ContainerHealthResult.HealthState.HEALTHY) { - ret.complete(MoveResult.REPLICATION_NOT_HEALTHY_BEFORE_MOVE); - return ret; + ContainerHealthResult.HealthState healthState = healthBeforeMove.getHealthState(); + + if (healthState != ContainerHealthResult.HealthState.HEALTHY) { + // Allow OVER_REPLICATED if config is enabled + if (!(healthState == ContainerHealthResult.HealthState.OVER_REPLICATED && + includeNonStandardContainers)) { + ret.complete(MoveResult.REPLICATION_NOT_HEALTHY_BEFORE_MOVE); + return ret; + } } /* @@ -226,10 +236,15 @@ CompletableFuture move( } // Ensure the container is CLOSED + // If includeNonStandardContainers is enabled and ALL replicas + // are QUASI_CLOSED, allow moving QUASI_CLOSED containers HddsProtos.LifeCycleState currentContainerStat = containerInfo.getState(); if (currentContainerStat != HddsProtos.LifeCycleState.CLOSED) { - ret.complete(MoveResult.REPLICATION_FAIL_CONTAINER_NOT_CLOSED); - return ret; + // Allow QUASI_CLOSED if config is enabled and all replicas are QUASI_CLOSED + if (!isQuasiClosed(currentContainerStat, currentReplicas)) { + ret.complete(MoveResult.REPLICATION_FAIL_CONTAINER_NOT_CLOSED); + return ret; + } } // Create a set or replicas that indicates how the container will look @@ -239,10 +254,17 @@ CompletableFuture move( src, tgt, currentReplicas); ContainerHealthResult healthResult = replicationManager .getContainerReplicationHealth(containerInfo, replicasAfterMove); - if (healthResult.getHealthState() - != ContainerHealthResult.HealthState.HEALTHY) { - ret.complete(MoveResult.REPLICATION_NOT_HEALTHY_AFTER_MOVE); - return ret; + ContainerHealthResult.HealthState healthAfterMove = healthResult.getHealthState(); + + if (healthAfterMove != ContainerHealthResult.HealthState.HEALTHY) { + // Allow OVER_REPLICATED after move if config is enabled + // This allows balancing over-replicated containers + // ReplicationManager will handle the excess replica deletion separately + if (!(healthAfterMove == ContainerHealthResult.HealthState.OVER_REPLICATED && + includeNonStandardContainers)) { + ret.complete(MoveResult.REPLICATION_NOT_HEALTHY_AFTER_MOVE); + return ret; + } } startMove(containerInfo, src, tgt, ret); LOG.debug("Processed a move request for container {}, from {} to {}", @@ -251,6 +273,13 @@ CompletableFuture move( } } + private boolean isQuasiClosed(HddsProtos.LifeCycleState lifeCycleState, Set replicas) { + return (lifeCycleState == HddsProtos.LifeCycleState.QUASI_CLOSED && + includeNonStandardContainers && + replicas.stream().allMatch(r -> + r.getState() == StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.QUASI_CLOSED)); + } + /** * Notify Move Manager that a container op has been completed. * @@ -345,13 +374,17 @@ private void handleSuccessfulAdd(final ContainerID cid) ContainerHealthResult healthResult = replicationManager .getContainerReplicationHealth(containerInfo, futureReplicas); + ContainerHealthResult.HealthState futureHealthState = healthResult.getHealthState(); - if (healthResult.getHealthState() == - ContainerHealthResult.HealthState.HEALTHY) { + // Allow deletion if future state is HEALTHY, or if it's OVER_REPLICATED + // and includeNonStandardContainers is enabled (balancer can move over-replicated containers) + if (futureHealthState == ContainerHealthResult.HealthState.HEALTHY || + (futureHealthState == ContainerHealthResult.HealthState.OVER_REPLICATED && + includeNonStandardContainers)) { sendDeleteCommand(containerInfo, src, moveOp.getMoveStartTime()); } else { LOG.info("Cannot remove source replica as the container health would " + - "be {}", healthResult.getHealthState()); + "be {}", futureHealthState); completeMove(cid, MoveResult.DELETE_FAIL_POLICY); } } @@ -447,6 +480,10 @@ void setReplicationTimeout(long replicationTimeout) { this.replicationTimeout = replicationTimeout; } + void setIncludeNonStandardContainers(boolean includeNonStandardContainers) { + this.includeNonStandardContainers = includeNonStandardContainers; + } + /** * All details about a move operation. */ diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestContainerBalancerSelectionCriteria.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestContainerBalancerSelectionCriteria.java index ff77a2059f0..947d2fff0a8 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestContainerBalancerSelectionCriteria.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestContainerBalancerSelectionCriteria.java @@ -141,7 +141,7 @@ public void shouldExcludeReplicatingContainer() } @Test - public void shouldIncludeNonStandardContainers() throws Exception { + public void shouldIncludeOverReplicatedClosedContainers() throws Exception { DatanodeDetails dn2 = MockDatanodeDetails.randomDatanodeDetails(); DatanodeDetails dn3 = MockDatanodeDetails.randomDatanodeDetails(); DatanodeDetails dn4 = MockDatanodeDetails.randomDatanodeDetails(); @@ -187,4 +187,54 @@ public void shouldIncludeNonStandardContainers() throws Exception { assertTrue(criteriaDisabled.shouldBeExcluded(containerID, source, 0L)); assertTrue(criteriaDisabled.shouldBeExcluded(containerID, dn4, 0L)); } + + @Test + public void shouldIncludeQuasiClosedContainers() throws Exception { + DatanodeDetails dn2 = MockDatanodeDetails.randomDatanodeDetails(); + DatanodeDetails dn3 = MockDatanodeDetails.randomDatanodeDetails(); + + // QUASI_CLOSED container with all QUASI_CLOSED replicas + ContainerInfo quasiClosedContainer = ReplicationTestUtil.createContainerInfo( + RatisReplicationConfig.getInstance(THREE), 1L, + HddsProtos.LifeCycleState.QUASI_CLOSED, 1L, OzoneConsts.GB); + ContainerID quasiClosedContainerID = quasiClosedContainer.containerID(); + + Set quasiClosedReplicas = new HashSet<>(); + quasiClosedReplicas.add(ReplicationTestUtil.createContainerReplica(quasiClosedContainerID, 0, + IN_SERVICE, QUASI_CLOSED, 1L, OzoneConsts.GB, source, source.getID())); + quasiClosedReplicas.add(ReplicationTestUtil.createContainerReplica(quasiClosedContainerID, 0, + IN_SERVICE, QUASI_CLOSED, 1L, OzoneConsts.GB, dn2, dn2.getID())); + quasiClosedReplicas.add(ReplicationTestUtil.createContainerReplica(quasiClosedContainerID, 0, + IN_SERVICE, QUASI_CLOSED, 1L, OzoneConsts.GB, dn3, dn3.getID())); + + when(containerManager.getContainer(quasiClosedContainerID)).thenReturn(quasiClosedContainer); + when(containerManager.getContainerReplicas(quasiClosedContainerID)).thenReturn(quasiClosedReplicas); + when(replicationManager.isContainerReplicatingOrDeleting(quasiClosedContainerID)).thenReturn(false); + when(replicationManager.getContainerReplicationHealth(eq(quasiClosedContainer), anySet())) + .thenReturn(new ContainerHealthResult.OverReplicatedHealthResult(quasiClosedContainer, 1, false)); + + // Test 1: Config ENABLED - should allow balancing of QUASI_CLOSED container + balancerConfiguration.setIncludeNonStandardContainers(true); + ContainerBalancerSelectionCriteria configEnabled = new ContainerBalancerSelectionCriteria( + balancerConfiguration, nodeManager, replicationManager, + containerManager, findSourceStrategy, new HashMap<>()); + + // All QUASI_CLOSED replicas can be moved because: + // - Config is enabled + // - Container is QUASI_CLOSED + // - All replicas are QUASI_CLOSED (consistent state) + assertFalse(configEnabled.shouldBeExcluded(quasiClosedContainerID, source, 0L)); + assertFalse(configEnabled.shouldBeExcluded(quasiClosedContainerID, dn2, 0L)); + assertFalse(configEnabled.shouldBeExcluded(quasiClosedContainerID, dn3, 0L)); + + // Test 2: Config DISABLED (default) - should exclude QUASI_CLOSED containers + balancerConfiguration.setIncludeNonStandardContainers(false); + ContainerBalancerSelectionCriteria criteriaDisabled = new ContainerBalancerSelectionCriteria( + balancerConfiguration, nodeManager, replicationManager, + containerManager, findSourceStrategy, new HashMap<>()); + + // QUASI_CLOSED containers are excluded when config is disabled + assertTrue(criteriaDisabled.shouldBeExcluded(quasiClosedContainerID, source, 0L)); + assertTrue(criteriaDisabled.shouldBeExcluded(quasiClosedContainerID, dn2, 0L)); + } } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestMoveManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestMoveManager.java index 355aab13c41..995e25e9c27 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestMoveManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestMoveManager.java @@ -39,6 +39,7 @@ import static org.apache.hadoop.hdds.scm.container.replication.ContainerReplicaOp.PendingOpType.DELETE; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.ArgumentMatchers.anySet; import static org.mockito.Mockito.any; import static org.mockito.Mockito.anyInt; import static org.mockito.Mockito.anyLong; @@ -576,4 +577,109 @@ private void assertMoveFailsWith(MoveManager.MoveResult expectedResult, MoveManager.MoveResult actualResult = res.get(); assertEquals(expectedResult, actualResult); } + + /** + * Test that moving an over-replicated CLOSED container fails when config is disabled. + */ + @Test + public void testMoveOverReplicatedClosedContainerWithConfigDisabled() throws Exception { + setupOverReplicatedContainer(); + + moveManager.setIncludeNonStandardContainers(false); + assertMoveFailsWith(REPLICATION_NOT_HEALTHY_BEFORE_MOVE, containerInfo.containerID()); + } + + /** + * Test that moving an over-replicated CLOSED container succeeds when config is enabled. + */ + @Test + public void testMoveOverReplicatedClosedContainerWithConfigEnabled() throws Exception { + setupOverReplicatedContainer(); + when(replicationManager.getPendingReplicationOps(containerInfo.containerID())).thenReturn(new ArrayList<>()); + + moveManager.setIncludeNonStandardContainers(true); + CompletableFuture successRes = + moveManager.move(containerInfo.containerID(), src, tgt); + verify(replicationManager).sendLowPriorityReplicateContainerCommand( + eq(containerInfo), eq(0), eq(src), eq(tgt), anyLong()); + completeMove(containerInfo, src, tgt, successRes); + } + + /** + * Test that moving a QUASI_CLOSED container fails when config is disabled. + */ + @Test + public void testMoveQuasiClosedContainerWithConfigDisabled() throws Exception { + ContainerInfo qcContainer = setupQuasiClosedContainer(1); + src = replicas.iterator().next().getDatanodeDetails(); + tgt = MockDatanodeDetails.randomDatanodeDetails(); + nodes.put(src, NodeStatus.inServiceHealthy()); + nodes.put(tgt, NodeStatus.inServiceHealthy()); + + moveManager.setIncludeNonStandardContainers(false); + assertMoveFailsWith(REPLICATION_FAIL_CONTAINER_NOT_CLOSED, qcContainer.containerID()); + } + + /** + * Test that moving a QUASI_CLOSED container succeeds when config is enabled. + */ + @Test + public void testMoveQuasiClosedContainerWithConfigEnabled() throws Exception { + ContainerInfo qcContainer = setupQuasiClosedContainer(2); + src = replicas.iterator().next().getDatanodeDetails(); + tgt = MockDatanodeDetails.randomDatanodeDetails(); + nodes.put(src, NodeStatus.inServiceHealthy()); + nodes.put(tgt, NodeStatus.inServiceHealthy()); + + moveManager.setIncludeNonStandardContainers(true); + CompletableFuture successRes = + moveManager.move(qcContainer.containerID(), src, tgt); + verify(replicationManager).sendLowPriorityReplicateContainerCommand( + eq(qcContainer), eq(0), eq(src), eq(tgt), anyLong()); + completeMove(qcContainer, src, tgt, successRes); + } + + private void setupOverReplicatedContainer() { + replicas.clear(); + replicas.addAll(ReplicationTestUtil.createReplicas(containerInfo.containerID(), 0, 0, 0, 0)); + src = replicas.iterator().next().getDatanodeDetails(); + tgt = MockDatanodeDetails.randomDatanodeDetails(); + nodes.put(src, NodeStatus.inServiceHealthy()); + nodes.put(tgt, NodeStatus.inServiceHealthy()); + + when(replicationManager.getContainerReplicationHealth(any(), anySet())) + .thenReturn(new ContainerHealthResult.OverReplicatedHealthResult(containerInfo, 1, false)); + } + + private ContainerInfo setupQuasiClosedContainer(long containerId) throws Exception { + ContainerInfo qcContainer = ReplicationTestUtil.createContainerInfo( + RatisReplicationConfig.getInstance(THREE), containerId, + HddsProtos.LifeCycleState.QUASI_CLOSED); + replicas.clear(); + replicas.addAll(ReplicationTestUtil.createReplicas(qcContainer.containerID(), + ContainerReplicaProto.State.QUASI_CLOSED, 0, 0, 0)); + + when(containerManager.getContainer(eq(qcContainer.containerID()))).thenReturn(qcContainer); + when(containerManager.getContainerReplicas(qcContainer.containerID())).thenReturn(replicas); + when(replicationManager.getContainerReplicationHealth(eq(qcContainer), anySet())) + .thenReturn(new ContainerHealthResult.HealthyResult(qcContainer)); + + return qcContainer; + } + + private void completeMove(ContainerInfo container, DatanodeDetails source, + DatanodeDetails target, CompletableFuture moveResult) throws Exception { + ContainerReplicaOp addOp = new ContainerReplicaOp( + ADD, target, 0, null, clock.millis() + 1000, 0); + moveManager.opCompleted(addOp, container.containerID(), false); + + verify(replicationManager).sendDeleteCommand( + eq(container), eq(0), eq(source), eq(true), anyLong()); + + ContainerReplicaOp deleteOp = new ContainerReplicaOp( + DELETE, source, 0, null, clock.millis() + 1000, 0); + moveManager.opCompleted(deleteOp, container.containerID(), false); + + assertEquals(COMPLETED, moveResult.get()); + } } From 7263f023ba279f0cfaaf8dbe1627ba8559a052da Mon Sep 17 00:00:00 2001 From: sarvekshayr Date: Wed, 25 Mar 2026 12:19:14 +0530 Subject: [PATCH 4/4] Added separate methods and checks empty replica --- .../ContainerBalancerSelectionCriteria.java | 102 +++++++++++++----- ...estContainerBalancerSelectionCriteria.java | 51 ++++++++- 2 files changed, 123 insertions(+), 30 deletions(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerSelectionCriteria.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerSelectionCriteria.java index 09919c1c495..4dfa90163d3 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerSelectionCriteria.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerSelectionCriteria.java @@ -190,28 +190,78 @@ public boolean shouldBeExcluded(ContainerID containerID, return true; } - return !isContainerClosed(container, node, replicas) || - !isContainerHealthyForMove(container, replicas) || - isContainerReplicatingOrDeleting(containerID); + if (balancerConfiguration.getIncludeNonStandardContainers()) { + return !isContainerClosedRelaxed(container, node, replicas) || + !isContainerHealthyForMoveRelaxed(container, replicas) || + isContainerReplicatingOrDeleting(containerID); + } else { + return !isContainerClosed(container, node, replicas) || + !isContainerHealthyForMove(container, replicas) || + isContainerReplicatingOrDeleting(containerID); + } } /** * Checks whether specified container is closed. Also checks if the replica * on the specified datanode is CLOSED. Assumes that there will only be one * replica of a container on a particular Datanode. + * @param container container to check + * @param datanodeDetails datanode on which a replica of the container is + * present + * @return true if container LifeCycleState is + * {@link HddsProtos.LifeCycleState#CLOSED} and its replica on the + * specified datanode is CLOSED, else false + */ + private boolean isContainerClosed(ContainerInfo container, + DatanodeDetails datanodeDetails, + Set replicas) { + if (!container.getState().equals(HddsProtos.LifeCycleState.CLOSED)) { + return false; + } + + for (ContainerReplica replica : replicas) { + if (replica.getDatanodeDetails().equals(datanodeDetails)) { + // don't consider replica if it's not closed + // assumption: there's only one replica of this container on this DN + return replica.getState().equals(ContainerReplicaProto.State.CLOSED); + } + } + + return false; + } + + /** + * This asks replication manager whether a container is under/over/mis replicated. The intention is the same as + * isContainerReplicatingOrDeleting but the check is done in a different way to be doubly sure. + * @param container container to check + * @param replicas the container's replicas + * @return false if it should not be moved, true otherwise + */ + private boolean isContainerHealthyForMove(ContainerInfo container, Set replicas) { + ContainerHealthResult.HealthState state = + replicationManager.getContainerReplicationHealth(container, replicas).getHealthState(); + if (state != ContainerHealthResult.HealthState.HEALTHY) { + LOG.debug("Excluding container {} with replicas {} as its health is {}.", container, replicas, state); + return false; + } + + return true; + } + + /** + * Relaxed version of isContainerClosed used when includeNonStandardContainers is enabled. *

- * With hdds.container.balancer.include.non.standard.containers set to true: - * - CLOSED containers: Allows moving QUASI_CLOSED replicas if minimum CLOSED replicas exist - * - QUASI_CLOSED containers: Allows moving if all replicas are QUASI_CLOSED + * - CLOSED containers: Allows moving non-empty QUASI_CLOSED replicas if minimum CLOSED replicas exist + * - QUASI_CLOSED containers: Allows moving if all replicas are QUASI_CLOSED and not empty * * @param container container to check * @param datanodeDetails datanode on which a replica of the container is present * @param replicas all replicas of the container * @return true if container and replica are eligible for balancing, else false */ - private boolean isContainerClosed(ContainerInfo container, - DatanodeDetails datanodeDetails, - Set replicas) { + private boolean isContainerClosedRelaxed(ContainerInfo container, + DatanodeDetails datanodeDetails, + Set replicas) { HddsProtos.LifeCycleState containerState = container.getState(); // Find the specific replica on this datanode ContainerReplica targetReplica = replicas.stream() @@ -229,15 +279,15 @@ private boolean isContainerClosed(ContainerInfo container, return true; } - // With config enabled: Also allow QUASI_CLOSED replicas if we have minimum required CLOSED replicas - if (balancerConfiguration.getIncludeNonStandardContainers()) { + // Allow non-empty QUASI_CLOSED replicas if we have minimum required CLOSED replicas + if (replicaState == ContainerReplicaProto.State.QUASI_CLOSED) { long numClosedReplicas = replicas.stream() .filter(r -> r.getState() == ContainerReplicaProto.State.CLOSED) .count(); int minRequiredReplicas = container.getReplicationConfig().getRequiredNodes(); if (numClosedReplicas >= minRequiredReplicas) { - return replicaState == ContainerReplicaProto.State.QUASI_CLOSED; + return !targetReplica.isEmpty(); } } return false; @@ -245,41 +295,37 @@ private boolean isContainerClosed(ContainerInfo container, // Case 2: Container is QUASI_CLOSED if (containerState == HddsProtos.LifeCycleState.QUASI_CLOSED) { - if (!balancerConfiguration.getIncludeNonStandardContainers()) { + // All replicas must be QUASI_CLOSED + boolean allReplicasQuasiClosed = replicas.stream() + .allMatch(r -> r.getState() == ContainerReplicaProto.State.QUASI_CLOSED); + if (!allReplicasQuasiClosed) { return false; } - - // Only allow if ALL replicas are QUASI_CLOSED - // This ensures we don't move containers with mixed replica states - return replicas.stream() - .allMatch(r -> r.getState() == ContainerReplicaProto.State.QUASI_CLOSED); + return !targetReplica.isEmpty(); } return false; } /** - * This asks replication manager whether a container is under/over/mis replicated. The intention is the same as - * isContainerReplicatingOrDeleting but the check is done in a different way to be doubly sure. + * Relaxed version of isContainerHealthyForMove used when includeNonStandardContainers is enabled. *

- * With hdds.container.balancer.include.non.standard.containers set to true: - * - OVER_REPLICATED containers are allowed + * - OVER_REPLICATED containers are also allowed. * * @param container container to check * @param replicas the container's replicas * @return false if it should not be moved, true otherwise */ - private boolean isContainerHealthyForMove(ContainerInfo container, Set replicas) { + private boolean isContainerHealthyForMoveRelaxed(ContainerInfo container, Set replicas) { ContainerHealthResult.HealthState state = replicationManager.getContainerReplicationHealth(container, replicas).getHealthState(); if (state == ContainerHealthResult.HealthState.HEALTHY) { return true; } - // OVER_REPLICATED containers allowed when config is enabled - if (state == ContainerHealthResult.HealthState.OVER_REPLICATED && - balancerConfiguration.getIncludeNonStandardContainers()) { - LOG.debug("Container {} is over-replicated but allowed for balancing if " + - "includeNonStandardContainers config is true", container); + // OVER_REPLICATED containers allowed + if (state == ContainerHealthResult.HealthState.OVER_REPLICATED) { + LOG.debug("Container {} is over-replicated but allowed for balancing with " + + "includeNonStandardContainers enabled", container); return true; } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestContainerBalancerSelectionCriteria.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestContainerBalancerSelectionCriteria.java index 947d2fff0a8..969d15349da 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestContainerBalancerSelectionCriteria.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestContainerBalancerSelectionCriteria.java @@ -146,7 +146,7 @@ public void shouldIncludeOverReplicatedClosedContainers() throws Exception { DatanodeDetails dn3 = MockDatanodeDetails.randomDatanodeDetails(); DatanodeDetails dn4 = MockDatanodeDetails.randomDatanodeDetails(); - // Over-replicated CLOSED container with 3 CLOSED + 1 QUASI_CLOSED replica + // Over-replicated CLOSED container with 3 CLOSED + 1 QUASI_CLOSED replica (non-empty) Set replicas = new HashSet<>(); replicas.add(ReplicationTestUtil.createContainerReplica(containerID, 0, IN_SERVICE, CLOSED, 1L, OzoneConsts.GB, source, source.getID())); @@ -188,6 +188,53 @@ public void shouldIncludeOverReplicatedClosedContainers() throws Exception { assertTrue(criteriaDisabled.shouldBeExcluded(containerID, dn4, 0L)); } + @Test + public void shouldExcludeEmptyQuasiClosedReplicas() throws Exception { + DatanodeDetails dn2 = MockDatanodeDetails.randomDatanodeDetails(); + DatanodeDetails dn3 = MockDatanodeDetails.randomDatanodeDetails(); + DatanodeDetails dn4 = MockDatanodeDetails.randomDatanodeDetails(); + + // Over-replicated CLOSED container with 3 CLOSED + 1 QUASI_CLOSED replica (empty) + Set replicas = new HashSet<>(); + replicas.add(ReplicationTestUtil.createContainerReplica(containerID, 0, IN_SERVICE, + CLOSED, 1L, OzoneConsts.GB, source, source.getID())); + replicas.add(ReplicationTestUtil.createContainerReplica(containerID, 0, IN_SERVICE, + CLOSED, 1L, OzoneConsts.GB, dn2, dn2.getID())); + replicas.add(ReplicationTestUtil.createContainerReplica(containerID, 0, IN_SERVICE, + CLOSED, 1L, OzoneConsts.GB, dn3, dn3.getID())); + + // Empty QUASI_CLOSED replica + ContainerReplica emptyQuasiClosedReplica = ContainerReplica.newBuilder() + .setContainerID(containerID) + .setContainerState(QUASI_CLOSED) + .setSequenceId(0L) + .setKeyCount(0) + .setBytesUsed(0) + .setReplicaIndex(0) + .setDatanodeDetails(dn4) + .setEmpty(true) + .build(); + replicas.add(emptyQuasiClosedReplica); + + when(containerManager.getContainerReplicas(containerID)).thenReturn(replicas); + when(replicationManager.getContainerReplicationHealth(eq(containerInfo), anySet())) + .thenReturn(new ContainerHealthResult.OverReplicatedHealthResult(containerInfo, 1, false)); + + balancerConfiguration.setIncludeNonStandardContainers(true); + ContainerBalancerSelectionCriteria configEnabled = new ContainerBalancerSelectionCriteria( + balancerConfiguration, nodeManager, replicationManager, + containerManager, findSourceStrategy, new HashMap<>()); + + // CLOSED replicas can be moved + assertFalse(configEnabled.shouldBeExcluded(containerID, source, 0L)); + assertFalse(configEnabled.shouldBeExcluded(containerID, dn2, 0L)); + assertFalse(configEnabled.shouldBeExcluded(containerID, dn3, 0L)); + + // Empty QUASI_CLOSED replica should be EXCLUDED + assertTrue(configEnabled.shouldBeExcluded(containerID, dn4, 0L), + "Empty QUASI_CLOSED replica should be excluded from balancing"); + } + @Test public void shouldIncludeQuasiClosedContainers() throws Exception { DatanodeDetails dn2 = MockDatanodeDetails.randomDatanodeDetails(); @@ -222,7 +269,7 @@ public void shouldIncludeQuasiClosedContainers() throws Exception { // All QUASI_CLOSED replicas can be moved because: // - Config is enabled // - Container is QUASI_CLOSED - // - All replicas are QUASI_CLOSED (consistent state) + // - All replicas are non-empty QUASI_CLOSED (consistent state) assertFalse(configEnabled.shouldBeExcluded(quasiClosedContainerID, source, 0L)); assertFalse(configEnabled.shouldBeExcluded(quasiClosedContainerID, dn2, 0L)); assertFalse(configEnabled.shouldBeExcluded(quasiClosedContainerID, dn3, 0L));