From b1df449029788d0f1c8ee492a6bcf21ede6a0f90 Mon Sep 17 00:00:00 2001 From: Rishabh Patel Date: Wed, 21 May 2025 10:56:36 -0700 Subject: [PATCH 01/11] HDDS-13108. Transition to use the generic sliding window for counting scanner failures --- .../container/common/utils/SlidingWindow.java | 106 ++++++++++++ .../container/common/volume/HddsVolume.java | 39 +---- .../common/volume/StorageVolume.java | 55 +++--- .../common/utils/TestSlidingWindow.java | 157 ++++++++++++++++++ .../container/common/utils/package-info.java | 19 +++ .../volume/TestStorageVolumeHealthChecks.java | 14 ++ 6 files changed, 326 insertions(+), 64 deletions(-) create mode 100644 hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/SlidingWindow.java create mode 100644 hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/utils/TestSlidingWindow.java create mode 100644 hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/utils/package-info.java diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/SlidingWindow.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/SlidingWindow.java new file mode 100644 index 000000000000..11557fe9190c --- /dev/null +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/SlidingWindow.java @@ -0,0 +1,106 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.container.common.utils; + +import java.time.Duration; +import java.util.ArrayDeque; +import java.util.Deque; +import org.apache.hadoop.util.Time; + +/** + * A time-based sliding window implementation that tracks event timestamps. + */ +public class SlidingWindow { + private final Object lock = new Object(); + private final int windowSize; + private final Deque timestamps; + private final long expiryDurationMillis; + private final Duration expiryDuration; + + /** + * @param windowSize the maximum number of events that are tracked + * @param expiryDuration the duration after which an entry in the window expires + */ + public SlidingWindow(int windowSize, Duration expiryDuration) { + if (windowSize <= 0) { + throw new IllegalArgumentException("Window size must be greater than 0"); + } + if (expiryDuration.isNegative() || expiryDuration.isZero()) { + throw new IllegalArgumentException("Expiry duration must be greater than 0"); + } + this.expiryDuration = expiryDuration; + this.expiryDurationMillis = expiryDuration.toMillis(); + this.windowSize = windowSize; + // We limit the initial queue size to 100 to control the memory usage + this.timestamps = new ArrayDeque<>(Math.min(windowSize + 1, 100)); + } + + public void add() { + synchronized (lock) { + removeExpired(); + + if (isFull()) { + timestamps.remove(); + } + + timestamps.add(getCurrentTime()); + } + } + + /** + * Checks if the sliding window has exceeded its maximum size. 
+ * This is useful to track if we have encountered more events than the window's defined limit. + * @return true if the number of tracked timestamps in the sliding window + * exceeds the specified window size, false otherwise. + */ + public boolean isFull() { + synchronized (lock) { + removeExpired(); + return timestamps.size() > windowSize; + } + } + + private void removeExpired() { + synchronized (lock) { + long currentTime = getCurrentTime(); + long expirationThreshold = currentTime - expiryDurationMillis; + + while (!timestamps.isEmpty() && timestamps.peek() < expirationThreshold) { + timestamps.remove(); + } + } + } + + public int getWindowSize() { + return windowSize; + } + + public int getSize() { + synchronized (lock) { + return timestamps.size(); + } + } + + public Duration getExpiryDuration() { + return expiryDuration; + } + + private long getCurrentTime() { + return Time.monotonicNow(); + } +} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java index 0c72b0a6f91f..4f5061efb126 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java @@ -25,12 +25,9 @@ import jakarta.annotation.Nullable; import java.io.File; import java.io.IOException; -import java.util.LinkedList; import java.util.List; -import java.util.Queue; import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; import java.util.function.Function; import org.apache.commons.io.FileUtils; @@ -106,11 +103,6 @@ public class HddsVolume extends StorageVolume { private AtomicBoolean dbLoaded = new AtomicBoolean(false); 
private final AtomicBoolean dbLoadFailure = new AtomicBoolean(false); - private final int volumeTestCount; - private final int volumeTestFailureTolerance; - private AtomicInteger volumeTestFailureCount; - private Queue volumeTestResultQueue; - /** * Builder for HddsVolume. */ @@ -143,11 +135,6 @@ private HddsVolume(Builder b) throws IOException { this.volumeInfoMetrics = new VolumeInfoMetrics(b.getVolumeRootStr(), this); - this.volumeTestCount = getDatanodeConfig().getVolumeIOTestCount(); - this.volumeTestFailureTolerance = getDatanodeConfig().getVolumeIOFailureTolerance(); - this.volumeTestFailureCount = new AtomicInteger(0); - this.volumeTestResultQueue = new LinkedList<>(); - initialize(); } else { // Builder is called with failedVolume set, so create a failed volume @@ -155,8 +142,6 @@ private HddsVolume(Builder b) throws IOException { this.setState(VolumeState.FAILED); volumeIOStats = null; volumeInfoMetrics = new VolumeInfoMetrics(b.getVolumeRootStr(), this); - this.volumeTestCount = 0; - this.volumeTestFailureTolerance = 0; } LOG.info("HddsVolume: {}", getReport()); @@ -317,38 +302,32 @@ public synchronized VolumeCheckResult check(@Nullable Boolean unused) @VisibleForTesting public VolumeCheckResult checkDbHealth(File dbFile) throws InterruptedException { - if (volumeTestCount == 0) { + if (getIoTestCount() == 0) { return VolumeCheckResult.HEALTHY; } - final boolean isVolumeTestResultHealthy = true; try (ManagedOptions managedOptions = new ManagedOptions(); ManagedRocksDB readOnlyDb = ManagedRocksDB.openReadOnly(managedOptions, dbFile.toString())) { - volumeTestResultQueue.add(isVolumeTestResultHealthy); + // Do nothing. Only check if rocksdb is accessible. 
+ LOG.debug("Successfully opened the database at \"{}\" for HDDS volume {}.", dbFile, getStorageDir()); } catch (Exception e) { if (Thread.currentThread().isInterrupted()) { throw new InterruptedException("Check of database for volume " + this + " interrupted."); } LOG.warn("Could not open Volume DB located at {}", dbFile, e); - volumeTestResultQueue.add(!isVolumeTestResultHealthy); - volumeTestFailureCount.incrementAndGet(); - } - - if (volumeTestResultQueue.size() > volumeTestCount - && (Boolean.TRUE.equals(volumeTestResultQueue.poll()) != isVolumeTestResultHealthy)) { - volumeTestFailureCount.decrementAndGet(); + getIoTestSlidingWindow().add(); } - if (volumeTestFailureCount.get() > volumeTestFailureTolerance) { + if (getIoTestSlidingWindow().isFull()) { LOG.error("Failed to open the database at \"{}\" for HDDS volume {}: " + - "the last {} runs encountered {} out of {} tolerated failures.", - dbFile, this, volumeTestResultQueue.size(), volumeTestFailureCount.get(), volumeTestFailureTolerance); + "encountered more than the {} tolerated failures.", + dbFile, this, getIoTestSlidingWindow().getWindowSize()); return VolumeCheckResult.FAILED; } LOG.debug("Successfully opened the database at \"{}\" for HDDS volume {}: " + - "the last {} runs encountered {} out of {} tolerated failures", - dbFile, this, volumeTestResultQueue.size(), volumeTestFailureTolerance, volumeTestFailureTolerance); + "encountered {} out of {} tolerated failures", + dbFile, this, getIoTestSlidingWindow().getSize(), getIoTestSlidingWindow().getWindowSize()); return VolumeCheckResult.HEALTHY; } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/StorageVolume.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/StorageVolume.java index 3ef4008463a7..3293d54fb2bc 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/StorageVolume.java +++ 
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/StorageVolume.java @@ -26,13 +26,11 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; -import java.util.LinkedList; +import java.time.Duration; import java.util.Objects; import java.util.Optional; import java.util.Properties; -import java.util.Queue; import java.util.UUID; -import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Function; import java.util.function.Supplier; import java.util.stream.Stream; @@ -49,6 +47,7 @@ import org.apache.hadoop.ozone.container.common.impl.StorageLocationReport; import org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration; import org.apache.hadoop.ozone.container.common.utils.DiskCheckUtil; +import org.apache.hadoop.ozone.container.common.utils.SlidingWindow; import org.apache.hadoop.ozone.container.common.utils.StorageVolumeUtil; import org.apache.hadoop.util.Time; import org.slf4j.Logger; @@ -109,9 +108,7 @@ public abstract class StorageVolume implements Checkable ioTestSlidingWindow; + private SlidingWindow ioTestSlidingWindow; private int healthCheckFileSize; /** @@ -161,9 +158,7 @@ protected StorageVolume(Builder b) throws IOException { this.conf = b.conf; this.dnConf = conf.getObject(DatanodeConfiguration.class); this.ioTestCount = dnConf.getVolumeIOTestCount(); - this.ioFailureTolerance = dnConf.getVolumeIOFailureTolerance(); - this.ioTestSlidingWindow = new LinkedList<>(); - this.currentIOFailureCount = new AtomicInteger(0); + this.ioTestSlidingWindow = new SlidingWindow(dnConf.getVolumeIOFailureTolerance(), Duration.ofHours(1)); this.healthCheckFileSize = dnConf.getVolumeHealthCheckFileSize(); } else { storageDir = new File(b.volumeRootStr); @@ -172,7 +167,6 @@ protected StorageVolume(Builder b) throws IOException { this.storageID = UUID.randomUUID().toString(); this.state = VolumeState.FAILED; this.ioTestCount = 0; - this.ioFailureTolerance = 0; 
this.conf = null; this.dnConf = null; } @@ -538,6 +532,14 @@ public VolumeSet getVolumeSet() { return this.volumeSet; } + public int getIoTestCount() { + return ioTestCount; + } + + public SlidingWindow getIoTestSlidingWindow() { + return ioTestSlidingWindow; + } + public StorageType getStorageType() { return storageType; } @@ -635,7 +637,7 @@ private void cleanTmpDiskCheckDir() { * check consists of a directory check and an IO check. * * If the directory check fails, the volume check fails immediately. - * The IO check is allows to fail up to {@code ioFailureTolerance} times + * The IO check is allowed to fail up to {@code ioFailureTolerance} times * out of the last {@code ioTestCount} IO checks before this volume check is * failed. Each call to this method runs one IO check. * @@ -672,7 +674,6 @@ public synchronized VolumeCheckResult check(@Nullable Boolean unused) // to avoid volume failure we can ignore checking disk read/write int minimumDiskSpace = healthCheckFileSize * 2; if (getCurrentUsage().getAvailable() < minimumDiskSpace) { - ioTestSlidingWindow.add(true); return VolumeCheckResult.HEALTHY; } @@ -691,39 +692,25 @@ public synchronized VolumeCheckResult check(@Nullable Boolean unused) // We can check again if disk is full. If it is full, // in this case keep volume as healthy so that READ can still be served if (!diskChecksPassed && getCurrentUsage().getAvailable() < minimumDiskSpace) { - ioTestSlidingWindow.add(true); return VolumeCheckResult.HEALTHY; } - // Move the sliding window of IO test results forward 1 by adding the - // latest entry and removing the oldest entry from the window. - // Update the failure counter for the new window. 
- ioTestSlidingWindow.add(diskChecksPassed); if (!diskChecksPassed) { - currentIOFailureCount.incrementAndGet(); - } - if (ioTestSlidingWindow.size() > ioTestCount && - Objects.equals(ioTestSlidingWindow.poll(), Boolean.FALSE)) { - currentIOFailureCount.decrementAndGet(); + ioTestSlidingWindow.add(); } - // If the failure threshold has been crossed, fail the volume without - // further scans. + // If the failure threshold has been crossed, fail the volume without further scans. // Once the volume is failed, it will not be checked anymore. // The failure counts can be left as is. - if (currentIOFailureCount.get() > ioFailureTolerance) { - LOG.error("Failed IO test for volume {}: the last {} runs " + - "encountered {} out of {} tolerated failures.", this, - ioTestSlidingWindow.size(), currentIOFailureCount, - ioFailureTolerance); + if (ioTestSlidingWindow.isFull()) { + LOG.error("Failed IO test for volume {}: encountered more than the {} tolerated failures within the past {} ms.", + this, ioTestSlidingWindow.getWindowSize(), ioTestSlidingWindow.getExpiryDuration().toMillis()); return VolumeCheckResult.FAILED; - } else if (LOG.isDebugEnabled()) { - LOG.debug("IO test results for volume {}: the last {} runs encountered " + - "{} out of {} tolerated failures", this, - ioTestSlidingWindow.size(), - currentIOFailureCount, ioFailureTolerance); } + LOG.debug("IO test results for volume {}: encountered {} out of {} tolerated failures", + this, ioTestSlidingWindow.getSize(), ioTestSlidingWindow.getWindowSize()); + return VolumeCheckResult.HEALTHY; } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/utils/TestSlidingWindow.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/utils/TestSlidingWindow.java new file mode 100644 index 000000000000..f697a319a05d --- /dev/null +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/utils/TestSlidingWindow.java @@ 
-0,0 +1,157 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.container.common.utils; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.time.Duration; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; + +/** + * Tests for {@link SlidingWindow} class. 
+ */ +public class TestSlidingWindow { + + private SlidingWindow slidingWindow; + + @BeforeEach + public void setup() { + slidingWindow = new SlidingWindow(3, Duration.ofSeconds(5)); + } + + @Test + public void testConstructorValidation() { + // Test invalid window size + assertThrows(IllegalArgumentException.class, () -> + new SlidingWindow(0, Duration.ofMillis(100))); + assertThrows(IllegalArgumentException.class, () -> + new SlidingWindow(-1, Duration.ofMillis(100))); + + // Test invalid expiry duration + assertThrows(IllegalArgumentException.class, () -> + new SlidingWindow(1, Duration.ofMillis(0))); + assertThrows(IllegalArgumentException.class, () -> + new SlidingWindow(1, Duration.ofMillis(-1))); + } + + @Test + public void testAdd() { + for (int i = 0; i < slidingWindow.getWindowSize(); i++) { + slidingWindow.add(); + assertFalse(slidingWindow.isFull()); + } + + slidingWindow.add(); + assertTrue(slidingWindow.isFull()); + } + + @Test + public void testEventExpiration() throws InterruptedException { + slidingWindow = new SlidingWindow(2, Duration.ofMillis(500)); + + // Add events to reach threshold + slidingWindow.add(); + slidingWindow.add(); + slidingWindow.add(); + assertTrue(slidingWindow.isFull()); + + // Wait for events to expire + Thread.sleep(600); + + assertFalse(slidingWindow.isFull()); + + // Add one more event - should not be enough to mark as full + slidingWindow.add(); + assertFalse(slidingWindow.isFull()); + } + + @Test + public void testPartialExpiration() throws InterruptedException { + slidingWindow = new SlidingWindow(3, Duration.ofSeconds(1)); + + slidingWindow.add(); + slidingWindow.add(); + slidingWindow.add(); + slidingWindow.add(); + assertTrue(slidingWindow.isFull()); + + Thread.sleep(600); + slidingWindow.add(); // this will remove the oldest event as the window is full + + // Wait for the oldest events to expire + Thread.sleep(500); + assertFalse(slidingWindow.isFull()); + } + + @Test + @Timeout(value = 10) + public void 
testConcurrentAccess() throws InterruptedException { + // Create a sliding window with size of 5 + final SlidingWindow concurrentWindow = new SlidingWindow(5, Duration.ofSeconds(5)); + final int threadCount = 10; + final int operationsPerThread = 100; + final ExecutorService executor = Executors.newFixedThreadPool(threadCount); + final CountDownLatch startLatch = new CountDownLatch(1); + final CountDownLatch finishLatch = new CountDownLatch(threadCount); + final AtomicBoolean hasError = new AtomicBoolean(false); + + // Create and submit tasks + for (int i = 0; i < threadCount; i++) { + executor.submit(() -> { + try { + startLatch.await(); // Wait for all threads to be ready + for (int j = 0; j < operationsPerThread; j++) { + concurrentWindow.add(); + // Check window status occasionally + if (j % 10 == 0) { + concurrentWindow.isFull(); + } + } + } catch (Exception e) { + hasError.set(true); + e.printStackTrace(); + } finally { + finishLatch.countDown(); + } + }); + } + + // Start all threads + startLatch.countDown(); + + // Wait for all threads to finish + finishLatch.await(); + executor.shutdown(); + executor.awaitTermination(5, TimeUnit.SECONDS); + + // Verify no exceptions occurred + assertFalse(hasError.get(), "Concurrent operations caused errors"); + + // Window should be full after all those operations + assertTrue(concurrentWindow.isFull()); + } +} diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/utils/package-info.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/utils/package-info.java new file mode 100644 index 000000000000..170e20abca8f --- /dev/null +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/utils/package-info.java @@ -0,0 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** Tests for Common container utils. */ +package org.apache.hadoop.ozone.container.common.utils; diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestStorageVolumeHealthChecks.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestStorageVolumeHealthChecks.java index 25c6f05585a1..f5f26ae762ef 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestStorageVolumeHealthChecks.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestStorageVolumeHealthChecks.java @@ -21,8 +21,10 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import java.io.File; +import java.lang.reflect.Field; import java.nio.file.Path; import java.nio.file.Paths; +import java.time.Duration; import java.util.UUID; import java.util.stream.Stream; import org.apache.commons.io.FileUtils; @@ -31,6 +33,7 @@ import org.apache.hadoop.hdfs.server.datanode.checker.VolumeCheckResult; import org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration; import org.apache.hadoop.ozone.container.common.utils.DiskCheckUtil; +import 
org.apache.hadoop.ozone.container.common.utils.SlidingWindow; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Named; import org.junit.jupiter.api.Test; @@ -309,8 +312,19 @@ private void testCheckIOUntilFailure(StorageVolume.Builder builder, StorageVolume volume = builder.build(); volume.format(CLUSTER_ID); volume.createTmpDirs(CLUSTER_ID); + // Sliding window protocol transitioned from count-based to a time-based system + // Update the default failure duration of the window from 1 hour to a shorter duration for the test + long eventRate = 1L; + Field expiryDuration = SlidingWindow.class.getDeclaredField("expiryDuration"); + Field expiryDurationMillis = SlidingWindow.class.getDeclaredField("expiryDurationMillis"); + expiryDuration.setAccessible(true); + expiryDurationMillis.setAccessible(true); + expiryDuration.set(volume.getIoTestSlidingWindow(), Duration.ofMillis(eventRate * ioTestCount)); + expiryDurationMillis.set(volume.getIoTestSlidingWindow(), Duration.ofMillis(eventRate * ioTestCount).toMillis()); for (int i = 0; i < checkResults.length; i++) { + // Sleep to allow entries in the sliding window to eventually timeout + Thread.sleep(eventRate); final boolean result = checkResults[i]; final DiskCheckUtil.DiskChecks ioResult = new DiskCheckUtil.DiskChecks() { @Override From 625d52a5afea5b01927f01460b3ca332118737c9 Mon Sep 17 00:00:00 2001 From: Rishabh Patel Date: Mon, 21 Jul 2025 23:07:26 -0700 Subject: [PATCH 02/11] HDDS-13108. 
Remove unused SlidingWindow implementation and associated tests --- .../container/common/utils/SlidingWindow.java | 106 ------------ .../common/utils/TestSlidingWindow.java | 157 ------------------ .../container/common/utils/package-info.java | 19 --- 3 files changed, 282 deletions(-) delete mode 100644 hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/SlidingWindow.java delete mode 100644 hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/utils/TestSlidingWindow.java delete mode 100644 hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/utils/package-info.java diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/SlidingWindow.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/SlidingWindow.java deleted file mode 100644 index 11557fe9190c..000000000000 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/utils/SlidingWindow.java +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.ozone.container.common.utils; - -import java.time.Duration; -import java.util.ArrayDeque; -import java.util.Deque; -import org.apache.hadoop.util.Time; - -/** - * A time-based sliding window implementation that tracks event timestamps. - */ -public class SlidingWindow { - private final Object lock = new Object(); - private final int windowSize; - private final Deque timestamps; - private final long expiryDurationMillis; - private final Duration expiryDuration; - - /** - * @param windowSize the maximum number of events that are tracked - * @param expiryDuration the duration after which an entry in the window expires - */ - public SlidingWindow(int windowSize, Duration expiryDuration) { - if (windowSize <= 0) { - throw new IllegalArgumentException("Window size must be greater than 0"); - } - if (expiryDuration.isNegative() || expiryDuration.isZero()) { - throw new IllegalArgumentException("Expiry duration must be greater than 0"); - } - this.expiryDuration = expiryDuration; - this.expiryDurationMillis = expiryDuration.toMillis(); - this.windowSize = windowSize; - // We limit the initial queue size to 100 to control the memory usage - this.timestamps = new ArrayDeque<>(Math.min(windowSize + 1, 100)); - } - - public void add() { - synchronized (lock) { - removeExpired(); - - if (isFull()) { - timestamps.remove(); - } - - timestamps.add(getCurrentTime()); - } - } - - /** - * Checks if the sliding window has exceeded its maximum size. - * This is useful to track if we have encountered more events than the window's defined limit. - * @return true if the number of tracked timestamps in the sliding window - * exceeds the specified window size, false otherwise. 
- */ - public boolean isFull() { - synchronized (lock) { - removeExpired(); - return timestamps.size() > windowSize; - } - } - - private void removeExpired() { - synchronized (lock) { - long currentTime = getCurrentTime(); - long expirationThreshold = currentTime - expiryDurationMillis; - - while (!timestamps.isEmpty() && timestamps.peek() < expirationThreshold) { - timestamps.remove(); - } - } - } - - public int getWindowSize() { - return windowSize; - } - - public int getSize() { - synchronized (lock) { - return timestamps.size(); - } - } - - public Duration getExpiryDuration() { - return expiryDuration; - } - - private long getCurrentTime() { - return Time.monotonicNow(); - } -} diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/utils/TestSlidingWindow.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/utils/TestSlidingWindow.java deleted file mode 100644 index f697a319a05d..000000000000 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/utils/TestSlidingWindow.java +++ /dev/null @@ -1,157 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.ozone.container.common.utils; - -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.time.Duration; -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicBoolean; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.Timeout; - -/** - * Tests for {@link SlidingWindow} class. - */ -public class TestSlidingWindow { - - private SlidingWindow slidingWindow; - - @BeforeEach - public void setup() { - slidingWindow = new SlidingWindow(3, Duration.ofSeconds(5)); - } - - @Test - public void testConstructorValidation() { - // Test invalid window size - assertThrows(IllegalArgumentException.class, () -> - new SlidingWindow(0, Duration.ofMillis(100))); - assertThrows(IllegalArgumentException.class, () -> - new SlidingWindow(-1, Duration.ofMillis(100))); - - // Test invalid expiry duration - assertThrows(IllegalArgumentException.class, () -> - new SlidingWindow(1, Duration.ofMillis(0))); - assertThrows(IllegalArgumentException.class, () -> - new SlidingWindow(1, Duration.ofMillis(-1))); - } - - @Test - public void testAdd() { - for (int i = 0; i < slidingWindow.getWindowSize(); i++) { - slidingWindow.add(); - assertFalse(slidingWindow.isFull()); - } - - slidingWindow.add(); - assertTrue(slidingWindow.isFull()); - } - - @Test - public void testEventExpiration() throws InterruptedException { - slidingWindow = new SlidingWindow(2, Duration.ofMillis(500)); - - // Add events to reach threshold - slidingWindow.add(); - slidingWindow.add(); - slidingWindow.add(); - assertTrue(slidingWindow.isFull()); - - // Wait for events to expire - Thread.sleep(600); - - assertFalse(slidingWindow.isFull()); - - // 
Add one more event - should not be enough to mark as full - slidingWindow.add(); - assertFalse(slidingWindow.isFull()); - } - - @Test - public void testPartialExpiration() throws InterruptedException { - slidingWindow = new SlidingWindow(3, Duration.ofSeconds(1)); - - slidingWindow.add(); - slidingWindow.add(); - slidingWindow.add(); - slidingWindow.add(); - assertTrue(slidingWindow.isFull()); - - Thread.sleep(600); - slidingWindow.add(); // this will remove the oldest event as the window is full - - // Wait for the oldest events to expire - Thread.sleep(500); - assertFalse(slidingWindow.isFull()); - } - - @Test - @Timeout(value = 10) - public void testConcurrentAccess() throws InterruptedException { - // Create a sliding window with size of 5 - final SlidingWindow concurrentWindow = new SlidingWindow(5, Duration.ofSeconds(5)); - final int threadCount = 10; - final int operationsPerThread = 100; - final ExecutorService executor = Executors.newFixedThreadPool(threadCount); - final CountDownLatch startLatch = new CountDownLatch(1); - final CountDownLatch finishLatch = new CountDownLatch(threadCount); - final AtomicBoolean hasError = new AtomicBoolean(false); - - // Create and submit tasks - for (int i = 0; i < threadCount; i++) { - executor.submit(() -> { - try { - startLatch.await(); // Wait for all threads to be ready - for (int j = 0; j < operationsPerThread; j++) { - concurrentWindow.add(); - // Check window status occasionally - if (j % 10 == 0) { - concurrentWindow.isFull(); - } - } - } catch (Exception e) { - hasError.set(true); - e.printStackTrace(); - } finally { - finishLatch.countDown(); - } - }); - } - - // Start all threads - startLatch.countDown(); - - // Wait for all threads to finish - finishLatch.await(); - executor.shutdown(); - executor.awaitTermination(5, TimeUnit.SECONDS); - - // Verify no exceptions occurred - assertFalse(hasError.get(), "Concurrent operations caused errors"); - - // Window should be full after all those operations - 
assertTrue(concurrentWindow.isFull()); - } -} diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/utils/package-info.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/utils/package-info.java deleted file mode 100644 index 170e20abca8f..000000000000 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/utils/package-info.java +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** Tests for Common container utils. */ -package org.apache.hadoop.ozone.container.common.utils; From 33b9a4b5c238adfb0ea28c969989cced56f2d685 Mon Sep 17 00:00:00 2001 From: Rishabh Patel Date: Mon, 21 Jul 2025 23:33:48 -0700 Subject: [PATCH 03/11] HDDS-13108. 
Replace count-based failure checks with time-based sliding window mechanism --- .../org/apache/hadoop/hdds/utils/SlidingWindow.java | 4 ++++ .../ozone/container/common/volume/HddsVolume.java | 4 ++-- .../ozone/container/common/volume/StorageVolume.java | 8 ++++---- .../common/volume/TestStorageVolumeHealthChecks.java | 12 +++++++----- 4 files changed, 17 insertions(+), 11 deletions(-) diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/SlidingWindow.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/SlidingWindow.java index 744e842a398a..64cb24588bef 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/SlidingWindow.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/SlidingWindow.java @@ -149,6 +149,10 @@ private long getCurrentTime() { return clock.millis(); } + public long getExpiryDurationMillis() { + return expiryDurationMillis; + } + /** * A custom monotonic clock implementation. * Implementation of Clock that uses System.nanoTime() for real usage. 
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java index f76a085bb84c..35a0578ee5d3 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java @@ -319,7 +319,7 @@ public VolumeCheckResult checkDbHealth(File dbFile) throws InterruptedException getIoTestSlidingWindow().add(); } - if (getIoTestSlidingWindow().isFull()) { + if (getIoTestSlidingWindow().isExceeded()) { LOG.error("Failed to open the database at \"{}\" for HDDS volume {}: " + "encountered more than the {} tolerated failures.", dbFile, this, getIoTestSlidingWindow().getWindowSize()); @@ -328,7 +328,7 @@ public VolumeCheckResult checkDbHealth(File dbFile) throws InterruptedException LOG.debug("Successfully opened the database at \"{}\" for HDDS volume {}: " + "encountered {} out of {} tolerated failures", - dbFile, this, getIoTestSlidingWindow().getSize(), getIoTestSlidingWindow().getWindowSize()); + dbFile, this, getIoTestSlidingWindow().getNumEventsInWindow(), getIoTestSlidingWindow().getWindowSize()); return VolumeCheckResult.HEALTHY; } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/StorageVolume.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/StorageVolume.java index 3293d54fb2bc..f8b3485be10a 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/StorageVolume.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/StorageVolume.java @@ -39,6 +39,7 @@ import org.apache.hadoop.hdds.fs.SpaceUsageCheckFactory; import 
org.apache.hadoop.hdds.fs.SpaceUsageCheckParams; import org.apache.hadoop.hdds.fs.SpaceUsageSource; +import org.apache.hadoop.hdds.utils.SlidingWindow; import org.apache.hadoop.hdfs.server.datanode.StorageLocation; import org.apache.hadoop.hdfs.server.datanode.checker.Checkable; import org.apache.hadoop.hdfs.server.datanode.checker.VolumeCheckResult; @@ -47,7 +48,6 @@ import org.apache.hadoop.ozone.container.common.impl.StorageLocationReport; import org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration; import org.apache.hadoop.ozone.container.common.utils.DiskCheckUtil; -import org.apache.hadoop.ozone.container.common.utils.SlidingWindow; import org.apache.hadoop.ozone.container.common.utils.StorageVolumeUtil; import org.apache.hadoop.util.Time; import org.slf4j.Logger; @@ -702,14 +702,14 @@ public synchronized VolumeCheckResult check(@Nullable Boolean unused) // If the failure threshold has been crossed, fail the volume without further scans. // Once the volume is failed, it will not be checked anymore. // The failure counts can be left as is. 
- if (ioTestSlidingWindow.isFull()) { + if (ioTestSlidingWindow.isExceeded()) { LOG.error("Failed IO test for volume {}: encountered more than the {} tolerated failures within the past {} ms.", - this, ioTestSlidingWindow.getWindowSize(), ioTestSlidingWindow.getExpiryDuration().toMillis()); + this, ioTestSlidingWindow.getWindowSize(), ioTestSlidingWindow.getExpiryDurationMillis()); return VolumeCheckResult.FAILED; } LOG.debug("IO test results for volume {}: encountered {} out of {} tolerated failures", - this, ioTestSlidingWindow.getSize(), ioTestSlidingWindow.getWindowSize()); + this, ioTestSlidingWindow.getNumEventsInWindow(), ioTestSlidingWindow.getWindowSize()); return VolumeCheckResult.HEALTHY; } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestStorageVolumeHealthChecks.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestStorageVolumeHealthChecks.java index f5f26ae762ef..f3514af54008 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestStorageVolumeHealthChecks.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestStorageVolumeHealthChecks.java @@ -30,10 +30,11 @@ import org.apache.commons.io.FileUtils; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.fs.MockSpaceUsageCheckFactory; +import org.apache.hadoop.hdds.utils.SlidingWindow; import org.apache.hadoop.hdfs.server.datanode.checker.VolumeCheckResult; import org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration; import org.apache.hadoop.ozone.container.common.utils.DiskCheckUtil; -import org.apache.hadoop.ozone.container.common.utils.SlidingWindow; +import org.apache.ozone.test.TestClock; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Named; import org.junit.jupiter.api.Test; @@ -315,16 +316,17 @@ private void 
testCheckIOUntilFailure(StorageVolume.Builder builder, // Sliding window protocol transitioned from count-based to a time-based system // Update the default failure duration of the window from 1 hour to a shorter duration for the test long eventRate = 1L; - Field expiryDuration = SlidingWindow.class.getDeclaredField("expiryDuration"); + TestClock testClock = TestClock.newInstance(); + Field clock = SlidingWindow.class.getDeclaredField("clock"); Field expiryDurationMillis = SlidingWindow.class.getDeclaredField("expiryDurationMillis"); - expiryDuration.setAccessible(true); + clock.setAccessible(true); expiryDurationMillis.setAccessible(true); - expiryDuration.set(volume.getIoTestSlidingWindow(), Duration.ofMillis(eventRate * ioTestCount)); + clock.set(volume.getIoTestSlidingWindow(), testClock); expiryDurationMillis.set(volume.getIoTestSlidingWindow(), Duration.ofMillis(eventRate * ioTestCount).toMillis()); for (int i = 0; i < checkResults.length; i++) { // Sleep to allow entries in the sliding window to eventually timeout - Thread.sleep(eventRate); + testClock.fastForward(eventRate); final boolean result = checkResults[i]; final DiskCheckUtil.DiskChecks ioResult = new DiskCheckUtil.DiskChecks() { @Override From e81f59b2ff6a8a43d572cc6b29bf97f43dfab493 Mon Sep 17 00:00:00 2001 From: Rishabh Patel Date: Wed, 30 Jul 2025 23:34:59 -0700 Subject: [PATCH 04/11] HDDS-13108. 
Add configuration for sliding window timeout in disk checks --- .../statemachine/DatanodeConfiguration.java | 28 +++++++++++++++++++ .../common/volume/StorageVolume.java | 4 +-- .../volume/TestStorageVolumeHealthChecks.java | 6 ++-- 3 files changed, 32 insertions(+), 6 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java index 6d2b92831476..44977363897a 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java @@ -61,6 +61,7 @@ public class DatanodeConfiguration extends ReconfigurableConfig { public static final String FAILED_DB_VOLUMES_TOLERATED_KEY = "hdds.datanode.failed.db.volumes.tolerated"; public static final String DISK_CHECK_MIN_GAP_KEY = "hdds.datanode.disk.check.min.gap"; public static final String DISK_CHECK_TIMEOUT_KEY = "hdds.datanode.disk.check.timeout"; + public static final String DISK_CHECK_SLIDING_WINDOW_TIMEOUT_KEY = "hdds.datanode.disk.check.sliding.window.timeout"; // Minimum space should be left on volume. 
// Ex: If volume has 1000GB and minFreeSpace is configured as 10GB, @@ -99,6 +100,8 @@ public class DatanodeConfiguration extends ReconfigurableConfig { static final Duration DISK_CHECK_TIMEOUT_DEFAULT = Duration.ofMinutes(10); + static final Duration DISK_CHECK_SLIDING_WINDOW_TIMEOUT_DEFAULT = Duration.ofMinutes(60); + static final boolean CONTAINER_SCHEMA_V3_ENABLED_DEFAULT = true; static final long ROCKSDB_LOG_MAX_FILE_SIZE_BYTES_DEFAULT = 32 * 1024 * 1024; static final int ROCKSDB_LOG_MAX_FILE_NUM_DEFAULT = 64; @@ -404,6 +407,16 @@ public class DatanodeConfiguration extends ReconfigurableConfig { ) private Duration diskCheckTimeout = DISK_CHECK_TIMEOUT_DEFAULT; + @Config(key = "disk.check.sliding.window.timeout", + defaultValue = "60m", + type = ConfigType.TIME, + tags = {ConfigTag.DATANODE}, + description = "Time interval after which a disk check" + + " failure result stored in the sliding window will expire." + + " Unit could be defined with postfix (ns,ms,s,m,h,d)." + ) + private Duration diskCheckSlidingWindowTimeout = DISK_CHECK_SLIDING_WINDOW_TIMEOUT_DEFAULT; + @Config(key = "chunk.data.validation.check", defaultValue = "false", type = ConfigType.BOOLEAN, @@ -688,6 +701,13 @@ public void validate() { diskCheckTimeout = DISK_CHECK_TIMEOUT_DEFAULT; } + if (diskCheckSlidingWindowTimeout.isNegative()) { + LOG.warn("{} must be greater than zero and was set to {}. Defaulting to {}", + DISK_CHECK_SLIDING_WINDOW_TIMEOUT_KEY, diskCheckSlidingWindowTimeout, + DISK_CHECK_SLIDING_WINDOW_TIMEOUT_DEFAULT); + diskCheckSlidingWindowTimeout = DISK_CHECK_SLIDING_WINDOW_TIMEOUT_DEFAULT; + } + if (blockDeleteCommandWorkerInterval.isNegative()) { LOG.warn(BLOCK_DELETE_COMMAND_WORKER_INTERVAL + " must be greater than zero and was set to {}. 
Defaulting to {}", @@ -907,6 +927,14 @@ public void setDiskCheckTimeout(Duration duration) { diskCheckTimeout = duration; } + public Duration getDiskCheckSlidingWindowTimeout() { + return diskCheckSlidingWindowTimeout; + } + + public void setDiskCheckSlidingWindowTimeout(Duration duration) { + diskCheckSlidingWindowTimeout = duration; + } + public int getBlockDeleteThreads() { return blockDeleteThreads; } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/StorageVolume.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/StorageVolume.java index f8b3485be10a..bdc5f0864597 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/StorageVolume.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/StorageVolume.java @@ -26,7 +26,6 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; -import java.time.Duration; import java.util.Objects; import java.util.Optional; import java.util.Properties; @@ -158,7 +157,8 @@ protected StorageVolume(Builder b) throws IOException { this.conf = b.conf; this.dnConf = conf.getObject(DatanodeConfiguration.class); this.ioTestCount = dnConf.getVolumeIOTestCount(); - this.ioTestSlidingWindow = new SlidingWindow(dnConf.getVolumeIOFailureTolerance(), Duration.ofHours(1)); + this.ioTestSlidingWindow = new SlidingWindow(dnConf.getVolumeIOFailureTolerance(), + dnConf.getDiskCheckSlidingWindowTimeout()); this.healthCheckFileSize = dnConf.getVolumeHealthCheckFileSize(); } else { storageDir = new File(b.volumeRootStr); diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestStorageVolumeHealthChecks.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestStorageVolumeHealthChecks.java index f3514af54008..c55aa1ba1724 100644 
--- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestStorageVolumeHealthChecks.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestStorageVolumeHealthChecks.java @@ -308,21 +308,19 @@ private void testCheckIOUntilFailure(StorageVolume.Builder builder, DatanodeConfiguration dnConf = CONF.getObject(DatanodeConfiguration.class); dnConf.setVolumeIOTestCount(ioTestCount); dnConf.setVolumeIOFailureTolerance(ioFailureTolerance); + dnConf.setDiskCheckSlidingWindowTimeout(Duration.ofMillis(ioTestCount)); CONF.setFromObject(dnConf); builder.conf(CONF); StorageVolume volume = builder.build(); volume.format(CLUSTER_ID); volume.createTmpDirs(CLUSTER_ID); // Sliding window protocol transitioned from count-based to a time-based system - // Update the default failure duration of the window from 1 hour to a shorter duration for the test + // Update the default failure duration of the window from 60 minutes to a shorter duration for the test long eventRate = 1L; TestClock testClock = TestClock.newInstance(); Field clock = SlidingWindow.class.getDeclaredField("clock"); - Field expiryDurationMillis = SlidingWindow.class.getDeclaredField("expiryDurationMillis"); clock.setAccessible(true); - expiryDurationMillis.setAccessible(true); clock.set(volume.getIoTestSlidingWindow(), testClock); - expiryDurationMillis.set(volume.getIoTestSlidingWindow(), Duration.ofMillis(eventRate * ioTestCount).toMillis()); for (int i = 0; i < checkResults.length; i++) { // Sleep to allow entries in the sliding window to eventually timeout From 17fa93851ce9ce4f44b7c7ce4f6d68e146515fce Mon Sep 17 00:00:00 2001 From: Rishabh Patel Date: Thu, 12 Feb 2026 01:38:24 -0800 Subject: [PATCH 05/11] HDDS-13108. 
Remove unused imports from HddsVolume --- .../apache/hadoop/ozone/container/common/volume/HddsVolume.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java index 99a09d181e00..8f69e54e8b2e 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java @@ -26,9 +26,7 @@ import java.io.File; import java.io.IOException; import java.util.Iterator; -import java.util.LinkedList; import java.util.List; -import java.util.Queue; import java.util.concurrent.ConcurrentSkipListSet; import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.atomic.AtomicBoolean; From 0b9e02b5a0a93042b8b4cd792a03fbe7125e65bf Mon Sep 17 00:00:00 2001 From: "Doroszlai, Attila" Date: Mon, 9 Mar 2026 10:38:46 +0100 Subject: [PATCH 06/11] checkstyle --- .../apache/hadoop/ozone/container/common/volume/HddsVolume.java | 2 +- .../hadoop/ozone/container/common/volume/StorageVolume.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java index 8f69e54e8b2e..6b3f65b4b57f 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/HddsVolume.java @@ -312,7 +312,7 @@ public VolumeCheckResult checkDbHealth(File dbFile) throws InterruptedException } try (ManagedOptions managedOptions = new ManagedOptions(); - ManagedRocksDB 
readOnlyDb = ManagedRocksDB.openReadOnly(managedOptions, dbFile.toString())) { + ManagedRocksDB ignored = ManagedRocksDB.openReadOnly(managedOptions, dbFile.toString())) { // Do nothing. Only check if rocksdb is accessible. LOG.debug("Successfully opened the database at \"{}\" for HDDS volume {}.", dbFile, getStorageDir()); } catch (Exception e) { diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/StorageVolume.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/StorageVolume.java index 4c2e648756b9..6889700f51cd 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/StorageVolume.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/StorageVolume.java @@ -38,9 +38,9 @@ import org.apache.hadoop.hdds.fs.SpaceUsageCheckFactory; import org.apache.hadoop.hdds.fs.SpaceUsageCheckParams; import org.apache.hadoop.hdds.fs.SpaceUsageSource; -import org.apache.hadoop.hdds.utils.SlidingWindow; import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.server.ServerUtils; +import org.apache.hadoop.hdds.utils.SlidingWindow; import org.apache.hadoop.hdfs.server.datanode.StorageLocation; import org.apache.hadoop.hdfs.server.datanode.checker.Checkable; import org.apache.hadoop.hdfs.server.datanode.checker.VolumeCheckResult; From 158819e2aac318642e08412b0e43ce23547f8a23 Mon Sep 17 00:00:00 2001 From: Rishabh Patel Date: Tue, 24 Mar 2026 19:32:30 -0700 Subject: [PATCH 07/11] HDDS-13108. 
Enforce minimum sliding window timeout based on disk check interval --- .../hadoop/hdds/utils/SlidingWindow.java | 2 +- .../statemachine/DatanodeConfiguration.java | 19 +++++++++++++++++-- .../volume/TestStorageVolumeHealthChecks.java | 18 ++++++++---------- 3 files changed, 26 insertions(+), 13 deletions(-) diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/SlidingWindow.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/SlidingWindow.java index 64cb24588bef..c3699336a8b6 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/SlidingWindow.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/SlidingWindow.java @@ -158,7 +158,7 @@ public long getExpiryDurationMillis() { * Implementation of Clock that uses System.nanoTime() for real usage. * See {@see org.apache.ozone.test.TestClock} */ - private static final class MonotonicClock extends Clock { + public static final class MonotonicClock extends Clock { @Override public long millis() { return TimeUnit.NANOSECONDS.toMillis(System.nanoTime()); diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java index acf687eb5c90..fecb566efadd 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java @@ -26,6 +26,7 @@ import static org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration.CONFIG_PREFIX; import java.time.Duration; +import java.time.temporal.ChronoUnit; import org.apache.hadoop.hdds.conf.Config; import org.apache.hadoop.hdds.conf.ConfigGroup; import org.apache.hadoop.hdds.conf.ConfigTag; @@ -100,7 +101,8 
@@ public class DatanodeConfiguration extends ReconfigurableConfig { static final Duration DISK_CHECK_TIMEOUT_DEFAULT = Duration.ofMinutes(10); - static final Duration DISK_CHECK_SLIDING_WINDOW_TIMEOUT_DEFAULT = Duration.ofMinutes(60); + static final Duration DISK_CHECK_SLIDING_WINDOW_TIMEOUT_DEFAULT = + Duration.ofMinutes(PERIODIC_DISK_CHECK_INTERVAL_MINUTES_DEFAULT).plus(DISK_CHECK_TIMEOUT_DEFAULT); static final boolean CONTAINER_SCHEMA_V3_ENABLED_DEFAULT = true; static final long ROCKSDB_LOG_MAX_FILE_SIZE_BYTES_DEFAULT = 32 * 1024 * 1024; @@ -408,11 +410,14 @@ public class DatanodeConfiguration extends ReconfigurableConfig { private Duration diskCheckTimeout = DISK_CHECK_TIMEOUT_DEFAULT; @Config(key = "hdds.datanode.disk.check.sliding.window.timeout", - defaultValue = "60m", + defaultValue = "70m", type = ConfigType.TIME, tags = {ConfigTag.DATANODE}, description = "Time interval after which a disk check" + " failure result stored in the sliding window will expire." + + " Do not set the window timeout period to less than or equal to the disk check interval period" + + " or failures can be missed across sparse checks" + + " e.g., every 120m interval with a 60m window rarely accumulates enough failed events" + " Unit could be defined with postfix (ns,ms,s,m,h,d)." ) private Duration diskCheckSlidingWindowTimeout = DISK_CHECK_SLIDING_WINDOW_TIMEOUT_DEFAULT; @@ -708,6 +713,16 @@ public void validate() { diskCheckSlidingWindowTimeout = DISK_CHECK_SLIDING_WINDOW_TIMEOUT_DEFAULT; } + // Do not set window timeout <= periodic disk check interval period, or failures can be missed across sparse checks + // e.g., every 120m interval with a 60m window rarely accumulates enough failed events + if (diskCheckSlidingWindowTimeout.get(ChronoUnit.MINUTES) <= periodicDiskCheckIntervalMinutes) { + LOG.warn("{} must be greater than or equal to {} minutes and was set to {} minutes. 
Defaulting to {}", + DISK_CHECK_SLIDING_WINDOW_TIMEOUT_KEY, periodicDiskCheckIntervalMinutes, + diskCheckSlidingWindowTimeout.get(ChronoUnit.MINUTES), + DISK_CHECK_SLIDING_WINDOW_TIMEOUT_DEFAULT); + diskCheckSlidingWindowTimeout = DISK_CHECK_SLIDING_WINDOW_TIMEOUT_DEFAULT; + } + if (blockDeleteCommandWorkerInterval.isNegative()) { LOG.warn(BLOCK_DELETE_COMMAND_WORKER_INTERVAL + " must be greater than zero and was set to {}. Defaulting to {}", diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestStorageVolumeHealthChecks.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestStorageVolumeHealthChecks.java index 5ac03d0219a5..495fcbf54635 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestStorageVolumeHealthChecks.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestStorageVolumeHealthChecks.java @@ -21,7 +21,6 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import java.io.File; -import java.lang.reflect.Field; import java.nio.file.Path; import java.nio.file.Paths; import java.time.Duration; @@ -30,7 +29,6 @@ import org.apache.commons.io.FileUtils; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.fs.MockSpaceUsageCheckFactory; -import org.apache.hadoop.hdds.utils.SlidingWindow; import org.apache.hadoop.hdfs.server.datanode.checker.VolumeCheckResult; import org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration; import org.apache.hadoop.ozone.container.common.utils.DiskCheckUtil; @@ -52,6 +50,7 @@ public class TestStorageVolumeHealthChecks { private static final String DATANODE_UUID = UUID.randomUUID().toString(); private static final String CLUSTER_ID = UUID.randomUUID().toString(); private static final OzoneConfiguration CONF = new OzoneConfiguration(); + private static final 
TestClock TEST_CLOCK = TestClock.newInstance(); @TempDir private static Path volumePath; @@ -61,19 +60,22 @@ public static Stream volumeBuilders() { new HddsVolume.Builder(volumePath.toString()) .datanodeUuid(DATANODE_UUID) .conf(CONF) - .usageCheckFactory(MockSpaceUsageCheckFactory.NONE); + .usageCheckFactory(MockSpaceUsageCheckFactory.NONE) + .clock(TEST_CLOCK); MetadataVolume.Builder metadataVolumeBuilder = new MetadataVolume.Builder(volumePath.toString()) .datanodeUuid(DATANODE_UUID) .conf(CONF) - .usageCheckFactory(MockSpaceUsageCheckFactory.NONE); + .usageCheckFactory(MockSpaceUsageCheckFactory.NONE) + .clock(TEST_CLOCK); DbVolume.Builder dbVolumeBuilder = new DbVolume.Builder(volumePath.toString()) .datanodeUuid(DATANODE_UUID) .conf(CONF) - .usageCheckFactory(MockSpaceUsageCheckFactory.NONE); + .usageCheckFactory(MockSpaceUsageCheckFactory.NONE) + .clock(TEST_CLOCK); return Stream.of( Arguments.of(Named.of("HDDS Volume", hddsVolumeBuilder)), @@ -317,14 +319,10 @@ private void testCheckIOUntilFailure(StorageVolume.Builder builder, // Sliding window protocol transitioned from count-based to a time-based system // Update the default failure duration of the window from 60 minutes to a shorter duration for the test long eventRate = 1L; - TestClock testClock = TestClock.newInstance(); - Field clock = SlidingWindow.class.getDeclaredField("clock"); - clock.setAccessible(true); - clock.set(volume.getIoTestSlidingWindow(), testClock); for (int i = 0; i < checkResults.length; i++) { // Sleep to allow entries in the sliding window to eventually timeout - testClock.fastForward(eventRate); + TEST_CLOCK.fastForward(eventRate); final boolean result = checkResults[i]; final DiskCheckUtil.DiskChecks ioResult = new DiskCheckUtil.DiskChecks() { @Override From 10d0dcd7cd46c8c388602a1b75d23573fc7a1258 Mon Sep 17 00:00:00 2001 From: Rishabh Patel Date: Tue, 24 Mar 2026 19:59:46 -0700 Subject: [PATCH 08/11] HDDS-13108. 
Add Clock support to StorageVolume and its Builder for customizable sliding window timestamps --- .../container/common/volume/StorageVolume.java | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/StorageVolume.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/StorageVolume.java index 6889700f51cd..01f0e351d722 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/StorageVolume.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/volume/StorageVolume.java @@ -27,6 +27,7 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; +import java.time.Clock; import java.util.Objects; import java.util.Properties; import java.util.UUID; @@ -160,7 +161,7 @@ protected StorageVolume(Builder b) throws IOException { this.dnConf = conf.getObject(DatanodeConfiguration.class); this.ioTestCount = dnConf.getVolumeIOTestCount(); this.ioTestSlidingWindow = new SlidingWindow(dnConf.getVolumeIOFailureTolerance(), - dnConf.getDiskCheckSlidingWindowTimeout()); + dnConf.getDiskCheckSlidingWindowTimeout(), b.getClock()); this.healthCheckFileSize = dnConf.getVolumeHealthCheckFileSize(); } else { storageDir = new File(b.volumeRootStr); @@ -405,6 +406,7 @@ public abstract static class Builder> { private boolean failedVolume = false; private String datanodeUuid; private String clusterID; + private Clock clock = new SlidingWindow.MonotonicClock(); public Builder(String volumeRootStr, String storageDirStr) { this.volumeRootStr = volumeRootStr; @@ -451,6 +453,11 @@ public T clusterID(String cid) { return this.getThis(); } + public T clock(Clock c) { + this.clock = c; + return this.getThis(); + } + public abstract StorageVolume build() throws IOException; public String getVolumeRootStr() { @@ -468,6 +475,10 @@ 
public StorageType getStorageType() { public String getStorageDirStr() { return this.storageDirStr; } + + public Clock getClock() { + return this.clock; + } } public String getVolumeRootDir() { From af0e4f83c95754e00a81ead2730d8727adebe82a Mon Sep 17 00:00:00 2001 From: Rishabh Patel Date: Tue, 24 Mar 2026 20:43:08 -0700 Subject: [PATCH 09/11] HDDS-13108. Update sliding window checks and fix test clock adjustments --- .../common/statemachine/DatanodeConfiguration.java | 8 ++++---- .../common/volume/TestStorageVolumeHealthChecks.java | 8 +++++--- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java index fecb566efadd..51af0a30df31 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeConfiguration.java @@ -26,7 +26,7 @@ import static org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration.CONFIG_PREFIX; import java.time.Duration; -import java.time.temporal.ChronoUnit; +import org.apache.commons.lang3.time.DurationFormatUtils; import org.apache.hadoop.hdds.conf.Config; import org.apache.hadoop.hdds.conf.ConfigGroup; import org.apache.hadoop.hdds.conf.ConfigTag; @@ -715,11 +715,11 @@ public void validate() { // Do not set window timeout <= periodic disk check interval period, or failures can be missed across sparse checks // e.g., every 120m interval with a 60m window rarely accumulates enough failed events - if (diskCheckSlidingWindowTimeout.get(ChronoUnit.MINUTES) <= periodicDiskCheckIntervalMinutes) { + if
(diskCheckSlidingWindowTimeout.compareTo(Duration.ofMinutes(periodicDiskCheckIntervalMinutes)) < 0) { LOG.warn("{} must be greater than or equal to {} minutes and was set to {} minutes. Defaulting to {}", DISK_CHECK_SLIDING_WINDOW_TIMEOUT_KEY, periodicDiskCheckIntervalMinutes, - diskCheckSlidingWindowTimeout.get(ChronoUnit.MINUTES), - DISK_CHECK_SLIDING_WINDOW_TIMEOUT_DEFAULT); + diskCheckSlidingWindowTimeout.toMinutes(), + DurationFormatUtils.formatDurationHMS(DISK_CHECK_SLIDING_WINDOW_TIMEOUT_DEFAULT.toMillis())); diskCheckSlidingWindowTimeout = DISK_CHECK_SLIDING_WINDOW_TIMEOUT_DEFAULT; } diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestStorageVolumeHealthChecks.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestStorageVolumeHealthChecks.java index 495fcbf54635..06d844ca9183 100644 --- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestStorageVolumeHealthChecks.java +++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/volume/TestStorageVolumeHealthChecks.java @@ -317,12 +317,14 @@ private void testCheckIOUntilFailure(StorageVolume.Builder builder, volume.format(CLUSTER_ID); volume.createTmpDirs(CLUSTER_ID); // Sliding window protocol transitioned from count-based to a time-based system - // Update the default failure duration of the window from 60 minutes to a shorter duration for the test - long eventRate = 1L; + // Update the event rate so that all the tested events are processed within the same sliding window period + long slidingWindowTimeoutMillis = volume.getConf().getObject(DatanodeConfiguration.class) + .getDiskCheckSlidingWindowTimeout().toMillis(); + long eventRateMillis = slidingWindowTimeoutMillis / ioTestCount; for (int i = 0; i < checkResults.length; i++) { // Sleep to allow entries in the sliding window to eventually timeout -
TEST_CLOCK.fastForward(eventRate); + TEST_CLOCK.fastForward(eventRateMillis); final boolean result = checkResults[i]; final DiskCheckUtil.DiskChecks ioResult = new DiskCheckUtil.DiskChecks() { @Override From 1ec9e0e6f1a6e72c6e37fe9d2ea58b18e0c06a48 Mon Sep 17 00:00:00 2001 From: Rishabh Patel <1607531+ptlrs@users.noreply.github.com> Date: Wed, 25 Mar 2026 08:30:45 -0700 Subject: [PATCH 10/11] Update SlidingWindow.java Co-authored-by: Doroszlai, Attila <6454655+adoroszlai@users.noreply.github.com> --- .../main/java/org/apache/hadoop/hdds/utils/SlidingWindow.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/SlidingWindow.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/SlidingWindow.java index c3699336a8b6..723fe3804374 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/SlidingWindow.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/SlidingWindow.java @@ -156,7 +156,7 @@ public long getExpiryDurationMillis() { /** * A custom monotonic clock implementation. * Implementation of Clock that uses System.nanoTime() for real usage. - * See {@see org.apache.ozone.test.TestClock} + * @see org.apache.ozone.test.TestClock */ public static final class MonotonicClock extends Clock { @Override From 0de347cb8130ac35b77cd474ceaf36e36b0bc456 Mon Sep 17 00:00:00 2001 From: Rishabh Patel Date: Wed, 25 Mar 2026 12:50:34 -0700 Subject: [PATCH 11/11] HDDS-13108.
Fix Javadoc for MonotonicClock in SlidingWindow --- .../java/org/apache/hadoop/hdds/utils/SlidingWindow.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/SlidingWindow.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/SlidingWindow.java index 723fe3804374..316c88aba57b 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/SlidingWindow.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/SlidingWindow.java @@ -154,9 +154,10 @@ public long getExpiryDurationMillis() { } /** - * A custom monotonic clock implementation. + * A custom monotonic clock implementation to allow overriding the current time for testing purposes. * Implementation of Clock that uses System.nanoTime() for real usage. - * @see org.apache.ozone.test.TestClock + * The class {@code org.apache.ozone.test.TestClock} provides a mock clock which can be used + * to manipulate the current time in tests. */ public static final class MonotonicClock extends Clock { @Override