-
Notifications
You must be signed in to change notification settings - Fork 598
HDDS-13108. Refactor StorageVolume to use SlidingWindow #8843
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
b1df449
625d52a
f108d3c
33b9a4b
e81f59b
9c9c564
17fa938
4d8b6fe
0b9e02b
158819e
10d0dcd
af0e4f8
1ec9e0e
0de347c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -27,12 +27,10 @@ | |
| import java.io.IOException; | ||
| import java.nio.file.Files; | ||
| import java.nio.file.Path; | ||
| import java.util.LinkedList; | ||
| import java.time.Clock; | ||
| import java.util.Objects; | ||
| import java.util.Properties; | ||
| import java.util.Queue; | ||
| import java.util.UUID; | ||
| import java.util.concurrent.atomic.AtomicInteger; | ||
| import java.util.function.Function; | ||
| import java.util.function.Supplier; | ||
| import java.util.stream.Stream; | ||
|
|
@@ -43,6 +41,7 @@ | |
| import org.apache.hadoop.hdds.fs.SpaceUsageSource; | ||
| import org.apache.hadoop.hdds.scm.ScmConfigKeys; | ||
| import org.apache.hadoop.hdds.server.ServerUtils; | ||
| import org.apache.hadoop.hdds.utils.SlidingWindow; | ||
| import org.apache.hadoop.hdfs.server.datanode.StorageLocation; | ||
| import org.apache.hadoop.hdfs.server.datanode.checker.Checkable; | ||
| import org.apache.hadoop.hdfs.server.datanode.checker.VolumeCheckResult; | ||
|
|
@@ -111,9 +110,7 @@ public abstract class StorageVolume implements Checkable<Boolean, VolumeCheckRes | |
| tests run, then the volume is considered failed. | ||
| */ | ||
| private final int ioTestCount; | ||
| private final int ioFailureTolerance; | ||
| private AtomicInteger currentIOFailureCount; | ||
| private Queue<Boolean> ioTestSlidingWindow; | ||
| private SlidingWindow ioTestSlidingWindow; | ||
| private int healthCheckFileSize; | ||
|
|
||
| /** | ||
|
|
@@ -163,9 +160,8 @@ protected StorageVolume(Builder<?> b) throws IOException { | |
| this.conf = b.conf; | ||
| this.dnConf = conf.getObject(DatanodeConfiguration.class); | ||
| this.ioTestCount = dnConf.getVolumeIOTestCount(); | ||
| this.ioFailureTolerance = dnConf.getVolumeIOFailureTolerance(); | ||
| this.ioTestSlidingWindow = new LinkedList<>(); | ||
| this.currentIOFailureCount = new AtomicInteger(0); | ||
| this.ioTestSlidingWindow = new SlidingWindow(dnConf.getVolumeIOFailureTolerance(), | ||
ptlrs marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| dnConf.getDiskCheckSlidingWindowTimeout(), b.getClock()); | ||
| this.healthCheckFileSize = dnConf.getVolumeHealthCheckFileSize(); | ||
| } else { | ||
| storageDir = new File(b.volumeRootStr); | ||
|
|
@@ -174,7 +170,6 @@ protected StorageVolume(Builder<?> b) throws IOException { | |
| this.storageID = UUID.randomUUID().toString(); | ||
| this.state = VolumeState.FAILED; | ||
| this.ioTestCount = 0; | ||
| this.ioFailureTolerance = 0; | ||
| this.conf = null; | ||
| this.dnConf = null; | ||
| } | ||
|
|
@@ -411,6 +406,7 @@ public abstract static class Builder<T extends Builder<T>> { | |
| private boolean failedVolume = false; | ||
| private String datanodeUuid; | ||
| private String clusterID; | ||
| private Clock clock = new SlidingWindow.MonotonicClock(); | ||
|
|
||
| public Builder(String volumeRootStr, String storageDirStr) { | ||
| this.volumeRootStr = volumeRootStr; | ||
|
|
@@ -457,6 +453,11 @@ public T clusterID(String cid) { | |
| return this.getThis(); | ||
| } | ||
|
|
||
| public T clock(Clock c) { | ||
| this.clock = c; | ||
| return this.getThis(); | ||
| } | ||
|
|
||
| public abstract StorageVolume build() throws IOException; | ||
|
|
||
| public String getVolumeRootStr() { | ||
|
|
@@ -474,6 +475,10 @@ public StorageType getStorageType() { | |
| public String getStorageDirStr() { | ||
| return this.storageDirStr; | ||
| } | ||
|
|
||
| public Clock getClock() { | ||
| return this.clock; | ||
| } | ||
| } | ||
|
|
||
| public String getVolumeRootDir() { | ||
|
|
@@ -554,6 +559,14 @@ public VolumeSet getVolumeSet() { | |
| return this.volumeSet; | ||
| } | ||
|
|
||
| public int getIoTestCount() { | ||
| return ioTestCount; | ||
| } | ||
|
|
||
| public SlidingWindow getIoTestSlidingWindow() { | ||
| return ioTestSlidingWindow; | ||
| } | ||
|
|
||
| public StorageType getStorageType() { | ||
| return storageType; | ||
| } | ||
|
|
@@ -661,7 +674,7 @@ private void cleanTmpDiskCheckDir() { | |
| * check consists of a directory check and an IO check. | ||
| * | ||
| * If the directory check fails, the volume check fails immediately. | ||
| * The IO check is allows to fail up to {@code ioFailureTolerance} times | ||
| * The IO check is allowed to fail up to {@code ioFailureTolerance} times | ||
| * out of the last {@code ioTestCount} IO checks before this volume check is | ||
| * failed. Each call to this method runs one IO check. | ||
| * | ||
|
|
@@ -698,7 +711,6 @@ public synchronized VolumeCheckResult check(@Nullable Boolean unused) | |
| // to avoid volume failure we can ignore checking disk read/write | ||
| int minimumDiskSpace = healthCheckFileSize * 2; | ||
| if (getCurrentUsage().getAvailable() < minimumDiskSpace) { | ||
| ioTestSlidingWindow.add(true); | ||
| return VolumeCheckResult.HEALTHY; | ||
| } | ||
|
|
||
|
|
@@ -717,39 +729,25 @@ public synchronized VolumeCheckResult check(@Nullable Boolean unused) | |
| // We can check again if disk is full. If it is full, | ||
| // in this case keep volume as healthy so that READ can still be served | ||
| if (!diskChecksPassed && getCurrentUsage().getAvailable() < minimumDiskSpace) { | ||
| ioTestSlidingWindow.add(true); | ||
| return VolumeCheckResult.HEALTHY; | ||
| } | ||
|
|
||
| // Move the sliding window of IO test results forward 1 by adding the | ||
| // latest entry and removing the oldest entry from the window. | ||
| // Update the failure counter for the new window. | ||
| ioTestSlidingWindow.add(diskChecksPassed); | ||
| if (!diskChecksPassed) { | ||
| currentIOFailureCount.incrementAndGet(); | ||
| } | ||
| if (ioTestSlidingWindow.size() > ioTestCount && | ||
| Objects.equals(ioTestSlidingWindow.poll(), Boolean.FALSE)) { | ||
| currentIOFailureCount.decrementAndGet(); | ||
| ioTestSlidingWindow.add(); | ||
| } | ||
|
|
||
| // If the failure threshold has been crossed, fail the volume without | ||
| // further scans. | ||
| // If the failure threshold has been crossed, fail the volume without further scans. | ||
| // Once the volume is failed, it will not be checked anymore. | ||
| // The failure counts can be left as is. | ||
| if (currentIOFailureCount.get() > ioFailureTolerance) { | ||
| LOG.error("Failed IO test for volume {}: the last {} runs " + | ||
| "encountered {} out of {} tolerated failures.", this, | ||
| ioTestSlidingWindow.size(), currentIOFailureCount, | ||
| ioFailureTolerance); | ||
| if (ioTestSlidingWindow.isExceeded()) { | ||
| LOG.error("Failed IO test for volume {}: encountered more than the {} tolerated failures within the past {} ms.", | ||
| this, ioTestSlidingWindow.getWindowSize(), ioTestSlidingWindow.getExpiryDurationMillis()); | ||
| return VolumeCheckResult.FAILED; | ||
| } else if (LOG.isDebugEnabled()) { | ||
| LOG.debug("IO test results for volume {}: the last {} runs encountered " + | ||
| "{} out of {} tolerated failures", this, | ||
| ioTestSlidingWindow.size(), | ||
| currentIOFailureCount, ioFailureTolerance); | ||
| } | ||
|
|
||
| LOG.debug("IO test results for volume {}: encountered {} out of {} tolerated failures", | ||
| this, ioTestSlidingWindow.getNumEventsInWindow(), ioTestSlidingWindow.getWindowSize()); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Shall we call getNumEvents() here, instead of getNumEventsInWindow()?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
This log line is logged every time in debug mode. Since our queue is larger than the window, logging the total event count via getNumEvents() could report failures that have already expired out of the window. We are using getNumEventsInWindow() so the log reflects only the failures currently counted against the tolerance. |
||
|
|
||
| return VolumeCheckResult.HEALTHY; | ||
| } | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What is the recommended relationship between this new property and PERIODIC_DISK_CHECK_INTERVAL_MINUTES_DEFAULT? For example, if a user reconfigures PERIODIC_DISK_CHECK_INTERVAL_MINUTES_DEFAULT to 2h or 30m, should we suggest that they reconfigure this property as well?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is a good question.
The periodic disk check currently runs every 1 hour and the sliding window coverage is also 1 hour.
The window should likely cover the period between and inclusive of the two periodic checks such that if two periodic checks fail, then they get counted in the same window.
Otherwise, the only way a volume could accumulate enough failures would be through a combination of periodic and on-demand checks — it could never fail due to two consecutive periodic checks alone.
I have updated the sliding window to be as long as the periodic disk check interval plus the timeout value for the disk check.