From 9b9154b2480680b6ee42df6da20ef40614605888 Mon Sep 17 00:00:00 2001 From: bitflicker64 Date: Thu, 5 Mar 2026 14:24:00 +0530 Subject: [PATCH 1/7] Fix #2960: resolve hostname allowlist entries in IpAuthHandler at startup to avoid Netty DNS blocking --- .../hugegraph/pd/raft/auth/IpAuthHandler.java | 25 ++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/auth/IpAuthHandler.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/auth/IpAuthHandler.java index 2ac384541d..38ab3e9cf3 100644 --- a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/auth/IpAuthHandler.java +++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/auth/IpAuthHandler.java @@ -18,7 +18,10 @@ package org.apache.hugegraph.pd.raft.auth; import java.net.InetSocketAddress; +import java.net.InetAddress; +import java.net.UnknownHostException; import java.util.Collections; +import java.util.HashSet; import java.util.Set; import io.netty.channel.ChannelDuplexHandler; @@ -30,11 +33,14 @@ @ChannelHandler.Sharable public class IpAuthHandler extends ChannelDuplexHandler { + // Retained for potential refresh of resolvedIps on membership changes private final Set allowedIps; + private volatile Set resolvedIps; private static volatile IpAuthHandler instance; private IpAuthHandler(Set allowedIps) { this.allowedIps = Collections.unmodifiableSet(allowedIps); + this.resolvedIps = resolveAll(allowedIps); } public static IpAuthHandler getInstance(Set allowedIps) { @@ -65,7 +71,24 @@ private static String getClientIp(ChannelHandlerContext ctx) { } private boolean isIpAllowed(String ip) { - return allowedIps.isEmpty() || allowedIps.contains(ip); + Set resolved = this.resolvedIps; + return resolved.isEmpty() || resolved.contains(ip); + } + + private static Set resolveAll(Set entries) { + Set result = new HashSet<>(entries); + + for (String entry : entries) { + try { + for (InetAddress addr : InetAddress.getAllByName(entry)) { + result.add(addr.getHostAddress()); + } + } catch (UnknownHostException e) { + log.warn("Could not resolve allowlist entry '{}': {}", entry, e.getMessage()); + } + } + + return Collections.unmodifiableSet(result); } @Override From 0f2f00b1825a9d03cdb1f45f9e84674097e89f1f Mon Sep 17 00:00:00 2001 From: bitflicker64 Date: Thu, 12 Mar 2026 03:44:47 +0530 Subject: [PATCH 2/7] Fix IpAuthHandler hostname resolution and refresh allowlist after peer changes - Resolve allowlist hostnames to IPs using InetAddress.getAllByName - Add refresh() to update resolved IPs when Raft peer list changes - Wire refresh into RaftEngine.changePeerList() - Add IpAuthHandlerTest covering hostname resolution, refresh behavior, and failure cases --- .../apache/hugegraph/pd/raft/RaftEngine.java | 20 ++++- .../hugegraph/pd/raft/auth/IpAuthHandler.java | 25 +++++- .../hugegraph/pd/core/PDCoreSuiteTest.java | 2 + .../hugegraph/pd/raft/IpAuthHandlerTest.java | 90 +++++++++++++++++++ 4 files changed, 131 insertions(+), 6 deletions(-) create mode 100644 hugegraph-pd/hg-pd-test/src/main/java/org/apache/hugegraph/pd/raft/IpAuthHandlerTest.java diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftEngine.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftEngine.java index e70ac92340..da4be44297 100644 --- a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftEngine.java +++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftEngine.java @@ -23,6 +23,7 @@ import java.util.HashSet; import java.util.List; import java.util.Objects; +import java.util.Set; import java.util.concurrent.CompletableFuture; import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutionException; @@ -40,7 +41,6 @@ import com.alipay.sofa.jraft.JRaftUtils; import com.alipay.sofa.jraft.Node; import com.alipay.sofa.jraft.RaftGroupService; -import com.alipay.sofa.jraft.ReplicatorGroup; import com.alipay.sofa.jraft.Status; import com.alipay.sofa.jraft.conf.Configuration; import com.alipay.sofa.jraft.core.Replicator; @@ -54,7 +54,6 @@ import com.alipay.sofa.jraft.rpc.RpcServer; import com.alipay.sofa.jraft.rpc.impl.BoltRpcServer; import com.alipay.sofa.jraft.util.Endpoint; -import com.alipay.sofa.jraft.util.ThreadId; import com.alipay.sofa.jraft.util.internal.ThrowUtil; import io.netty.channel.ChannelHandler; @@ -326,6 +325,23 @@ public Status changePeerList(String peerList) { latch.countDown(); }); latch.await(); + + // Refresh IpAuthHandler so newly added peers are not blocked + if (result.get() != null && result.get().isOk()) { + IpAuthHandler handler = IpAuthHandler.getInstance(); + if (handler != null) { + Set newIps = newPeers.getPeers() + .stream() + .map(PeerId::getIp) + .collect(Collectors.toSet()); + handler.refresh(newIps); + log.info("IpAuthHandler refreshed after peer list change to: {}", peerList); + } else { + log.warn("IpAuthHandler not initialized, skipping refresh for peer list: {}", + peerList); + } + } + } catch (Exception e) { log.error("failed to changePeerList to {},{}", peerList, e); result.set(new Status(-1, e.getMessage())); diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/auth/IpAuthHandler.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/auth/IpAuthHandler.java index 38ab3e9cf3..bdccb6dd7f 100644 --- a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/auth/IpAuthHandler.java +++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/auth/IpAuthHandler.java @@ -17,8 +17,8 @@ package org.apache.hugegraph.pd.raft.auth; -import java.net.InetSocketAddress; import java.net.InetAddress; +import java.net.InetSocketAddress; import java.net.UnknownHostException; import java.util.Collections; import java.util.HashSet; @@ -33,13 +33,10 @@ @ChannelHandler.Sharable public class IpAuthHandler extends ChannelDuplexHandler { - // Retained for potential refresh of resolvedIps on membership changes - private final Set allowedIps; private volatile Set resolvedIps; private static volatile IpAuthHandler instance; private IpAuthHandler(Set allowedIps) { - this.allowedIps = Collections.unmodifiableSet(allowedIps); this.resolvedIps = resolveAll(allowedIps); } @@ -54,6 +51,25 @@ public static IpAuthHandler getInstance(Set allowedIps) { return instance; } + /** + * Returns the existing singleton instance, or null if not yet initialized. + * Should only be called after getInstance(Set) has been called during startup. + */ + public static IpAuthHandler getInstance() { + return instance; + } + + /** + * Refreshes the resolved IP allowlist from a new set of hostnames or IPs. + * Should be called when the Raft peer list changes via RaftEngine#changePeerList(). + * Note: DNS-only changes (e.g. container restart with new IP, same hostname) + * are not automatically detected and still require a process restart. + */ + public void refresh(Set newAllowedIps) { + this.resolvedIps = resolveAll(newAllowedIps); + log.info("IpAuthHandler allowlist refreshed, resolved {} entries", resolvedIps.size()); + } + @Override public void channelActive(ChannelHandlerContext ctx) throws Exception { String clientIp = getClientIp(ctx); @@ -72,6 +88,7 @@ private static String getClientIp(ChannelHandlerContext ctx) { private boolean isIpAllowed(String ip) { Set resolved = this.resolvedIps; + // Empty allowlist means no restriction is configured — allow all return resolved.isEmpty() || resolved.contains(ip); } diff --git a/hugegraph-pd/hg-pd-test/src/main/java/org/apache/hugegraph/pd/core/PDCoreSuiteTest.java b/hugegraph-pd/hg-pd-test/src/main/java/org/apache/hugegraph/pd/core/PDCoreSuiteTest.java index 5098645128..fe35b7ea5c 100644 --- a/hugegraph-pd/hg-pd-test/src/main/java/org/apache/hugegraph/pd/core/PDCoreSuiteTest.java +++ b/hugegraph-pd/hg-pd-test/src/main/java/org/apache/hugegraph/pd/core/PDCoreSuiteTest.java @@ -19,6 +19,7 @@ import org.apache.hugegraph.pd.core.meta.MetadataKeyHelperTest; import org.apache.hugegraph.pd.core.store.HgKVStoreImplTest; +import org.apache.hugegraph.pd.raft.IpAuthHandlerTest; import org.junit.runner.RunWith; import org.junit.runners.Suite; @@ -36,6 +37,7 @@ StoreMonitorDataServiceTest.class, StoreServiceTest.class, TaskScheduleServiceTest.class, + IpAuthHandlerTest.class, // StoreNodeServiceTest.class, }) @Slf4j diff --git a/hugegraph-pd/hg-pd-test/src/main/java/org/apache/hugegraph/pd/raft/IpAuthHandlerTest.java b/hugegraph-pd/hg-pd-test/src/main/java/org/apache/hugegraph/pd/raft/IpAuthHandlerTest.java new file mode 100644 index 0000000000..01df74826c --- /dev/null +++ b/hugegraph-pd/hg-pd-test/src/main/java/org/apache/hugegraph/pd/raft/IpAuthHandlerTest.java @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hugegraph.pd.raft; + +import java.util.Collections; +import java.util.HashSet; +import java.util.Set; + +import org.apache.hugegraph.pd.raft.auth.IpAuthHandler; +import org.apache.hugegraph.testutil.Whitebox; +import org.junit.After; +import org.junit.Assert; +import org.junit.Test; + +public class IpAuthHandlerTest { + + @After + public void tearDown() { + Whitebox.setInternalState(IpAuthHandler.class, "instance", null); + } + + private boolean isIpAllowed(IpAuthHandler handler, String ip) { + return Whitebox.invoke(IpAuthHandler.class, + new Class[]{String.class}, + "isIpAllowed", handler, ip); + } + + @Test + public void testHostnameResolvesToIp() { + // "localhost" should resolve to "127.0.0.1" + IpAuthHandler handler = IpAuthHandler.getInstance( + Collections.singleton("localhost")); + Assert.assertTrue(isIpAllowed(handler, "127.0.0.1")); + } + + @Test + public void testUnresolvableHostnameDoesNotCrash() { + // Should log a warning and skip — no exception thrown + IpAuthHandler handler = IpAuthHandler.getInstance( + Collections.singleton("nonexistent.invalid.hostname")); + // unresolvable entry is skipped so 127.0.0.1 should not be allowed + Assert.assertFalse(isIpAllowed(handler, "127.0.0.1")); + } + + @Test + public void testRefreshUpdatesResolvedIps() { + // Start with 127.0.0.1 + IpAuthHandler handler = IpAuthHandler.getInstance( + Collections.singleton("127.0.0.1")); + Assert.assertTrue(isIpAllowed(handler, "127.0.0.1")); + + // Refresh with a different IP + Set newIps = new HashSet<>(); + newIps.add("192.168.0.1"); + handler.refresh(newIps); + + Assert.assertFalse(isIpAllowed(handler, "127.0.0.1")); + Assert.assertTrue(isIpAllowed(handler, "192.168.0.1")); + } + + @Test + public void testEmptyAllowlistAllowsAll() { + // Empty allowlist = no restriction = allow all + IpAuthHandler handler = IpAuthHandler.getInstance( + Collections.emptySet()); + Assert.assertTrue(isIpAllowed(handler, "1.2.3.4")); + Assert.assertTrue(isIpAllowed(handler, "192.168.99.99")); + } + + @Test + public void testGetInstanceReturnsNullBeforeInit() { + // After tearDown resets singleton, no-arg getInstance returns null + Assert.assertNull(IpAuthHandler.getInstance()); + } +} From 1fca2c85bf11cf3d9b0bae7ac363a24345438505 Mon Sep 17 00:00:00 2001 From: bitflicker64 Date: Sun, 15 Mar 2026 13:20:06 +0530 Subject: [PATCH 3/7] fix: refresh IpAuthHandler allowlist via updatePdRaft() bypass path - Wire IpAuthHandler.refresh() into PDService.updatePdRaft() which calls node.changePeers() directly, bypassing RaftEngine.changePeerList() - Include learner IPs in refresh since updatePdRaft() supports learner peers - Add @Before setUp() to IpAuthHandlerTest to prevent stale singleton from earlier suite classes poisoning test assertions - Strengthen testUnresolvableHostnameDoesNotCrash assertions - Replace testGetInstanceReturnsNullBeforeInit with testGetInstanceReturnsSingletonIgnoresNewAllowlist --- .../hugegraph/pd/service/PDService.java | 13 ++++++ .../hugegraph/pd/raft/IpAuthHandlerTest.java | 46 +++++++++++++++---- 2 files changed, 51 insertions(+), 8 deletions(-) diff --git a/hugegraph-pd/hg-pd-service/src/main/java/org/apache/hugegraph/pd/service/PDService.java b/hugegraph-pd/hg-pd-service/src/main/java/org/apache/hugegraph/pd/service/PDService.java index 98bc2ee803..94d136a844 100644 --- a/hugegraph-pd/hg-pd-service/src/main/java/org/apache/hugegraph/pd/service/PDService.java +++ b/hugegraph-pd/hg-pd-service/src/main/java/org/apache/hugegraph/pd/service/PDService.java @@ -25,6 +25,7 @@ import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; @@ -85,6 +86,7 @@ import org.apache.hugegraph.pd.raft.PeerUtil; import org.apache.hugegraph.pd.raft.RaftEngine; import org.apache.hugegraph.pd.raft.RaftStateListener; +import org.apache.hugegraph.pd.raft.auth.IpAuthHandler; import org.apache.hugegraph.pd.util.grpc.StreamObserverUtil; import org.apache.hugegraph.pd.watch.PDWatchSubject; import org.lognet.springboot.grpc.GRpcService; @@ -1735,6 +1737,17 @@ public void updatePdRaft(Pdpb.UpdatePdRaftRequest request, node.changePeers(config, status -> { if (status.isOk()) { log.info("updatePdRaft, change peers success"); + // Refresh IpAuthHandler so newly added peers are not blocked + IpAuthHandler handler = IpAuthHandler.getInstance(); + if (handler != null) { + Set newIps = new HashSet<>(); + config.getPeers().forEach(p -> newIps.add(p.getIp())); + config.getLearners().forEach(p -> newIps.add(p.getIp())); + handler.refresh(newIps); + log.info("IpAuthHandler refreshed after updatePdRaft peer change"); + } else { + log.warn("IpAuthHandler not initialized, skipping refresh"); + } } else { log.error("changePeers status: {}, msg:{}, code: {}, raft error:{}", status, status.getErrorMsg(), status.getCode(), diff --git a/hugegraph-pd/hg-pd-test/src/main/java/org/apache/hugegraph/pd/raft/IpAuthHandlerTest.java b/hugegraph-pd/hg-pd-test/src/main/java/org/apache/hugegraph/pd/raft/IpAuthHandlerTest.java index 01df74826c..42965c9c72 100644 --- a/hugegraph-pd/hg-pd-test/src/main/java/org/apache/hugegraph/pd/raft/IpAuthHandlerTest.java +++ b/hugegraph-pd/hg-pd-test/src/main/java/org/apache/hugegraph/pd/raft/IpAuthHandlerTest.java @@ -25,12 +25,24 @@ import org.apache.hugegraph.testutil.Whitebox; import org.junit.After; import org.junit.Assert; +import org.junit.Before; import org.junit.Test; public class IpAuthHandlerTest { + @Before + public void setUp() { + // Must reset BEFORE each test — earlier suite classes (e.g. ConfigServiceTest) + // initialize RaftEngine which creates the IpAuthHandler singleton with their + // own peer IPs. Without this reset, our getInstance() calls return the stale + // singleton and ignore the allowlist passed by the test. + Whitebox.setInternalState(IpAuthHandler.class, "instance", null); + } + @After public void tearDown() { + // Must reset AFTER each test — prevents our test singleton from leaking + // into later suite classes that also depend on IpAuthHandler state. Whitebox.setInternalState(IpAuthHandler.class, "instance", null); } @@ -42,7 +54,8 @@ private boolean isIpAllowed(IpAuthHandler handler, String ip) { @Test public void testHostnameResolvesToIp() { - // "localhost" should resolve to "127.0.0.1" + // "localhost" should resolve to "127.0.0.1" via InetAddress.getAllByName() + // This verifies the core fix: hostname allowlists match numeric remote addresses IpAuthHandler handler = IpAuthHandler.getInstance( Collections.singleton("localhost")); Assert.assertTrue(isIpAllowed(handler, "127.0.0.1")); @@ -50,11 +63,14 @@ public void testHostnameResolvesToIp() { @Test public void testUnresolvableHostnameDoesNotCrash() { - // Should log a warning and skip — no exception thrown + // Should log a warning and skip — no exception thrown during construction IpAuthHandler handler = IpAuthHandler.getInstance( Collections.singleton("nonexistent.invalid.hostname")); - // unresolvable entry is skipped so 127.0.0.1 should not be allowed + // Handler was still created successfully despite bad hostname + Assert.assertNotNull(handler); + // Unresolvable entry is skipped so no IPs should be allowed Assert.assertFalse(isIpAllowed(handler, "127.0.0.1")); + Assert.assertFalse(isIpAllowed(handler, "192.168.0.1")); } @Test @@ -64,18 +80,22 @@ public void testRefreshUpdatesResolvedIps() { Collections.singleton("127.0.0.1")); Assert.assertTrue(isIpAllowed(handler, "127.0.0.1")); - // Refresh with a different IP + // Refresh with a different IP — verifies refresh() swaps the set correctly Set newIps = new HashSet<>(); newIps.add("192.168.0.1"); handler.refresh(newIps); + // Old IP should no longer be allowed Assert.assertFalse(isIpAllowed(handler, "127.0.0.1")); + // New IP should now be allowed Assert.assertTrue(isIpAllowed(handler, "192.168.0.1")); } @Test public void testEmptyAllowlistAllowsAll() { - // Empty allowlist = no restriction = allow all + // Empty allowlist = no restriction configured = allow all connections + // This is intentional fallback behavior and must be explicitly tested + // because it is a security-relevant boundary IpAuthHandler handler = IpAuthHandler.getInstance( Collections.emptySet()); Assert.assertTrue(isIpAllowed(handler, "1.2.3.4")); @@ -83,8 +103,18 @@ public void testEmptyAllowlistAllowsAll() { } @Test - public void testGetInstanceReturnsNullBeforeInit() { - // After tearDown resets singleton, no-arg getInstance returns null - Assert.assertNull(IpAuthHandler.getInstance()); + public void testGetInstanceReturnsSingletonIgnoresNewAllowlist() { + // First call creates the singleton with 127.0.0.1 + IpAuthHandler first = IpAuthHandler.getInstance( + Collections.singleton("127.0.0.1")); + // Second call with a different set must return the same instance + // and must NOT reinitialize or override the existing allowlist + IpAuthHandler second = IpAuthHandler.getInstance( + Collections.singleton("192.168.0.1")); + Assert.assertSame(first, second); + // Original allowlist still in effect + Assert.assertTrue(isIpAllowed(second, "127.0.0.1")); + // New set was ignored — 192.168.0.1 should not be allowed + Assert.assertFalse(isIpAllowed(second, "192.168.0.1")); } } From 980020a0494c383229c4b46f3703fc0c6c7bb2d9 Mon Sep 17 00:00:00 2001 From: bitflicker64 Date: Sun, 15 Mar 2026 15:18:34 +0530 Subject: [PATCH 4/7] Add TODO notes documenting current PD authentication limitations - Document missing auth check on MemberAPI changePeerList endpoint - Note that password validation is currently skipped in Authentication - Flag that GrpcAuthentication interceptor is not wired into the gRPC server --- .../src/main/java/org/apache/hugegraph/pd/rest/MemberAPI.java | 3 +++ .../hugegraph/pd/service/interceptor/Authentication.java | 2 ++ .../org/apache/hugegraph/pd/util/grpc/GRpcServerConfig.java | 2 ++ 3 files changed, 7 insertions(+) diff --git a/hugegraph-pd/hg-pd-service/src/main/java/org/apache/hugegraph/pd/rest/MemberAPI.java b/hugegraph-pd/hg-pd-service/src/main/java/org/apache/hugegraph/pd/rest/MemberAPI.java index 4a796c37ce..525b899c96 100644 --- a/hugegraph-pd/hg-pd-service/src/main/java/org/apache/hugegraph/pd/rest/MemberAPI.java +++ b/hugegraph-pd/hg-pd-service/src/main/java/org/apache/hugegraph/pd/rest/MemberAPI.java @@ -113,6 +113,9 @@ public RestApiResponse getMembers() throws InterruptedException, ExecutionExcept * @return Returns a JSON string containing the modification results * @throws Exception If an exception occurs during request processing, service invocation, or Peer list modification, it is captured and returned as the JSON representation of the exception */ + // TODO: this endpoint has no authentication check — any caller with network + // access to the management port can trigger a peer list change. + // Wire authentication here as part of the planned auth refactor. @PostMapping(value = "/members/change", consumes = MediaType.APPLICATION_JSON_VALUE, produces = MediaType.APPLICATION_JSON_VALUE) @ResponseBody diff --git a/hugegraph-pd/hg-pd-service/src/main/java/org/apache/hugegraph/pd/service/interceptor/Authentication.java b/hugegraph-pd/hg-pd-service/src/main/java/org/apache/hugegraph/pd/service/interceptor/Authentication.java index 83901bca1a..48bcf38683 100644 --- a/hugegraph-pd/hg-pd-service/src/main/java/org/apache/hugegraph/pd/service/interceptor/Authentication.java +++ b/hugegraph-pd/hg-pd-service/src/main/java/org/apache/hugegraph/pd/service/interceptor/Authentication.java @@ -77,6 +77,8 @@ protected T authenticate(String authority, String token, Function } String name = info.substring(0, delim); + // TODO: password validation is skipped — only service name is checked against + // innerModules. Full credential validation should be added as part of the auth refactor. //String pwd = info.substring(delim + 1); if (innerModules.contains(name)) { return call.get(); diff --git a/hugegraph-pd/hg-pd-service/src/main/java/org/apache/hugegraph/pd/util/grpc/GRpcServerConfig.java b/hugegraph-pd/hg-pd-service/src/main/java/org/apache/hugegraph/pd/util/grpc/GRpcServerConfig.java index fce6d2379d..2b1103739b 100644 --- a/hugegraph-pd/hg-pd-service/src/main/java/org/apache/hugegraph/pd/util/grpc/GRpcServerConfig.java +++ b/hugegraph-pd/hg-pd-service/src/main/java/org/apache/hugegraph/pd/util/grpc/GRpcServerConfig.java @@ -40,6 +40,8 @@ public void configure(ServerBuilder serverBuilder) { HgExecutorUtil.createExecutor(EXECUTOR_NAME, poolGrpc.getCore(), poolGrpc.getMax(), poolGrpc.getQueue())); serverBuilder.maxInboundMessageSize(MAX_INBOUND_MESSAGE_SIZE); + // TODO: GrpcAuthentication is instantiated as a Spring bean but never registered + // here — add serverBuilder.intercept(grpcAuthentication) once auth is refactored. } } From c888632395f39aa0cd3d79e5480e78eae57336e2 Mon Sep 17 00:00:00 2001 From: bitflicker64 Date: Mon, 16 Mar 2026 00:18:57 +0530 Subject: [PATCH 5/7] fix(test): add regression test for changePeerList -> IpAuthHandler.refresh() wiring - Add RaftEngineIpAuthIntegrationTest to prove the full chain: changePeerList() -> changePeers callback -> handler.refresh() - Add failure-path test to ensure allowlist is not refreshed on failed peer update - Uses mock Node that fires callback synchronously to avoid latch.await() hang - Register in PDCoreSuiteTest suite --- .../hugegraph/pd/core/PDCoreSuiteTest.java | 2 + .../raft/RaftEngineIpAuthIntegrationTest.java | 124 ++++++++++++++++++ 2 files changed, 126 insertions(+) create mode 100644 hugegraph-pd/hg-pd-test/src/main/java/org/apache/hugegraph/pd/raft/RaftEngineIpAuthIntegrationTest.java diff --git a/hugegraph-pd/hg-pd-test/src/main/java/org/apache/hugegraph/pd/core/PDCoreSuiteTest.java b/hugegraph-pd/hg-pd-test/src/main/java/org/apache/hugegraph/pd/core/PDCoreSuiteTest.java index fe35b7ea5c..57fd367171 100644 --- a/hugegraph-pd/hg-pd-test/src/main/java/org/apache/hugegraph/pd/core/PDCoreSuiteTest.java +++ b/hugegraph-pd/hg-pd-test/src/main/java/org/apache/hugegraph/pd/core/PDCoreSuiteTest.java @@ -20,6 +20,7 @@ import org.apache.hugegraph.pd.core.meta.MetadataKeyHelperTest; import org.apache.hugegraph.pd.core.store.HgKVStoreImplTest; import org.apache.hugegraph.pd.raft.IpAuthHandlerTest; +import org.apache.hugegraph.pd.raft.RaftEngineIpAuthIntegrationTest; import org.junit.runner.RunWith; import org.junit.runners.Suite; @@ -38,6 +39,7 @@ StoreServiceTest.class, TaskScheduleServiceTest.class, IpAuthHandlerTest.class, + RaftEngineIpAuthIntegrationTest.class, // StoreNodeServiceTest.class, }) @Slf4j diff --git a/hugegraph-pd/hg-pd-test/src/main/java/org/apache/hugegraph/pd/raft/RaftEngineIpAuthIntegrationTest.java b/hugegraph-pd/hg-pd-test/src/main/java/org/apache/hugegraph/pd/raft/RaftEngineIpAuthIntegrationTest.java new file mode 100644 index 0000000000..2902ab5dd5 --- /dev/null +++ b/hugegraph-pd/hg-pd-test/src/main/java/org/apache/hugegraph/pd/raft/RaftEngineIpAuthIntegrationTest.java @@ -0,0 +1,124 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hugegraph.pd.raft; + +import java.util.Collections; + +import org.apache.hugegraph.pd.raft.auth.IpAuthHandler; +import org.apache.hugegraph.testutil.Whitebox; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import com.alipay.sofa.jraft.Closure; +import com.alipay.sofa.jraft.Node; +import com.alipay.sofa.jraft.Status; +import com.alipay.sofa.jraft.conf.Configuration; +import com.alipay.sofa.jraft.error.RaftError; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.mock; + +public class RaftEngineIpAuthIntegrationTest { + + private Node originalRaftNode; + + @Before + public void setUp() { + // Save original raftNode so we can restore it after the test + originalRaftNode = RaftEngine.getInstance().getRaftNode(); + // Reset IpAuthHandler singleton for a clean state + Whitebox.setInternalState(IpAuthHandler.class, "instance", null); + } + + @After + public void tearDown() { + // Restore original raftNode + Whitebox.setInternalState(RaftEngine.getInstance(), "raftNode", originalRaftNode); + // Reset IpAuthHandler singleton + Whitebox.setInternalState(IpAuthHandler.class, "instance", null); + } + + @Test + public void testChangePeerListRefreshesIpAuthHandler() throws Exception { + // Initialize IpAuthHandler with an old IP + IpAuthHandler handler = IpAuthHandler.getInstance( + Collections.singleton("10.0.0.1")); + Assert.assertTrue(invokeIsIpAllowed(handler, "10.0.0.1")); + Assert.assertFalse(invokeIsIpAllowed(handler, "127.0.0.1")); + + // Mock Node to fire the changePeers callback synchronously with Status.OK() + // This simulates a successful peer change without a real Raft cluster + + // Important: fire the closure synchronously or changePeerList() will + // block on latch.await() indefinitely — no timeout is configured + Node mockNode = mock(Node.class); + doAnswer(invocation -> { + Closure closure = invocation.getArgument(1); + closure.run(Status.OK()); + return null; + }).when(mockNode).changePeers(any(Configuration.class), any(Closure.class)); + + // Inject mock node into RaftEngine + Whitebox.setInternalState(RaftEngine.getInstance(), "raftNode", mockNode); + + // Call changePeerList with new peer — must be odd count + RaftEngine.getInstance().changePeerList("127.0.0.1:8610"); + + // Verify IpAuthHandler was refreshed with the new peer IP + Assert.assertTrue(invokeIsIpAllowed(handler, "127.0.0.1")); + // Old IP should no longer be allowed + Assert.assertFalse(invokeIsIpAllowed(handler, "10.0.0.1")); + } + + @Test + public void testChangePeerListDoesNotRefreshOnFailure() throws Exception { + // Initialize IpAuthHandler with original IP + IpAuthHandler handler = IpAuthHandler.getInstance( + Collections.singleton("10.0.0.1")); + Assert.assertTrue(invokeIsIpAllowed(handler, "10.0.0.1")); + + // Mock Node to fire callback with a failed status + // Simulates a failed peer change — handler should NOT be refreshed + + // Important: fire the closure synchronously or changePeerList() will + // block on latch.await() indefinitely — no timeout is configured + Node mockNode = mock(Node.class); + doAnswer(invocation -> { + Closure closure = invocation.getArgument(1); + closure.run(new Status(RaftError.EINTERNAL, "simulated failure")); + return null; + }).when(mockNode).changePeers(any(Configuration.class), any(Closure.class)); + + Whitebox.setInternalState(RaftEngine.getInstance(), "raftNode", mockNode); + + RaftEngine.getInstance().changePeerList("127.0.0.1:8610"); + + // Handler should NOT be refreshed — old IP still allowed + Assert.assertTrue(invokeIsIpAllowed(handler, "10.0.0.1")); + Assert.assertFalse(invokeIsIpAllowed(handler, "127.0.0.1")); + } + + private boolean invokeIsIpAllowed(IpAuthHandler handler, String ip) { + return Whitebox.invoke(IpAuthHandler.class, + new Class[]{String.class}, + "isIpAllowed", handler, ip); + } +} From 5d6e38ef5b1bab0a4307d531a0bfa3fde5e4ba14 Mon Sep 17 00:00:00 2001 From: bitflicker64 Date: Mon, 16 Mar 2026 11:57:56 +0530 Subject: [PATCH 6/7] fix: add timeout to changePeerList() and fix IPv6 flaky test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add timed latch.await() using 3x rpcTimeout to prevent indefinite blocking if changePeers callback is never invoked - Use compareAndSet in both callback and timeout path to avoid race - Restore Thread.currentThread().interrupt() on InterruptedException - Fix testHostnameResolvesToIp() to resolve localhost dynamically instead of hardcoding 127.0.0.1 — avoids flakiness on IPv6 systems --- .../apache/hugegraph/pd/raft/RaftEngine.java | 61 +++++++++++++------ .../hugegraph/pd/raft/IpAuthHandlerTest.java | 18 +++++- 2 files changed, 57 insertions(+), 22 deletions(-) diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftEngine.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftEngine.java index da4be44297..24d5744739 100644 --- a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftEngine.java +++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftEngine.java @@ -27,6 +27,7 @@ import java.util.concurrent.CompletableFuture; import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicReference; import java.util.stream.Collectors; @@ -312,40 +313,61 @@ public List getMembers() throws ExecutionException, InterruptedEx public Status changePeerList(String peerList) { AtomicReference result = new AtomicReference<>(); + Configuration newPeers = new Configuration(); try { String[] peers = peerList.split(",", -1); if ((peers.length & 1) != 1) { throw new PDException(-1, "the number of peer list must be odd."); } - Configuration newPeers = new Configuration(); newPeers.parse(peerList); CountDownLatch latch = new CountDownLatch(1); this.raftNode.changePeers(newPeers, status -> { - result.set(status); + // Use compareAndSet so a late callback does not overwrite a timeout status + result.compareAndSet(null, status); latch.countDown(); }); - latch.await(); - - // Refresh IpAuthHandler so newly added peers are not blocked - if (result.get() != null && result.get().isOk()) { - IpAuthHandler handler = IpAuthHandler.getInstance(); - if (handler != null) { - Set newIps = newPeers.getPeers() - .stream() - .map(PeerId::getIp) - .collect(Collectors.toSet()); - handler.refresh(newIps); - log.info("IpAuthHandler refreshed after peer list change to: {}", peerList); - } else { - log.warn("IpAuthHandler not initialized, skipping refresh for peer list: {}", - peerList); + // Use configured RPC timeout — bare await() would block forever if + // the callback is never invoked (e.g. node not started / RPC failure) + boolean completed = latch.await(3L * config.getRpcTimeout(), + TimeUnit.MILLISECONDS); + if (!completed && result.get() == null) { + Status timeoutStatus = new Status(RaftError.EINTERNAL, + "changePeerList timed out after %d ms", + 3L * config.getRpcTimeout()); + if (!result.compareAndSet(null, timeoutStatus)) { + // Callback arrived just before us — keep its result + timeoutStatus = null; + } + if (timeoutStatus != null) { + log.error("changePeerList to {} timed out after {} ms", + peerList, 3L * config.getRpcTimeout()); } } - + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + result.set(new Status(RaftError.EINTERNAL, "changePeerList interrupted")); + log.error("changePeerList to {} was interrupted", peerList, e); } catch (Exception e) { log.error("failed to changePeerList to {},{}", peerList, e); result.set(new Status(-1, e.getMessage())); } + + // Refresh IpAuthHandler so newly added peers are not blocked + if (result.get() != null && result.get().isOk()) { + IpAuthHandler handler = IpAuthHandler.getInstance(); + if (handler != null) { + Set newIps = newPeers.getPeers() + .stream() + .map(PeerId::getIp) + .collect(Collectors.toSet()); + handler.refresh(newIps); + log.info("IpAuthHandler refreshed after peer list change to: {}", peerList); + } else { + log.warn("IpAuthHandler not initialized, skipping refresh for peer list: {}", + peerList); + } + } + return result.get(); } @@ -382,7 +404,8 @@ private boolean peerEquals(PeerId p1, PeerId p2) { if (p1 == null || p2 == null) { return false; } - return Objects.equals(p1.getIp(), p2.getIp()) && Objects.equals(p1.getPort(), p2.getPort()); + return Objects.equals(p1.getIp(), p2.getIp()) && + Objects.equals(p1.getPort(), p2.getPort()); } private Replicator.State getReplicatorState(PeerId peerId) { diff --git a/hugegraph-pd/hg-pd-test/src/main/java/org/apache/hugegraph/pd/raft/IpAuthHandlerTest.java b/hugegraph-pd/hg-pd-test/src/main/java/org/apache/hugegraph/pd/raft/IpAuthHandlerTest.java index 42965c9c72..a28ef5491d 100644 --- a/hugegraph-pd/hg-pd-test/src/main/java/org/apache/hugegraph/pd/raft/IpAuthHandlerTest.java +++ b/hugegraph-pd/hg-pd-test/src/main/java/org/apache/hugegraph/pd/raft/IpAuthHandlerTest.java @@ -17,6 +17,7 @@ package org.apache.hugegraph.pd.raft; +import java.net.InetAddress; import java.util.Collections; import java.util.HashSet; import java.util.Set; @@ -53,12 +54,23 @@ private boolean isIpAllowed(IpAuthHandler handler, String ip) { } @Test - public void testHostnameResolvesToIp() { - // "localhost" should resolve to "127.0.0.1" via InetAddress.getAllByName() + public void testHostnameResolvesToIp() throws Exception { + // "localhost" should resolve to one or more IPs via InetAddress.getAllByName() // This verifies the core fix: hostname allowlists match numeric remote addresses + // Using dynamic resolution avoids hardcoding "127.0.0.1" which may not be + // returned on IPv6-only or custom resolver environments IpAuthHandler handler = IpAuthHandler.getInstance( Collections.singleton("localhost")); - Assert.assertTrue(isIpAllowed(handler, "127.0.0.1")); + InetAddress[] addresses = InetAddress.getAllByName("localhost"); + // All resolved addresses should be allowed — resolveAll() adds every address + // returned by getAllByName() so none should be blocked + Assert.assertTrue("Expected at least one resolved address", + addresses.length > 0); + for (InetAddress address : addresses) { + Assert.assertTrue( + "Expected " + address.getHostAddress() + " to be allowed", + isIpAllowed(handler, address.getHostAddress())); + } } @Test From 0aa2d5fbcd94ba7bc90bca71661a204ed0934d94 Mon Sep 17 00:00:00 2001 From: bitflicker64 Date: Mon, 16 Mar 2026 12:51:06 +0530 Subject: [PATCH 7/7] fix: avoid changePeerList timeout race and update tests --- .../apache/hugegraph/pd/raft/RaftEngine.java | 39 +++++++++++-------- .../hugegraph/pd/raft/IpAuthHandlerTest.java | 3 +- .../raft/RaftEngineIpAuthIntegrationTest.java | 4 +- 3 files changed, 26 insertions(+), 20 deletions(-) diff --git a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftEngine.java b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftEngine.java index 24d5744739..b73364ae6d 100644 --- a/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftEngine.java +++ b/hugegraph-pd/hg-pd-core/src/main/java/org/apache/hugegraph/pd/raft/RaftEngine.java @@ -324,9 +324,30 @@ public Status changePeerList(String peerList) { this.raftNode.changePeers(newPeers, status -> { // Use compareAndSet so a late callback does not overwrite a timeout status result.compareAndSet(null, status); + // Refresh inside callback so it fires even if caller already timed out + // Note: changePeerList() uses Configuration.parse() which only supports + // plain comma-separated peer addresses with no learner syntax. + // getLearners() will always be empty here. Learner support is handled + // in PDService.updatePdRaft() which uses PeerUtil.parseConfig() + // and supports the /learner suffix. + if (status != null && status.isOk()) { + IpAuthHandler handler = IpAuthHandler.getInstance(); + if (handler != null) { + Set newIps = newPeers.getPeers() + .stream() + .map(PeerId::getIp) + .collect(Collectors.toSet()); + handler.refresh(newIps); + log.info("IpAuthHandler refreshed after peer list change to: {}", + peerList); + } else { + log.warn("IpAuthHandler not initialized, skipping refresh for " + + "peer list: {}", peerList); + } + } latch.countDown(); }); - // Use configured RPC timeout — bare await() would block forever if + // Use 3x configured RPC timeout — bare await() would block forever if // the callback is never invoked (e.g. node not started / RPC failure) boolean completed = latch.await(3L * config.getRpcTimeout(), TimeUnit.MILLISECONDS); @@ -352,22 +373,6 @@ public Status changePeerList(String peerList) { result.set(new Status(-1, e.getMessage())); } - // Refresh IpAuthHandler so newly added peers are not blocked - if (result.get() != null && result.get().isOk()) { - IpAuthHandler handler = IpAuthHandler.getInstance(); - if (handler != null) { - Set newIps = newPeers.getPeers() - .stream() - .map(PeerId::getIp) - .collect(Collectors.toSet()); - handler.refresh(newIps); - log.info("IpAuthHandler refreshed after peer list change to: {}", peerList); - } else { - log.warn("IpAuthHandler not initialized, skipping refresh for peer list: {}", - peerList); - } - } - return result.get(); } diff --git a/hugegraph-pd/hg-pd-test/src/main/java/org/apache/hugegraph/pd/raft/IpAuthHandlerTest.java b/hugegraph-pd/hg-pd-test/src/main/java/org/apache/hugegraph/pd/raft/IpAuthHandlerTest.java index a28ef5491d..31647b6d39 100644 --- a/hugegraph-pd/hg-pd-test/src/main/java/org/apache/hugegraph/pd/raft/IpAuthHandlerTest.java +++ b/hugegraph-pd/hg-pd-test/src/main/java/org/apache/hugegraph/pd/raft/IpAuthHandlerTest.java @@ -76,8 +76,9 @@ public void testHostnameResolvesToIp() throws Exception { @Test public void testUnresolvableHostnameDoesNotCrash() { // Should log a warning and skip — no exception thrown during construction + // Uses .invalid TLD which is RFC-2606 reserved and guaranteed to never resolve IpAuthHandler handler = IpAuthHandler.getInstance( - Collections.singleton("nonexistent.invalid.hostname")); + Collections.singleton("nonexistent.invalid")); // Handler was still created successfully despite bad hostname Assert.assertNotNull(handler); // Unresolvable entry is skipped so no IPs should be allowed diff --git a/hugegraph-pd/hg-pd-test/src/main/java/org/apache/hugegraph/pd/raft/RaftEngineIpAuthIntegrationTest.java b/hugegraph-pd/hg-pd-test/src/main/java/org/apache/hugegraph/pd/raft/RaftEngineIpAuthIntegrationTest.java index 2902ab5dd5..1f9857df0f 100644 --- a/hugegraph-pd/hg-pd-test/src/main/java/org/apache/hugegraph/pd/raft/RaftEngineIpAuthIntegrationTest.java +++ b/hugegraph-pd/hg-pd-test/src/main/java/org/apache/hugegraph/pd/raft/RaftEngineIpAuthIntegrationTest.java @@ -68,7 +68,7 @@ public void testChangePeerListRefreshesIpAuthHandler() throws Exception { // This simulates a successful peer change without a real Raft cluster // Important: fire the closure synchronously or changePeerList() will - // block on latch.await() indefinitely — no timeout is configured + // block on latch.await(...) until the configured timeout elapses Node mockNode = mock(Node.class); doAnswer(invocation -> { Closure closure = invocation.getArgument(1); @@ -99,7 +99,7 @@ public void testChangePeerListDoesNotRefreshOnFailure() throws Exception { // Simulates a failed peer change — handler should NOT be refreshed // Important: fire the closure synchronously or changePeerList() will - // block on latch.await() indefinitely — no timeout is configured + // block on latch.await(...) until the configured timeout elapses Node mockNode = mock(Node.class); doAnswer(invocation -> { Closure closure = invocation.getArgument(1);