allowSinkOpsAsync(NamespaceName namespaceName,
return provider.allowSinkOpsAsync(namespaceName, role, authenticationData);
}
+ /**
+ * Whether the authenticatedPrincipal and the originalPrincipal form a valid pair. This method assumes that
+ * authenticatedPrincipal and originalPrincipal can be equal, as long as they are not a proxy role. This use
+ * case is relevant for the admin server because of the way the proxy handles authentication. The binary protocol
+ * should not use this method.
+ * @return true when roles are a valid combination and false when roles are an invalid combination
+ */
public boolean isValidOriginalPrincipal(String authenticatedPrincipal,
String originalPrincipal,
AuthenticationDataSource authDataSource) {
SocketAddress remoteAddress = authDataSource != null ? authDataSource.getPeerAddress() : null;
- return isValidOriginalPrincipal(authenticatedPrincipal, originalPrincipal, remoteAddress);
+ return isValidOriginalPrincipal(authenticatedPrincipal, originalPrincipal, remoteAddress, true);
}
/**
* Validates that the authenticatedPrincipal and the originalPrincipal are a valid combination.
- * Valid combinations fulfill the following rule: the authenticatedPrincipal is in
- * {@link ServiceConfiguration#getProxyRoles()}, if, and only if, the originalPrincipal is set to a role
- * that is not also in {@link ServiceConfiguration#getProxyRoles()}.
+ * Valid combinations fulfill one of the following two rules:
+ *
+ * 1. The authenticatedPrincipal is in {@link ServiceConfiguration#getProxyRoles()}, if, and only if,
+ * the originalPrincipal is set to a role that is not also in {@link ServiceConfiguration#getProxyRoles()}.
+ *
+ * 2. The authenticatedPrincipal and the originalPrincipal are the same, but are not a proxyRole, when
+ * allowNonProxyPrincipalsToBeEqual is true.
+ *
* @return true when roles are a valid combination and false when roles are an invalid combination
*/
public boolean isValidOriginalPrincipal(String authenticatedPrincipal,
String originalPrincipal,
- SocketAddress remoteAddress) {
+ SocketAddress remoteAddress,
+ boolean allowNonProxyPrincipalsToBeEqual) {
String errorMsg = null;
if (conf.getProxyRoles().contains(authenticatedPrincipal)) {
if (StringUtils.isBlank(originalPrincipal)) {
@@ -316,7 +329,8 @@ public boolean isValidOriginalPrincipal(String authenticatedPrincipal,
} else if (conf.getProxyRoles().contains(originalPrincipal)) {
errorMsg = "originalPrincipal cannot be a proxy role.";
}
- } else if (StringUtils.isNotBlank(originalPrincipal)) {
+ } else if (StringUtils.isNotBlank(originalPrincipal)
+ && !(allowNonProxyPrincipalsToBeEqual && originalPrincipal.equals(authenticatedPrincipal))) {
errorMsg = "cannot specify originalPrincipal when connecting without valid proxy role.";
}
if (errorMsg != null) {
diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/ServerCnx.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/ServerCnx.java
index d6a6dda402eca..adee29c5a0105 100644
--- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/ServerCnx.java
+++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/ServerCnx.java
@@ -673,7 +673,7 @@ private void completeConnect(int clientProtoVersion, String clientVersion) {
if (service.isAuthenticationEnabled()) {
if (service.isAuthorizationEnabled()) {
if (!service.getAuthorizationService()
- .isValidOriginalPrincipal(authRole, originalPrincipal, remoteAddress)) {
+ .isValidOriginalPrincipal(authRole, originalPrincipal, remoteAddress, false)) {
state = State.Failed;
service.getPulsarStats().recordConnectionCreateFail();
final ByteBuf msg = Commands.newError(-1, ServerError.AuthorizationError, "Invalid roles.");
diff --git a/pulsar-broker/src/test/java/org/apache/pulsar/broker/auth/AuthorizationTest.java b/pulsar-broker/src/test/java/org/apache/pulsar/broker/auth/AuthorizationTest.java
index c578d9ec94162..58cf4ee418ea4 100644
--- a/pulsar-broker/src/test/java/org/apache/pulsar/broker/auth/AuthorizationTest.java
+++ b/pulsar-broker/src/test/java/org/apache/pulsar/broker/auth/AuthorizationTest.java
@@ -242,27 +242,31 @@ public void testOriginalRoleValidation() throws Exception {
AuthorizationService auth = new AuthorizationService(conf, Mockito.mock(PulsarResources.class));
// Original principal should be supplied when authenticatedPrincipal is proxy role
- assertTrue(auth.isValidOriginalPrincipal("proxy", "client", (SocketAddress) null));
+ assertTrue(auth.isValidOriginalPrincipal("proxy", "client", (SocketAddress) null, false));
// Non proxy role should not supply originalPrincipal
- assertTrue(auth.isValidOriginalPrincipal("client", "", (SocketAddress) null));
- assertTrue(auth.isValidOriginalPrincipal("client", null, (SocketAddress) null));
+ assertTrue(auth.isValidOriginalPrincipal("client", "", (SocketAddress) null, false));
+ assertTrue(auth.isValidOriginalPrincipal("client", null, (SocketAddress) null, false));
+
+ // Edge cases that differ because binary protocol and http protocol have different expectations
+ assertTrue(auth.isValidOriginalPrincipal("client", "client", (SocketAddress) null, true));
+ assertFalse(auth.isValidOriginalPrincipal("client", "client", (SocketAddress) null, false));
// Only likely in cases when authentication is disabled, but we still define these to be valid.
- assertTrue(auth.isValidOriginalPrincipal(null, null, (SocketAddress) null));
- assertTrue(auth.isValidOriginalPrincipal(null, "", (SocketAddress) null));
- assertTrue(auth.isValidOriginalPrincipal("", null, (SocketAddress) null));
- assertTrue(auth.isValidOriginalPrincipal("", "", (SocketAddress) null));
+ assertTrue(auth.isValidOriginalPrincipal(null, null, (SocketAddress) null, false));
+ assertTrue(auth.isValidOriginalPrincipal(null, "", (SocketAddress) null, false));
+ assertTrue(auth.isValidOriginalPrincipal("", null, (SocketAddress) null, false));
+ assertTrue(auth.isValidOriginalPrincipal("", "", (SocketAddress) null, false));
// Proxy role must supply an original principal
- assertFalse(auth.isValidOriginalPrincipal("proxy", "", (SocketAddress) null));
- assertFalse(auth.isValidOriginalPrincipal("proxy", null, (SocketAddress) null));
+ assertFalse(auth.isValidOriginalPrincipal("proxy", "", (SocketAddress) null, false));
+ assertFalse(auth.isValidOriginalPrincipal("proxy", null, (SocketAddress) null, false));
// OriginalPrincipal cannot be proxy role
- assertFalse(auth.isValidOriginalPrincipal("proxy", "proxy", (SocketAddress) null));
- assertFalse(auth.isValidOriginalPrincipal("client", "proxy", (SocketAddress) null));
- assertFalse(auth.isValidOriginalPrincipal("", "proxy", (SocketAddress) null));
- assertFalse(auth.isValidOriginalPrincipal(null, "proxy", (SocketAddress) null));
+ assertFalse(auth.isValidOriginalPrincipal("proxy", "proxy", (SocketAddress) null, false));
+ assertFalse(auth.isValidOriginalPrincipal("client", "proxy", (SocketAddress) null, false));
+ assertFalse(auth.isValidOriginalPrincipal("", "proxy", (SocketAddress) null, false));
+ assertFalse(auth.isValidOriginalPrincipal(null, "proxy", (SocketAddress) null, false));
// Must gracefully handle a missing AuthenticationDataSource
assertTrue(auth.isValidOriginalPrincipal("proxy", "client", (AuthenticationDataSource) null));
diff --git a/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ServerCnxTest.java b/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ServerCnxTest.java
index ab13b8aa3c7e1..a29d6dac72023 100644
--- a/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ServerCnxTest.java
+++ b/pulsar-broker/src/test/java/org/apache/pulsar/broker/service/ServerCnxTest.java
@@ -613,6 +613,8 @@ public void testConnectCommandWithInvalidRoleCombinations() throws Exception {
verifyAuthRoleAndOriginalPrincipalBehavior(authMethodName, "pass.client", "pass.proxy");
// Invalid combinations where the original principal is set to a non-proxy role
verifyAuthRoleAndOriginalPrincipalBehavior(authMethodName, "pass.client1", "pass.client");
+ verifyAuthRoleAndOriginalPrincipalBehavior(authMethodName, "pass.client", "pass.client");
+ verifyAuthRoleAndOriginalPrincipalBehavior(authMethodName, "pass.client", "pass.client1");
}
private void verifyAuthRoleAndOriginalPrincipalBehavior(String authMethodName, String authData,
diff --git a/pulsar-proxy/src/test/java/org/apache/pulsar/proxy/server/ProxyWithAuthorizationTest.java b/pulsar-proxy/src/test/java/org/apache/pulsar/proxy/server/ProxyWithAuthorizationTest.java
index de9bb087d3da3..31757cc036720 100644
--- a/pulsar-proxy/src/test/java/org/apache/pulsar/proxy/server/ProxyWithAuthorizationTest.java
+++ b/pulsar-proxy/src/test/java/org/apache/pulsar/proxy/server/ProxyWithAuthorizationTest.java
@@ -84,6 +84,7 @@ public class ProxyWithAuthorizationTest extends ProducerConsumerBase {
private final String TLS_SUPERUSER_CLIENT_CERT_FILE_PATH = "./src/test/resources/authentication/tls/client-cert.pem";
private ProxyService proxyService;
+ private WebServer webServer;
private final ProxyConfiguration proxyConfig = new ProxyConfiguration();
@DataProvider(name = "hostnameVerification")
@@ -175,6 +176,7 @@ protected void doInitConf() throws Exception {
Set superUserRoles = new HashSet<>();
superUserRoles.add("superUser");
+ superUserRoles.add("Proxy");
conf.setSuperUserRoles(superUserRoles);
conf.setBrokerClientAuthenticationPlugin(AuthenticationTls.class.getName());
@@ -202,12 +204,11 @@ protected void setup() throws Exception {
proxyConfig.setForwardAuthorizationCredentials(true);
proxyConfig.setBrokerServiceURL(pulsar.getBrokerServiceUrl());
proxyConfig.setBrokerServiceURLTLS(pulsar.getBrokerServiceUrlTls());
+ proxyConfig.setBrokerWebServiceURLTLS(pulsar.getWebServiceAddressTls());
proxyConfig.setAdvertisedAddress(null);
- proxyConfig.setServicePort(Optional.of(0));
proxyConfig.setBrokerProxyAllowedTargetPorts("*");
proxyConfig.setServicePortTls(Optional.of(0));
- proxyConfig.setWebServicePort(Optional.of(0));
proxyConfig.setWebServicePortTls(Optional.of(0));
proxyConfig.setTlsEnabledWithBroker(true);
@@ -225,9 +226,10 @@ protected void setup() throws Exception {
properties.setProperty("tokenSecretKey", AuthTokenUtils.encodeKeyBase64(SECRET_KEY));
proxyConfig.setProperties(properties);
- proxyService = Mockito.spy(new ProxyService(proxyConfig,
- new AuthenticationService(
- PulsarConfigurationLoader.convertFrom(proxyConfig))));
+ AuthenticationService authService =
+ new AuthenticationService(PulsarConfigurationLoader.convertFrom(proxyConfig));
+ proxyService = Mockito.spy(new ProxyService(proxyConfig, authService));
+ webServer = new WebServer(proxyConfig, authService);
}
@AfterMethod(alwaysRun = true)
@@ -235,10 +237,13 @@ protected void setup() throws Exception {
protected void cleanup() throws Exception {
super.internalCleanup();
proxyService.close();
+ webServer.stop();
}
private void startProxy() throws Exception {
proxyService.start();
+ ProxyServiceStarter.addWebServerHandlers(webServer, proxyConfig, proxyService, null);
+ webServer.start();
}
/**
@@ -260,23 +265,15 @@ public void testProxyAuthorization() throws Exception {
log.info("-- Starting {} test --", methodName);
startProxy();
- createAdminClient();
+ // Skip hostname verification because the certs intentionally do not have a hostname
+ createProxyAdminClient(false);
// create a client which connects to proxy over tls and pass authData
@Cleanup
PulsarClient proxyClient = createPulsarClient(proxyService.getServiceUrlTls(), PulsarClient.builder());
String namespaceName = "my-tenant/my-ns";
- admin.clusters().createCluster("proxy-authorization", ClusterData.builder().serviceUrlTls(brokerUrlTls.toString()).build());
-
- admin.tenants().createTenant("my-tenant",
- new TenantInfoImpl(Sets.newHashSet("appid1", "appid2"), Sets.newHashSet("proxy-authorization")));
- admin.namespaces().createNamespace(namespaceName);
-
- admin.namespaces().grantPermissionOnNamespace(namespaceName, "Proxy",
- Sets.newHashSet(AuthAction.consume, AuthAction.produce));
- admin.namespaces().grantPermissionOnNamespace(namespaceName, "Client",
- Sets.newHashSet(AuthAction.consume, AuthAction.produce));
+ initializeCluster(admin, namespaceName);
Consumer consumer = proxyClient.newConsumer()
.topic("persistent://my-tenant/my-ns/my-topic1")
@@ -313,7 +310,8 @@ public void testTlsHostVerificationProxyToClient(boolean hostnameVerificationEna
log.info("-- Starting {} test --", methodName);
startProxy();
- createAdminClient();
+ // Testing client to proxy hostname verification, so use the dataProvider's value here
+ createProxyAdminClient(hostnameVerificationEnabled);
// create a client which connects to proxy over tls and pass authData
@Cleanup
PulsarClient proxyClient = createPulsarClient(proxyService.getServiceUrlTls(),
@@ -321,17 +319,21 @@ public void testTlsHostVerificationProxyToClient(boolean hostnameVerificationEna
String namespaceName = "my-tenant/my-ns";
- admin.clusters().createCluster("proxy-authorization", ClusterData.builder()
- .serviceUrlTls(brokerUrlTls.toString()).build());
-
- admin.tenants().createTenant("my-tenant",
- new TenantInfoImpl(Sets.newHashSet("appid1", "appid2"), Sets.newHashSet("proxy-authorization")));
- admin.namespaces().createNamespace(namespaceName);
-
- admin.namespaces().grantPermissionOnNamespace(namespaceName, "Proxy",
- Sets.newHashSet(AuthAction.consume, AuthAction.produce));
- admin.namespaces().grantPermissionOnNamespace(namespaceName, "Client",
- Sets.newHashSet(AuthAction.consume, AuthAction.produce));
+ try {
+ initializeCluster(admin, namespaceName);
+ if (hostnameVerificationEnabled) {
+ Assert.fail("Connection should be failed due to hostnameVerification enabled");
+ }
+ } catch (PulsarAdminException e) {
+ if (!hostnameVerificationEnabled) {
+ Assert.fail("Cluster should initialize because hostnameverification is disabled");
+ }
+ admin.close();
+ // Need new client because the admin client to proxy is failing due to hostname verification, and we still
+ // want to test the binary protocol client fails to connect as well
+ createProxyAdminClient(false);
+ initializeCluster(admin, namespaceName);
+ }
try {
proxyClient.newConsumer().topic("persistent://my-tenant/my-ns/my-topic1")
@@ -366,7 +368,8 @@ public void testTlsHostVerificationProxyToBroker(boolean hostnameVerificationEna
proxyConfig.setTlsHostnameVerificationEnabled(hostnameVerificationEnabled);
startProxy();
- createAdminClient();
+ // This test skips hostname verification for client to proxy in order to test proxy to broker
+ createProxyAdminClient(false);
// create a client which connects to proxy over tls and pass authData
@Cleanup
PulsarClient proxyClient = createPulsarClient(proxyService.getServiceUrlTls(),
@@ -374,16 +377,22 @@ public void testTlsHostVerificationProxyToBroker(boolean hostnameVerificationEna
String namespaceName = "my-tenant/my-ns";
- admin.clusters().createCluster("proxy-authorization", ClusterData.builder().serviceUrlTls(brokerUrlTls.toString()).build());
-
- admin.tenants().createTenant("my-tenant",
- new TenantInfoImpl(Sets.newHashSet("appid1", "appid2"), Sets.newHashSet("proxy-authorization")));
- admin.namespaces().createNamespace(namespaceName);
-
- admin.namespaces().grantPermissionOnNamespace(namespaceName, "Proxy",
- Sets.newHashSet(AuthAction.consume, AuthAction.produce));
- admin.namespaces().grantPermissionOnNamespace(namespaceName, "Client",
- Sets.newHashSet(AuthAction.consume, AuthAction.produce));
+ try {
+ initializeCluster(admin, namespaceName);
+ if (hostnameVerificationEnabled) {
+ Assert.fail("Connection should be failed due to hostnameVerification enabled for proxy to broker");
+ }
+ } catch (PulsarAdminException.ServerSideErrorException e) {
+ if (!hostnameVerificationEnabled) {
+ Assert.fail("Cluster should initialize because hostnameverification is disabled for proxy to broker");
+ }
+ Assert.assertEquals(e.getStatusCode(), 502, "Should get bad gateway");
+ admin.close();
+ // Need to use broker's admin client because the proxy to broker is failing, and we still want to test
+ // the binary protocol client fails to connect as well
+ createBrokerAdminClient();
+ initializeCluster(admin, namespaceName);
+ }
try {
proxyClient.newConsumer().topic("persistent://my-tenant/my-ns/my-topic1")
@@ -411,19 +420,9 @@ public void tlsCiphersAndProtocols(Set tlsCiphers, Set tlsProtoc
throws Exception {
log.info("-- Starting {} test --", methodName);
String namespaceName = "my-tenant/my-ns";
- createAdminClient();
-
- admin.clusters().createCluster("proxy-authorization", ClusterData.builder()
- .serviceUrlTls(brokerUrlTls.toString()).build());
+ createBrokerAdminClient();
- admin.tenants().createTenant("my-tenant",
- new TenantInfoImpl(Sets.newHashSet("appid1", "appid2"), Sets.newHashSet("proxy-authorization")));
- admin.namespaces().createNamespace(namespaceName);
-
- admin.namespaces().grantPermissionOnNamespace(namespaceName, "Proxy",
- Sets.newHashSet(AuthAction.consume, AuthAction.produce));
- admin.namespaces().grantPermissionOnNamespace(namespaceName, "Client",
- Sets.newHashSet(AuthAction.consume, AuthAction.produce));
+ initializeCluster(admin, namespaceName);
ProxyConfiguration proxyConfig = new ProxyConfiguration();
proxyConfig.setAuthenticationEnabled(true);
@@ -510,7 +509,8 @@ public void testProxyTlsTransportWithAuth(Authentication auth) throws Exception
log.info("-- Starting {} test --", methodName);
startProxy();
- createAdminClient();
+ // Skip hostname verification because the certs intentionally do not have a hostname
+ createProxyAdminClient(false);
@Cleanup
PulsarClient proxyClient = PulsarClient.builder()
@@ -525,17 +525,7 @@ public void testProxyTlsTransportWithAuth(Authentication auth) throws Exception
String namespaceName = "my-tenant/my-ns";
- admin.clusters().createCluster("proxy-authorization",
- ClusterData.builder().serviceUrlTls(brokerUrlTls.toString()).build());
-
- admin.tenants().createTenant("my-tenant",
- new TenantInfoImpl(Sets.newHashSet("appid1", "appid2"), Sets.newHashSet("proxy-authorization")));
- admin.namespaces().createNamespace(namespaceName);
-
- admin.namespaces().grantPermissionOnNamespace(namespaceName, "Proxy",
- Sets.newHashSet(AuthAction.consume, AuthAction.produce));
- admin.namespaces().grantPermissionOnNamespace(namespaceName, "Client",
- Sets.newHashSet(AuthAction.consume, AuthAction.produce));
+ initializeCluster(admin, namespaceName);
Consumer consumer = proxyClient.newConsumer()
.topic("persistent://my-tenant/my-ns/my-topic1")
@@ -567,7 +557,32 @@ public void testProxyTlsTransportWithAuth(Authentication auth) throws Exception
log.info("-- Exiting {} test --", methodName);
}
- private void createAdminClient() throws Exception {
+ private void initializeCluster(PulsarAdmin adminClient, String namespaceName) throws Exception {
+ adminClient.clusters().createCluster("proxy-authorization", ClusterData.builder()
+ .serviceUrlTls(brokerUrlTls.toString()).build());
+
+ adminClient.tenants().createTenant("my-tenant",
+ new TenantInfoImpl(Sets.newHashSet("appid1", "appid2"), Sets.newHashSet("proxy-authorization")));
+ adminClient.namespaces().createNamespace(namespaceName);
+
+ adminClient.namespaces().grantPermissionOnNamespace(namespaceName, "Proxy",
+ Sets.newHashSet(AuthAction.consume, AuthAction.produce));
+ adminClient.namespaces().grantPermissionOnNamespace(namespaceName, "Client",
+ Sets.newHashSet(AuthAction.consume, AuthAction.produce));
+ }
+
+ private void createProxyAdminClient(boolean enableTlsHostnameVerification) throws Exception {
+ Map authParams = Maps.newHashMap();
+ authParams.put("tlsCertFile", TLS_SUPERUSER_CLIENT_CERT_FILE_PATH);
+ authParams.put("tlsKeyFile", TLS_SUPERUSER_CLIENT_KEY_FILE_PATH);
+
+ admin = spy(PulsarAdmin.builder().serviceHttpUrl("https://localhost:" + webServer.getListenPortHTTPS().get())
+ .tlsTrustCertsFilePath(TLS_TRUST_CERT_FILE_PATH)
+ .enableTlsHostnameVerification(enableTlsHostnameVerification)
+ .authentication(AuthenticationTls.class.getName(), authParams).build());
+ }
+
+ private void createBrokerAdminClient() throws Exception {
Map authParams = Maps.newHashMap();
authParams.put("tlsCertFile", TLS_SUPERUSER_CLIENT_CERT_FILE_PATH);
authParams.put("tlsKeyFile", TLS_SUPERUSER_CLIENT_KEY_FILE_PATH);
diff --git a/pulsar-proxy/src/test/java/org/apache/pulsar/proxy/server/ProxyWithJwtAuthorizationTest.java b/pulsar-proxy/src/test/java/org/apache/pulsar/proxy/server/ProxyWithJwtAuthorizationTest.java
index f42cbe4c30e87..e912006faa022 100644
--- a/pulsar-proxy/src/test/java/org/apache/pulsar/proxy/server/ProxyWithJwtAuthorizationTest.java
+++ b/pulsar-proxy/src/test/java/org/apache/pulsar/proxy/server/ProxyWithJwtAuthorizationTest.java
@@ -72,6 +72,7 @@ public class ProxyWithJwtAuthorizationTest extends ProducerConsumerBase {
private final String CLIENT_TOKEN = Jwts.builder().setSubject(CLIENT_ROLE).signWith(SECRET_KEY).compact();
private ProxyService proxyService;
+ private WebServer webServer;
private final ProxyConfiguration proxyConfig = new ProxyConfiguration();
@BeforeMethod
@@ -105,6 +106,7 @@ protected void setup() throws Exception {
proxyConfig.setAuthorizationEnabled(false);
proxyConfig.getProperties().setProperty("tokenSecretKey", "data:;base64," + Base64.getEncoder().encodeToString(SECRET_KEY.getEncoded()));
proxyConfig.setBrokerServiceURL(pulsar.getBrokerServiceUrl());
+ proxyConfig.setBrokerWebServiceURL(pulsar.getWebServiceAddress());
proxyConfig.setServicePort(Optional.of(0));
proxyConfig.setBrokerProxyAllowedTargetPorts("*");
@@ -115,9 +117,10 @@ protected void setup() throws Exception {
proxyConfig.setBrokerClientAuthenticationParameters(PROXY_TOKEN);
proxyConfig.setAuthenticationProviders(providers);
- proxyService = Mockito.spy(new ProxyService(proxyConfig,
- new AuthenticationService(
- PulsarConfigurationLoader.convertFrom(proxyConfig))));
+ AuthenticationService authService =
+ new AuthenticationService(PulsarConfigurationLoader.convertFrom(proxyConfig));
+ proxyService = Mockito.spy(new ProxyService(proxyConfig, authService));
+ webServer = new WebServer(proxyConfig, authService);
}
@AfterMethod(alwaysRun = true)
@@ -125,10 +128,13 @@ protected void setup() throws Exception {
protected void cleanup() throws Exception {
super.internalCleanup();
proxyService.close();
+ webServer.stop();
}
private void startProxy() throws Exception {
proxyService.start();
+ ProxyServiceStarter.addWebServerHandlers(webServer, proxyConfig, proxyService, null);
+ webServer.start();
}
/**
@@ -435,7 +441,7 @@ void testGetMetrics() throws Exception {
}
private void createAdminClient() throws Exception {
- admin = spy(PulsarAdmin.builder().serviceHttpUrl(brokerUrl.toString())
+ admin = spy(PulsarAdmin.builder().serviceHttpUrl(webServer.getServiceUri().toString())
.authentication(AuthenticationFactory.token(ADMIN_TOKEN)).build());
}
From e6bc4999e22a763c9097494970ed6d9796278a23 Mon Sep 17 00:00:00 2001
From: AloysZhang
Date: Thu, 23 Feb 2023 13:18:10 +0800
Subject: [PATCH 007/404] [fix][txn]fix receive duplicated messages due to
pendingAcks in PendingAckHandle (#19581)
Co-authored-by: mayozhang
---
.../mledger/util/PositionAckSetUtil.java | 7 ++
.../service/AbstractBaseDispatcher.java | 13 ++++
.../client/impl/TransactionEndToEndTest.java | 78 +++++++++++++++++++
.../BitSetRecyclableRecyclableTest.java | 17 ++++
4 files changed, 115 insertions(+)
diff --git a/managed-ledger/src/main/java/org/apache/bookkeeper/mledger/util/PositionAckSetUtil.java b/managed-ledger/src/main/java/org/apache/bookkeeper/mledger/util/PositionAckSetUtil.java
index 8173b30c4fea9..1c607582076a8 100644
--- a/managed-ledger/src/main/java/org/apache/bookkeeper/mledger/util/PositionAckSetUtil.java
+++ b/managed-ledger/src/main/java/org/apache/bookkeeper/mledger/util/PositionAckSetUtil.java
@@ -59,6 +59,13 @@ public static long[] andAckSet(long[] firstAckSet, long[] secondAckSet) {
return ackSet;
}
+ public static boolean isAckSetEmpty(long[] ackSet) {
+ BitSetRecyclable bitSet = BitSetRecyclable.create().resetWords(ackSet);
+ boolean isEmpty = bitSet.isEmpty();
+ bitSet.recycle();
+ return isEmpty;
+ }
+
//This method is compare two position which position is bigger than another one.
//When the ledgerId and entryId in this position is same to another one and two position all have ack set, it will
//compare the ack set next bit index is bigger than another one.
diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/AbstractBaseDispatcher.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/AbstractBaseDispatcher.java
index ef2fd80302a98..8f6caa7a20801 100644
--- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/AbstractBaseDispatcher.java
+++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/AbstractBaseDispatcher.java
@@ -19,6 +19,7 @@
package org.apache.pulsar.broker.service;
import static org.apache.bookkeeper.mledger.util.PositionAckSetUtil.andAckSet;
+import static org.apache.bookkeeper.mledger.util.PositionAckSetUtil.isAckSetEmpty;
import io.netty.buffer.ByteBuf;
import io.prometheus.client.Gauge;
import java.util.ArrayList;
@@ -239,6 +240,18 @@ public int filterEntriesForConsumer(@Nullable MessageMetadata[] metadataArray, i
// if actSet is null, use pendingAck ackSet
ackSet = positionInPendingAck.getAckSet();
}
+ // if the result of pendingAckSet(in pendingAckHandle) AND the ackSet(in cursor) is empty
+ // filter this entry
+ if (isAckSetEmpty(ackSet)) {
+ entries.set(i, null);
+ entry.release();
+ continue;
+ }
+ } else {
+ // filter non-batch message in pendingAck state
+ entries.set(i, null);
+ entry.release();
+ continue;
}
}
}
diff --git a/pulsar-broker/src/test/java/org/apache/pulsar/client/impl/TransactionEndToEndTest.java b/pulsar-broker/src/test/java/org/apache/pulsar/client/impl/TransactionEndToEndTest.java
index 527b8532e0452..83feaa3ac1158 100644
--- a/pulsar-broker/src/test/java/org/apache/pulsar/client/impl/TransactionEndToEndTest.java
+++ b/pulsar-broker/src/test/java/org/apache/pulsar/client/impl/TransactionEndToEndTest.java
@@ -176,6 +176,84 @@ private void testIndividualAckAbortFilterAckSetInPendingAckState() throws Except
assertNull(consumer.receive(2, TimeUnit.SECONDS));
}
+
+ @Test(dataProvider="enableBatch")
+ private void testFilterMsgsInPendingAckStateWhenConsumerDisconnect(boolean enableBatch) throws Exception {
+ final String topicName = NAMESPACE1 + "/testFilterMsgsInPendingAckStateWhenConsumerDisconnect-" + enableBatch;
+ final int count = 10;
+
+ @Cleanup
+ Producer producer = null;
+ if (enableBatch) {
+ producer = pulsarClient
+ .newProducer(Schema.INT32)
+ .topic(topicName)
+ .enableBatching(true)
+ .batchingMaxPublishDelay(1, TimeUnit.HOURS)
+ .batchingMaxMessages(count).create();
+ } else {
+ producer = pulsarClient
+ .newProducer(Schema.INT32)
+ .topic(topicName)
+ .enableBatching(false).create();
+ }
+
+ @Cleanup
+ Consumer consumer = pulsarClient
+ .newConsumer(Schema.INT32)
+ .topic(topicName)
+ .isAckReceiptEnabled(true)
+ .subscriptionName("test")
+ .subscriptionType(SubscriptionType.Shared)
+ .enableBatchIndexAcknowledgment(true)
+ .subscribe();
+
+ for (int i = 0; i < count; i++) {
+ producer.sendAsync(i);
+ }
+
+ Transaction txn1 = getTxn();
+
+ Transaction txn2 = getTxn();
+
+
+ // txn1 acks the first half of the messages and is left open (neither committed nor aborted yet)
+ for (int i = 0; i < count / 2; i++) {
+ consumer.acknowledgeAsync(consumer.receive().getMessageId(), txn1).get();
+ }
+
+ // txn2 acks the remaining half of the messages and is then committed
+ for (int i = count / 2; i < count; i++) {
+ consumer.acknowledgeAsync(consumer.receive().getMessageId(), txn2).get();
+ }
+ // commit txn2
+ txn2.commit().get();
+
+ // close and re-create consumer
+ consumer.close();
+ consumer = pulsarClient
+ .newConsumer(Schema.INT32)
+ .topic(topicName)
+ .isAckReceiptEnabled(true)
+ .subscriptionName("test")
+ .subscriptionType(SubscriptionType.Shared)
+ .enableBatchIndexAcknowledgment(true)
+ .subscribe();
+
+ Message message = consumer.receive(3, TimeUnit.SECONDS);
+ Assert.assertNull(message);
+
+ // abort txn1
+ txn1.abort().get();
+ // after txn1 aborted, consumer will receive messages txn1 contains
+ int receiveCounter = 0;
+ while((message = consumer.receive(3, TimeUnit.SECONDS)) != null) {
+ Assert.assertEquals(message.getValue().intValue(), receiveCounter);
+ receiveCounter ++;
+ }
+ Assert.assertEquals(receiveCounter, count / 2);
+ }
+
@Test(dataProvider="enableBatch")
private void produceCommitTest(boolean enableBatch) throws Exception {
@Cleanup
diff --git a/pulsar-common/src/test/java/org/apache/pulsar/common/util/collections/BitSetRecyclableRecyclableTest.java b/pulsar-common/src/test/java/org/apache/pulsar/common/util/collections/BitSetRecyclableRecyclableTest.java
index 8374f2db8961a..8061f853d66c1 100644
--- a/pulsar-common/src/test/java/org/apache/pulsar/common/util/collections/BitSetRecyclableRecyclableTest.java
+++ b/pulsar-common/src/test/java/org/apache/pulsar/common/util/collections/BitSetRecyclableRecyclableTest.java
@@ -45,4 +45,21 @@ public void testResetWords() {
Assert.assertTrue(bitset1.get(128));
Assert.assertFalse(bitset1.get(256));
}
+
+ @Test
+ public void testBitSetEmpty() {
+ BitSetRecyclable bitSet = BitSetRecyclable.create();
+ bitSet.set(0, 5);
+ bitSet.clear(1);
+ bitSet.clear(2);
+ bitSet.clear(3);
+ long[] array = bitSet.toLongArray();
+ Assert.assertFalse(bitSet.isEmpty());
+ Assert.assertFalse(BitSetRecyclable.create().resetWords(array).isEmpty());
+ bitSet.clear(0);
+ bitSet.clear(4);
+ Assert.assertTrue(bitSet.isEmpty());
+ long[] array1 = bitSet.toLongArray();
+ Assert.assertTrue(BitSetRecyclable.create().resetWords(array1).isEmpty());
+ }
}
From e2863391e7f6f9b6c5060f0f78378493f8df37f3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicol=C3=B2=20Boschi?=
Date: Thu, 23 Feb 2023 10:12:10 +0100
Subject: [PATCH 008/404] [fix][client] Broker address resolution wrong if
connect through a multi-dns names proxy (#19597)
---
.../client/impl/ConnectionPoolTest.java | 89 ++++++++++++++++++-
.../pulsar/client/impl/ConnectionPool.java | 24 +++--
.../client/impl/PulsarChannelInitializer.java | 8 +-
3 files changed, 106 insertions(+), 15 deletions(-)
diff --git a/pulsar-broker/src/test/java/org/apache/pulsar/client/impl/ConnectionPoolTest.java b/pulsar-broker/src/test/java/org/apache/pulsar/client/impl/ConnectionPoolTest.java
index e8816894513d2..fb564bd5083c1 100644
--- a/pulsar-broker/src/test/java/org/apache/pulsar/client/impl/ConnectionPoolTest.java
+++ b/pulsar-broker/src/test/java/org/apache/pulsar/client/impl/ConnectionPoolTest.java
@@ -20,12 +20,17 @@
import static org.apache.pulsar.broker.BrokerTestUtil.spyWithClassAndConstructorArgs;
import io.netty.channel.EventLoopGroup;
+import io.netty.resolver.AbstractAddressResolver;
import io.netty.util.concurrent.DefaultThreadFactory;
import java.net.InetSocketAddress;
+import java.net.SocketAddress;
import java.util.ArrayList;
import java.util.List;
+import java.util.Optional;
import java.util.concurrent.CompletableFuture;
+import java.util.function.Supplier;
import java.util.stream.IntStream;
+import io.netty.util.concurrent.Promise;
import org.apache.pulsar.broker.auth.MockedPulsarServiceBaseTest;
import org.apache.pulsar.client.impl.conf.ClientConfigurationData;
import org.apache.pulsar.common.util.netty.EventLoopUtil;
@@ -66,7 +71,7 @@ public void testSingleIpAddress() throws Exception {
List result = new ArrayList<>();
result.add(new InetSocketAddress("127.0.0.1", brokerPort));
Mockito.when(pool.resolveName(InetSocketAddress.createUnresolved("non-existing-dns-name",
- brokerPort)))
+ brokerPort)))
.thenReturn(CompletableFuture.completedFuture(result));
client.newProducer().topic("persistent://sample/standalone/ns/my-topic").create();
@@ -107,7 +112,7 @@ public void testNoConnectionPool() throws Exception {
ConnectionPool pool = spyWithClassAndConstructorArgs(ConnectionPool.class, conf, eventLoop);
InetSocketAddress brokerAddress =
- InetSocketAddress.createUnresolved("127.0.0.1", brokerPort);
+ InetSocketAddress.createUnresolved("127.0.0.1", brokerPort);
IntStream.range(1, 5).forEach(i -> {
pool.getConnection(brokerAddress).thenAccept(cnx -> {
Assert.assertTrue(cnx.channel().isActive());
@@ -119,6 +124,7 @@ public void testNoConnectionPool() throws Exception {
pool.closeAllConnections();
pool.close();
+ eventLoop.shutdownGracefully();
}
@Test
@@ -129,7 +135,7 @@ public void testEnableConnectionPool() throws Exception {
ConnectionPool pool = spyWithClassAndConstructorArgs(ConnectionPool.class, conf, eventLoop);
InetSocketAddress brokerAddress =
- InetSocketAddress.createUnresolved("127.0.0.1", brokerPort);
+ InetSocketAddress.createUnresolved("127.0.0.1", brokerPort);
IntStream.range(1, 10).forEach(i -> {
pool.getConnection(brokerAddress).thenAccept(cnx -> {
Assert.assertTrue(cnx.channel().isActive());
@@ -141,5 +147,82 @@ public void testEnableConnectionPool() throws Exception {
pool.closeAllConnections();
pool.close();
+ eventLoop.shutdownGracefully();
+ }
+
+
+ @Test
+ public void testSetProxyToTargetBrokerAddress() throws Exception {
+ ClientConfigurationData conf = new ClientConfigurationData();
+ conf.setConnectionsPerBroker(5);
+
+
+ EventLoopGroup eventLoop =
+ EventLoopUtil.newEventLoopGroup(8, false,
+ new DefaultThreadFactory("test"));
+
+ final AbstractAddressResolver resolver = new AbstractAddressResolver(eventLoop.next()) {
+ @Override
+ protected boolean doIsResolved(SocketAddress socketAddress) {
+ return !((InetSocketAddress) socketAddress).isUnresolved();
+ }
+
+ @Override
+ protected void doResolve(SocketAddress socketAddress, Promise promise) throws Exception {
+ promise.setFailure(new IllegalStateException());
+ throw new IllegalStateException();
+ }
+
+ @Override
+ protected void doResolveAll(SocketAddress socketAddress, Promise promise) throws Exception {
+ final InetSocketAddress socketAddress1 = (InetSocketAddress) socketAddress;
+ final boolean isProxy = socketAddress1.getHostName().equals("proxy");
+ final boolean isBroker = socketAddress1.getHostName().equals("broker");
+ if (!isProxy && !isBroker) {
+ promise.setFailure(new IllegalStateException());
+ throw new IllegalStateException();
+ }
+ List result = new ArrayList<>();
+ if (isProxy) {
+ result.add(new InetSocketAddress("localhost", brokerPort));
+ result.add(InetSocketAddress.createUnresolved("proxy", brokerPort));
+ } else {
+ result.add(new InetSocketAddress("127.0.0.1", brokerPort));
+ result.add(InetSocketAddress.createUnresolved("broker", brokerPort));
+ }
+ promise.setSuccess(result);
+ }
+ };
+
+ ConnectionPool pool = spyWithClassAndConstructorArgs(ConnectionPool.class, conf, eventLoop,
+ (Supplier) () -> new ClientCnx(conf, eventLoop), Optional.of(resolver));
+
+
+ ClientCnx cnx = pool.getConnection(
+ InetSocketAddress.createUnresolved("proxy", 9999),
+ InetSocketAddress.createUnresolved("proxy", 9999)).get();
+ Assert.assertEquals(cnx.remoteHostName, "proxy");
+ Assert.assertNull(cnx.proxyToTargetBrokerAddress);
+ cnx.close();
+
+ cnx = pool.getConnection(
+ InetSocketAddress.createUnresolved("broker", 9999),
+ InetSocketAddress.createUnresolved("proxy", 9999)).get();
+ Assert.assertEquals(cnx.remoteHostName, "proxy");
+ Assert.assertEquals(cnx.proxyToTargetBrokerAddress, "broker:9999");
+ cnx.close();
+
+
+ cnx = pool.getConnection(
+ InetSocketAddress.createUnresolved("broker", 9999),
+ InetSocketAddress.createUnresolved("broker", 9999)).get();
+ Assert.assertEquals(cnx.remoteHostName, "broker");
+ Assert.assertNull(cnx.proxyToTargetBrokerAddress);
+ cnx.close();
+
+
+ pool.closeAllConnections();
+ pool.close();
+ eventLoop.shutdownGracefully();
}
}
diff --git a/pulsar-client/src/main/java/org/apache/pulsar/client/impl/ConnectionPool.java b/pulsar-client/src/main/java/org/apache/pulsar/client/impl/ConnectionPool.java
index 2e105b5328467..3a9a2b9b7ab94 100644
--- a/pulsar-client/src/main/java/org/apache/pulsar/client/impl/ConnectionPool.java
+++ b/pulsar-client/src/main/java/org/apache/pulsar/client/impl/ConnectionPool.java
@@ -305,8 +305,12 @@ private CompletableFuture createConnection(InetSocketAddress logicalAdd
resolvedAddress = resolveName(unresolvedPhysicalAddress);
}
return resolvedAddress.thenCompose(
- inetAddresses -> connectToResolvedAddresses(logicalAddress, inetAddresses.iterator(),
- isSniProxy ? unresolvedPhysicalAddress : null));
+ inetAddresses -> connectToResolvedAddresses(
+ logicalAddress,
+ unresolvedPhysicalAddress,
+ inetAddresses.iterator(),
+ isSniProxy ? unresolvedPhysicalAddress : null)
+ );
} catch (URISyntaxException e) {
log.error("Invalid Proxy url {}", clientConfig.getProxyServiceUrl(), e);
return FutureUtil
@@ -319,17 +323,19 @@ private CompletableFuture createConnection(InetSocketAddress logicalAdd
* address is working.
*/
private CompletableFuture connectToResolvedAddresses(InetSocketAddress logicalAddress,
+ InetSocketAddress unresolvedPhysicalAddress,
Iterator resolvedPhysicalAddress,
InetSocketAddress sniHost) {
CompletableFuture future = new CompletableFuture<>();
// Successfully connected to server
- connectToAddress(logicalAddress, resolvedPhysicalAddress.next(), sniHost)
+ connectToAddress(logicalAddress, resolvedPhysicalAddress.next(), unresolvedPhysicalAddress, sniHost)
.thenAccept(future::complete)
.exceptionally(exception -> {
if (resolvedPhysicalAddress.hasNext()) {
// Try next IP address
- connectToResolvedAddresses(logicalAddress, resolvedPhysicalAddress, sniHost)
+ connectToResolvedAddresses(logicalAddress, unresolvedPhysicalAddress,
+ resolvedPhysicalAddress, sniHost)
.thenAccept(future::complete)
.exceptionally(ex -> {
// This is already unwinding the recursive call
@@ -362,20 +368,24 @@ CompletableFuture> resolveName(InetSocketAddress unresol
* Attempt to establish a TCP connection to an already resolved single IP address.
*/
private CompletableFuture connectToAddress(InetSocketAddress logicalAddress,
- InetSocketAddress physicalAddress, InetSocketAddress sniHost) {
+ InetSocketAddress physicalAddress,
+ InetSocketAddress unresolvedPhysicalAddress,
+ InetSocketAddress sniHost) {
if (clientConfig.isUseTls()) {
return toCompletableFuture(bootstrap.register())
.thenCompose(channel -> channelInitializerHandler
.initTls(channel, sniHost != null ? sniHost : physicalAddress))
.thenCompose(channelInitializerHandler::initSocks5IfConfig)
.thenCompose(ch ->
- channelInitializerHandler.initializeClientCnx(ch, logicalAddress, physicalAddress))
+ channelInitializerHandler.initializeClientCnx(ch, logicalAddress,
+ unresolvedPhysicalAddress))
.thenCompose(channel -> toCompletableFuture(channel.connect(physicalAddress)));
} else {
return toCompletableFuture(bootstrap.register())
.thenCompose(channelInitializerHandler::initSocks5IfConfig)
.thenCompose(ch ->
- channelInitializerHandler.initializeClientCnx(ch, logicalAddress, physicalAddress))
+ channelInitializerHandler.initializeClientCnx(ch, logicalAddress,
+ unresolvedPhysicalAddress))
.thenCompose(channel -> toCompletableFuture(channel.connect(physicalAddress)));
}
}
diff --git a/pulsar-client/src/main/java/org/apache/pulsar/client/impl/PulsarChannelInitializer.java b/pulsar-client/src/main/java/org/apache/pulsar/client/impl/PulsarChannelInitializer.java
index e01b53b8ef136..ed34f7d41c130 100644
--- a/pulsar-client/src/main/java/org/apache/pulsar/client/impl/PulsarChannelInitializer.java
+++ b/pulsar-client/src/main/java/org/apache/pulsar/client/impl/PulsarChannelInitializer.java
@@ -213,7 +213,7 @@ CompletableFuture initSocks5IfConfig(Channel ch) {
CompletableFuture initializeClientCnx(Channel ch,
InetSocketAddress logicalAddress,
- InetSocketAddress resolvedPhysicalAddress) {
+ InetSocketAddress unresolvedPhysicalAddress) {
return NettyFutureUtil.toCompletableFuture(ch.eventLoop().submit(() -> {
final ClientCnx cnx = (ClientCnx) ch.pipeline().get("handler");
@@ -221,15 +221,13 @@ CompletableFuture initializeClientCnx(Channel ch,
throw new IllegalStateException("Missing ClientCnx. This should not happen.");
}
- // Need to do our own equality because the physical address is resolved already
- if (!(logicalAddress.getHostString().equalsIgnoreCase(resolvedPhysicalAddress.getHostString())
- && logicalAddress.getPort() == resolvedPhysicalAddress.getPort())) {
+ if (!logicalAddress.equals(unresolvedPhysicalAddress)) {
// We are connecting through a proxy. We need to set the target broker in the ClientCnx object so that
// it can be specified when sending the CommandConnect.
cnx.setTargetBroker(logicalAddress);
}
- cnx.setRemoteHostName(resolvedPhysicalAddress.getHostString());
+ cnx.setRemoteHostName(unresolvedPhysicalAddress.getHostString());
return ch;
}));
From 0bb0f6b786d115a7405867b701521cd4a49340c5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicol=C3=B2=20Boschi?=
Date: Thu, 23 Feb 2023 13:54:27 +0100
Subject: [PATCH 009/404] [fix][broker] Copy command fields and fix potential
thread-safety in ServerCnx (#19517)
---
.../java/org/apache/pulsar/broker/service/ServerCnx.java | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/ServerCnx.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/ServerCnx.java
index adee29c5a0105..242947f6a0fd6 100644
--- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/ServerCnx.java
+++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/service/ServerCnx.java
@@ -2438,10 +2438,11 @@ protected void handleAddPartitionToTxn(CommandAddPartitionToTxn command) {
final TxnID txnID = new TxnID(command.getTxnidMostBits(), command.getTxnidLeastBits());
final TransactionCoordinatorID tcId = TransactionCoordinatorID.get(command.getTxnidMostBits());
final long requestId = command.getRequestId();
+ final List partitionsList = command.getPartitionsList();
if (log.isDebugEnabled()) {
- command.getPartitionsList().forEach(partion ->
+ partitionsList.forEach(partition ->
log.debug("Receive add published partition to txn request {} "
- + "from {} with txnId {}, topic: [{}]", requestId, remoteAddress, txnID, partion));
+ + "from {} with txnId {}, topic: [{}]", requestId, remoteAddress, txnID, partition));
}
if (!checkTransactionEnableAndSendError(requestId)) {
@@ -2456,7 +2457,7 @@ protected void handleAddPartitionToTxn(CommandAddPartitionToTxn command) {
return failedFutureTxnNotOwned(txnID);
}
return transactionMetadataStoreService
- .addProducedPartitionToTxn(txnID, command.getPartitionsList());
+ .addProducedPartitionToTxn(txnID, partitionsList);
})
.whenComplete((v, ex) -> {
if (ex == null) {
From 389792b1fc7a56647ccfc820e83ae08dfed037df Mon Sep 17 00:00:00 2001
From: Heesung Sohn <103456639+heesung-sn@users.noreply.github.com>
Date: Thu, 23 Feb 2023 07:23:00 -0800
Subject: [PATCH 010/404] [improve][broker] PIP-192 Added Deleted and Init
states in ServiceUnitState (#19546)
---
.../pulsar/broker/ServiceConfiguration.java | 17 +
.../ExtensibleLoadManagerWrapper.java | 4 +
.../extensions/channel/ServiceUnitState.java | 67 +-
.../channel/ServiceUnitStateChannelImpl.java | 513 ++++++++++-----
.../ServiceUnitStateCompactionStrategy.java | 93 ++-
.../channel/ServiceUnitStateData.java | 20 +-
.../StrategicTwoPhaseCompactor.java | 39 +-
.../ExtensibleLoadManagerImplTest.java | 187 +++---
.../channel/ServiceUnitStateChannelTest.java | 615 +++++++++++++++---
...erviceUnitStateCompactionStrategyTest.java | 125 ++--
.../channel/ServiceUnitStateDataTest.java | 15 +-
.../channel/ServiceUnitStateTest.java | 82 ++-
.../ServiceUnitStateCompactionTest.java | 333 +++++++---
13 files changed, 1508 insertions(+), 602 deletions(-)
diff --git a/pulsar-broker-common/src/main/java/org/apache/pulsar/broker/ServiceConfiguration.java b/pulsar-broker-common/src/main/java/org/apache/pulsar/broker/ServiceConfiguration.java
index 106410d855e22..4f2c8e72e131d 100644
--- a/pulsar-broker-common/src/main/java/org/apache/pulsar/broker/ServiceConfiguration.java
+++ b/pulsar-broker-common/src/main/java/org/apache/pulsar/broker/ServiceConfiguration.java
@@ -2450,6 +2450,7 @@ The delayed message index bucket time step(in seconds) in per bucket snapshot se
)
private long namespaceBundleUnloadingTimeoutMs = 60000;
+ /**** --- Load Balancer Extension. --- ****/
@FieldContext(
category = CATEGORY_LOAD_BALANCER,
dynamic = true,
@@ -2525,6 +2526,22 @@ The delayed message index bucket time step(in seconds) in per bucket snapshot se
)
private double loadBalancerBundleLoadReportPercentage = 10;
+ @FieldContext(
+ category = CATEGORY_LOAD_BALANCER,
+ doc = "After this delay, the service-unit state channel tombstones any service units (e.g., bundles) "
+ + "in semi-terminal states. For example, after splits, parent bundles will be `deleted`, "
+ + "and then after this delay, the parent bundles' state will be `tombstoned` "
+ + "in the service-unit state channel. "
+ + "Pulsar does not immediately remove such semi-terminal states "
+ + "to avoid unnecessary system confusion, "
+ + "as the bundles in the `tombstoned` state might temporarily look available to reassign. "
+ + "Rarely, one could lower this delay in order to aggressively clean "
+ + "the service-unit state channel when there are a large number of bundles. "
+ + "minimum value = 30 secs"
+ + "(only used in load balancer extension logics)"
+ )
+ private long loadBalancerServiceUnitStateCleanUpDelayTimeInSeconds = 604800;
+
/**** --- Replication. --- ****/
@FieldContext(
category = CATEGORY_REPLICATION,
diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/loadbalance/extensions/ExtensibleLoadManagerWrapper.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/loadbalance/extensions/ExtensibleLoadManagerWrapper.java
index 48fc4bb7ff4f0..1eabbe620e213 100644
--- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/loadbalance/extensions/ExtensibleLoadManagerWrapper.java
+++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/loadbalance/extensions/ExtensibleLoadManagerWrapper.java
@@ -142,4 +142,8 @@ public void doNamespaceBundleSplit() {
throw new UnsupportedOperationException();
}
+ public ExtensibleLoadManagerImpl get() {
+ return loadManager;
+ }
+
}
diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/loadbalance/extensions/channel/ServiceUnitState.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/loadbalance/extensions/channel/ServiceUnitState.java
index 3225c0ba7bbc7..92fef8f65992a 100644
--- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/loadbalance/extensions/channel/ServiceUnitState.java
+++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/loadbalance/extensions/channel/ServiceUnitState.java
@@ -24,55 +24,38 @@
/**
* Defines the possible states for service units.
*
- * The following diagram defines the valid state changes
- *
- * ┌───────────┐
- * ┌──────────┤ released │◄────────┐
- * │own └───────────┘ │release
- * │ │
- * │ │
- * ▼ │
- * ┌────────┐ assign(transfer) ┌─────┴────┐
- * │ ├───────────────────►│ │
- * │ owned │ │ assigned │
- * │ │◄───────────────────┤ │
- * └──┬─────┤ own └──────────┘
- * │ ▲ │ ▲
- * │ │ │ │
- * │ │ └──────────────┐ │
- * │ │ │ │
- * │ │ unload │ │ assign(assignment)
- * split │ │ │ │
- * │ │ │ │
- * │ │ create(child) │ │
- * │ │ │ │
- * ▼ │ │ │
- * ┌─────┴─────┐ └─────►┌───┴──────┐
- * │ │ │ │
- * │ splitting ├────────────────► │ free │
- * │ │ discard(parent)│ │
- * └───────────┘ └──────────┘
+ * @see Service Unit State Channel for additional details.
*/
public enum ServiceUnitState {
- Free, // not owned by any broker (terminal state)
+ Init, // initializing the state. no previous state (terminal state)
+
+ Free, // not owned by any broker (semi-terminal state)
Owned, // owned by a broker (terminal state)
- Assigned, // the ownership is assigned(but the assigned broker has not been notified the ownership yet)
+ Assigning, // the ownership is being assigned (e.g. the new ownership is being notified to the target broker)
- Released, // the source broker's ownership has been released (e.g. the topic connections are closed)
+ Releasing, // the source broker's ownership is being released (e.g. the topic connections are being closed)
- Splitting; // the service unit(e.g. bundle) is in the process of splitting.
+ Splitting, // the service unit is in the process of splitting. (e.g. the metadata store is being updated)
- private static Map> validTransitions = Map.of(
- // (Free -> Released | Splitting) transitions are required
- // when the topic is compacted in the middle of transfer or split.
- Free, Set.of(Owned, Assigned, Released, Splitting),
- Owned, Set.of(Assigned, Splitting, Free),
- Assigned, Set.of(Owned, Released, Free),
- Released, Set.of(Owned, Free),
- Splitting, Set.of(Free)
+ Deleted; // deleted in the system (semi-terminal state)
+
+ private static final Map> validTransitions = Map.of(
+ // (Init -> all states) transitions are required
+ // when the topic is compacted in the middle of assign, transfer or split.
+ Init, Set.of(Free, Owned, Assigning, Releasing, Splitting, Deleted),
+ Free, Set.of(Assigning, Init),
+ Owned, Set.of(Assigning, Splitting, Releasing),
+ Assigning, Set.of(Owned, Releasing),
+ Releasing, Set.of(Owned, Free),
+ Splitting, Set.of(Deleted),
+ Deleted, Set.of(Init)
+ );
+
+ private static final Set inFlightStates = Set.of(
+ Assigning, Releasing, Splitting
);
public static boolean isValidTransition(ServiceUnitState from, ServiceUnitState to) {
@@ -80,4 +63,8 @@ public static boolean isValidTransition(ServiceUnitState from, ServiceUnitState
return transitions.contains(to);
}
+ public static boolean isInFlightState(ServiceUnitState state) {
+ return inFlightStates.contains(state);
+ }
+
}
diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/loadbalance/extensions/channel/ServiceUnitStateChannelImpl.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/loadbalance/extensions/channel/ServiceUnitStateChannelImpl.java
index d10138bda6805..9f205f85c5454 100644
--- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/loadbalance/extensions/channel/ServiceUnitStateChannelImpl.java
+++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/loadbalance/extensions/channel/ServiceUnitStateChannelImpl.java
@@ -20,10 +20,13 @@
import static java.lang.String.format;
import static java.util.concurrent.TimeUnit.MILLISECONDS;
-import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitState.Assigned;
+import static java.util.concurrent.TimeUnit.SECONDS;
+import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitState.Assigning;
+import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitState.Deleted;
import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitState.Free;
+import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitState.Init;
import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitState.Owned;
-import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitState.Released;
+import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitState.Releasing;
import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitState.Splitting;
import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitStateChannelImpl.ChannelState.Closed;
import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitStateChannelImpl.ChannelState.Constructed;
@@ -35,6 +38,7 @@
import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitStateChannelImpl.MetadataState.Jittery;
import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitStateChannelImpl.MetadataState.Stable;
import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitStateChannelImpl.MetadataState.Unstable;
+import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitStateData.state;
import static org.apache.pulsar.metadata.api.extended.SessionEvent.SessionLost;
import static org.apache.pulsar.metadata.api.extended.SessionEvent.SessionReestablished;
import com.google.common.annotations.VisibleForTesting;
@@ -61,9 +65,15 @@
import org.apache.commons.lang3.mutable.MutableInt;
import org.apache.pulsar.broker.PulsarServerException;
import org.apache.pulsar.broker.PulsarService;
+import org.apache.pulsar.broker.ServiceConfiguration;
import org.apache.pulsar.broker.loadbalance.LeaderElectionService;
+import org.apache.pulsar.broker.loadbalance.extensions.BrokerRegistry;
+import org.apache.pulsar.broker.loadbalance.extensions.ExtensibleLoadManagerWrapper;
+import org.apache.pulsar.broker.loadbalance.extensions.LoadManagerContext;
import org.apache.pulsar.broker.loadbalance.extensions.models.Split;
import org.apache.pulsar.broker.loadbalance.extensions.models.Unload;
+import org.apache.pulsar.broker.loadbalance.extensions.strategy.BrokerSelectionStrategy;
+import org.apache.pulsar.broker.loadbalance.extensions.strategy.LeastResourceUsageWithWeight;
import org.apache.pulsar.broker.loadbalance.impl.LoadManagerShared;
import org.apache.pulsar.broker.namespace.NamespaceService;
import org.apache.pulsar.broker.service.BrokerServiceException;
@@ -92,37 +102,42 @@ public class ServiceUnitStateChannelImpl implements ServiceUnitStateChannel {
TopicDomain.persistent.value(),
NamespaceName.SYSTEM_NAMESPACE,
"loadbalancer-service-unit-state").toString();
-
- // TODO: define StateCompactionStrategy
private static final long MAX_IN_FLIGHT_STATE_WAITING_TIME_IN_MILLIS = 30 * 1000; // 30sec
+
+ private static final long OWNERSHIP_MONITOR_DELAY_TIME_IN_SECS = 60;
public static final long MAX_CLEAN_UP_DELAY_TIME_IN_SECS = 3 * 60; // 3 mins
private static final long MIN_CLEAN_UP_DELAY_TIME_IN_SECS = 0; // 0 secs to clean immediately
-
private static final long MAX_CHANNEL_OWNER_ELECTION_WAITING_TIME_IN_SECS = 10;
private static final int MAX_OUTSTANDING_PUB_MESSAGES = 500;
private final PulsarService pulsar;
+ private final ServiceConfiguration config;
private final Schema schema;
private final ConcurrentOpenHashMap> getOwnerRequests;
private final String lookupServiceAddress;
- // TODO: define BrokerRegistry
private final ConcurrentOpenHashMap> cleanupJobs;
private final LeaderElectionService leaderElectionService;
+ private BrokerSelectionStrategy brokerSelector;
+ private BrokerRegistry brokerRegistry;
private TableView tableview;
private Producer producer;
- private ScheduledFuture> cleanupTasks;
+ private ScheduledFuture> monitorTask;
private SessionEvent lastMetadataSessionEvent = SessionReestablished;
private long lastMetadataSessionEventTimestamp = 0;
private long inFlightStateWaitingTimeInMillis;
+
+ private long ownershipMonitorDelayTimeInSecs;
+ private long semiTerminalStateWaitingTimeInMillis;
private long maxCleanupDelayTimeInSecs;
private long minCleanupDelayTimeInSecs;
// cleanup metrics
- private long totalCleanupCnt = 0;
- private long totalBrokerCleanupTombstoneCnt = 0;
- private long totalServiceUnitCleanupTombstoneCnt = 0;
+ private long totalInactiveBrokerCleanupCnt = 0;
+ private long totalServiceUnitTombstoneCleanupCnt = 0;
+
+ private long totalOrphanServiceUnitCleanupCnt = 0;
private AtomicLong totalCleanupErrorCnt = new AtomicLong();
- private long totalCleanupScheduledCnt = 0;
- private long totalCleanupIgnoredCnt = 0;
- private long totalCleanupCancelledCnt = 0;
+ private long totalInactiveBrokerCleanupScheduledCnt = 0;
+ private long totalInactiveBrokerCleanupIgnoredCnt = 0;
+ private long totalInactiveBrokerCleanupCancelledCnt = 0;
private volatile ChannelState channelState;
public enum EventType {
@@ -135,30 +150,18 @@ public enum EventType {
@Getter
@AllArgsConstructor
public static class Counters {
- private AtomicLong total;
- private AtomicLong failure;
+ private final AtomicLong total;
+ private final AtomicLong failure;
+ public Counters(){
+ total = new AtomicLong();
+ failure = new AtomicLong();
+ }
}
// operation metrics
- final Map ownerLookUpCounters = Map.of(
- Owned, new AtomicLong(),
- Assigned, new AtomicLong(),
- Released, new AtomicLong(),
- Splitting, new AtomicLong(),
- Free, new AtomicLong()
- );
- final Map eventCounters = Map.of(
- Assign, new Counters(new AtomicLong(), new AtomicLong()),
- Split, new Counters(new AtomicLong(), new AtomicLong()),
- Unload, new Counters(new AtomicLong(), new AtomicLong())
- );
- final Map handlerCounters = Map.of(
- Owned, new Counters(new AtomicLong(), new AtomicLong()),
- Assigned, new Counters(new AtomicLong(), new AtomicLong()),
- Released, new Counters(new AtomicLong(), new AtomicLong()),
- Splitting, new Counters(new AtomicLong(), new AtomicLong()),
- Free, new Counters(new AtomicLong(), new AtomicLong())
- );
+ final Map ownerLookUpCounters;
+ final Map eventCounters;
+ final Map handlerCounters;
enum ChannelState {
Closed(0),
@@ -180,25 +183,61 @@ enum MetadataState {
public ServiceUnitStateChannelImpl(PulsarService pulsar) {
this.pulsar = pulsar;
+ this.config = pulsar.getConfig();
this.lookupServiceAddress = pulsar.getLookupServiceAddress();
this.schema = Schema.JSON(ServiceUnitStateData.class);
this.getOwnerRequests = ConcurrentOpenHashMap.>newBuilder().build();
this.cleanupJobs = ConcurrentOpenHashMap.>newBuilder().build();
+ this.semiTerminalStateWaitingTimeInMillis = config.getLoadBalancerServiceUnitStateCleanUpDelayTimeInSeconds()
+ * 1000;
this.inFlightStateWaitingTimeInMillis = MAX_IN_FLIGHT_STATE_WAITING_TIME_IN_MILLIS;
+ this.ownershipMonitorDelayTimeInSecs = OWNERSHIP_MONITOR_DELAY_TIME_IN_SECS;
+ if (semiTerminalStateWaitingTimeInMillis < inFlightStateWaitingTimeInMillis) {
+ throw new IllegalArgumentException(
+ "Invalid Config: loadBalancerServiceUnitStateCleanUpDelayTimeInSeconds < "
+ + (MAX_IN_FLIGHT_STATE_WAITING_TIME_IN_MILLIS / 1000) + " secs");
+ }
this.maxCleanupDelayTimeInSecs = MAX_CLEAN_UP_DELAY_TIME_IN_SECS;
this.minCleanupDelayTimeInSecs = MIN_CLEAN_UP_DELAY_TIME_IN_SECS;
this.leaderElectionService = new LeaderElectionService(
pulsar.getCoordinationService(), pulsar.getSafeWebServiceAddress(),
state -> {
if (state == LeaderElectionState.Leading) {
- log.debug("This broker:{} is the leader now.", lookupServiceAddress);
- // TODO: schedule monitorOwnerships by brokerRegistry
+ log.info("This broker:{} is the leader now.", lookupServiceAddress);
+ this.monitorTask = this.pulsar.getLoadManagerExecutor()
+ .scheduleWithFixedDelay(() -> {
+ try {
+ monitorOwnerships(brokerRegistry.getAvailableBrokersAsync()
+ .get(inFlightStateWaitingTimeInMillis, MILLISECONDS));
+ } catch (Exception e) {
+ log.info("Failed to monitor the ownerships. will retry..", e);
+ }
+ },
+ ownershipMonitorDelayTimeInSecs, ownershipMonitorDelayTimeInSecs, SECONDS);
} else {
- log.debug("This broker:{} is a follower now.", lookupServiceAddress);
- // TODO: cancel scheduled monitorOwnerships if any
+ log.info("This broker:{} is a follower now.", lookupServiceAddress);
+ if (monitorTask != null) {
+ monitorTask.cancel(false);
+ monitorTask = null;
+ log.info("This previous leader broker:{} stopped the channel clean-up monitor",
+ lookupServiceAddress);
+ }
}
});
+ Map tmpOwnerLookUpCounters = new HashMap<>();
+ Map tmpHandlerCounters = new HashMap<>();
+ Map tmpEventCounters = new HashMap<>();
+ for (var state : ServiceUnitState.values()) {
+ tmpOwnerLookUpCounters.put(state, new AtomicLong());
+ tmpHandlerCounters.put(state, new Counters());
+ }
+ for (var event : EventType.values()) {
+ tmpEventCounters.put(event, new Counters());
+ }
+ ownerLookUpCounters = Map.copyOf(tmpOwnerLookUpCounters);
+ handlerCounters = Map.copyOf(tmpHandlerCounters);
+ eventCounters = Map.copyOf(tmpEventCounters);
this.channelState = Constructed;
}
@@ -207,14 +246,22 @@ public synchronized void start() throws PulsarServerException {
throw new IllegalStateException("Invalid channel state:" + channelState.name());
}
+ boolean debug = debug();
try {
+ this.brokerRegistry = getBrokerRegistry();
+ this.brokerRegistry.addListener(this::handleBrokerRegistrationEvent);
leaderElectionService.start();
this.channelState = LeaderElectionServiceStarted;
- log.debug("Successfully started the channel leader election service.");
+ if (debug) {
+ log.info("Successfully started the channel leader election service.");
+ }
+ brokerSelector = getBrokerSelector();
if (producer != null) {
producer.close();
- log.debug("Closed the channel producer.");
+ if (debug) {
+ log.info("Closed the channel producer.");
+ }
}
producer = pulsar.getClient().newProducer(schema)
.enableBatching(true)
@@ -223,11 +270,15 @@ public synchronized void start() throws PulsarServerException {
.topic(TOPIC)
.create();
- log.debug("Successfully started the channel producer.");
+ if (debug) {
+ log.info("Successfully started the channel producer.");
+ }
if (tableview != null) {
tableview.close();
- log.debug("Closed the channel tableview.");
+ if (debug) {
+ log.info("Closed the channel tableview.");
+ }
}
tableview = pulsar.getClient().newTableViewBuilder(schema)
.topic(TOPIC)
@@ -236,10 +287,13 @@ public synchronized void start() throws PulsarServerException {
ServiceUnitStateCompactionStrategy.class.getName()))
.create();
tableview.listen((key, value) -> handle(key, value));
- log.debug("Successfully started the channel tableview.");
-
+ if (debug) {
+ log.info("Successfully started the channel tableview.");
+ }
pulsar.getLocalMetadataStore().registerSessionListener(this::handleMetadataSessionEvent);
- log.debug("Successfully registered the handleMetadataSessionEvent");
+ if (debug) {
+ log.info("Successfully registered the handleMetadataSessionEvent");
+ }
channelState = Started;
log.info("Successfully started the channel.");
@@ -250,16 +304,39 @@ public synchronized void start() throws PulsarServerException {
}
}
+ @VisibleForTesting
+ protected BrokerRegistry getBrokerRegistry() {
+ return ((ExtensibleLoadManagerWrapper) pulsar.getLoadManager().get())
+ .get().getBrokerRegistry();
+ }
+
+ @VisibleForTesting
+ protected LoadManagerContext getContext() {
+ return ((ExtensibleLoadManagerWrapper) pulsar.getLoadManager().get())
+ .get().getContext();
+ }
+
+ @VisibleForTesting
+ protected BrokerSelectionStrategy getBrokerSelector() {
+ // TODO: make this selector configurable.
+ return new LeastResourceUsageWithWeight();
+ }
+
public synchronized void close() throws PulsarServerException {
channelState = Closed;
+ boolean debug = debug();
try {
leaderElectionService.close();
- log.debug("Successfully closed the channel leader election service.");
+ if (debug) {
+ log.info("Successfully closed the channel leader election service.");
+ }
if (tableview != null) {
tableview.close();
tableview = null;
- log.debug("Successfully closed the channel tableview.");
+ if (debug) {
+ log.info("Successfully closed the channel tableview.");
+ }
}
if (producer != null) {
@@ -268,11 +345,13 @@ public synchronized void close() throws PulsarServerException {
log.info("Successfully closed the channel producer.");
}
- // TODO: clean brokerRegistry
+ if (brokerRegistry != null) {
+ brokerRegistry = null;
+ }
- if (cleanupTasks != null) {
- cleanupTasks.cancel(true);
- cleanupTasks = null;
+ if (monitorTask != null) {
+ monitorTask.cancel(true);
+ monitorTask = null;
log.info("Successfully cancelled the cleanup tasks");
}
@@ -294,7 +373,7 @@ private boolean validateChannelState(ChannelState targetState, boolean checkLowe
}
private boolean debug() {
- return pulsar.getConfiguration().isLoadBalancerDebugModeEnabled() || log.isDebugEnabled();
+ return config.isLoadBalancerDebugModeEnabled() || log.isDebugEnabled();
}
public CompletableFuture> getChannelOwnerAsync() {
@@ -348,18 +427,22 @@ public CompletableFuture> getOwnerAsync(String serviceUnit) {
}
ServiceUnitStateData data = tableview.get(serviceUnit);
- ServiceUnitState state = data == null ? Free : data.state();
+ ServiceUnitState state = state(data);
ownerLookUpCounters.get(state).incrementAndGet();
switch (state) {
case Owned, Splitting -> {
return CompletableFuture.completedFuture(Optional.of(data.broker()));
}
- case Assigned, Released -> {
- return deferGetOwnerRequest(serviceUnit).thenApply(Optional::of);
+ case Assigning, Releasing -> {
+ return deferGetOwnerRequest(serviceUnit).thenApply(
+ broker -> broker == null ? Optional.empty() : Optional.of(broker));
}
- case Free -> {
+ case Init, Free -> {
return CompletableFuture.completedFuture(Optional.empty());
}
+ case Deleted -> {
+ return CompletableFuture.failedFuture(new IllegalArgumentException(serviceUnit + " is deleted."));
+ }
default -> {
String errorMsg = String.format("Failed to process service unit state data: %s when get owner.", data);
log.error(errorMsg);
@@ -372,7 +455,7 @@ public CompletableFuture publishAssignEventAsync(String serviceUnit, Str
EventType eventType = Assign;
eventCounters.get(eventType).getTotal().incrementAndGet();
CompletableFuture getOwnerRequest = deferGetOwnerRequest(serviceUnit);
- pubAsync(serviceUnit, new ServiceUnitStateData(Assigned, broker))
+ pubAsync(serviceUnit, new ServiceUnitStateData(Assigning, broker))
.whenComplete((__, ex) -> {
if (ex != null) {
getOwnerRequests.remove(serviceUnit, getOwnerRequest);
@@ -391,11 +474,11 @@ public CompletableFuture publishUnloadEventAsync(Unload unload) {
String serviceUnit = unload.serviceUnit();
CompletableFuture future;
if (isTransferCommand(unload)) {
- ServiceUnitStateData next = new ServiceUnitStateData(Assigned,
- unload.destBroker().get(), unload.sourceBroker());
- future = pubAsync(serviceUnit, next);
+ future = pubAsync(serviceUnit, new ServiceUnitStateData(
+ Assigning, unload.destBroker().get(), unload.sourceBroker()));
} else {
- future = tombstoneAsync(serviceUnit);
+ future = pubAsync(serviceUnit, new ServiceUnitStateData(
+ Releasing, unload.sourceBroker()));
}
return future.whenComplete((__, ex) -> {
@@ -424,14 +507,16 @@ private void handle(String serviceUnit, ServiceUnitStateData data) {
lookupServiceAddress, serviceUnit, data, totalHandledRequests);
}
- ServiceUnitState state = data == null ? Free : data.state();
+ ServiceUnitState state = state(data);
try {
switch (state) {
case Owned -> handleOwnEvent(serviceUnit, data);
- case Assigned -> handleAssignEvent(serviceUnit, data);
- case Released -> handleReleaseEvent(serviceUnit, data);
+ case Assigning -> handleAssignEvent(serviceUnit, data);
+ case Releasing -> handleReleaseEvent(serviceUnit, data);
case Splitting -> handleSplitEvent(serviceUnit, data);
- case Free -> handleFreeEvent(serviceUnit);
+ case Deleted -> handleDeleteEvent(serviceUnit, data);
+ case Free -> handleFreeEvent(serviceUnit, data);
+ case Init -> handleInitEvent(serviceUnit);
default -> throw new IllegalStateException("Failed to handle channel data:" + data);
}
} catch (Throwable e){
@@ -453,7 +538,7 @@ private static boolean isTransferCommand(Unload data) {
}
private static String getLogEventTag(ServiceUnitStateData data) {
- return data == null ? "Free" :
+ return data == null ? Init.toString() :
isTransferCommand(data) ? "Transfer:" + data.state() : data.state().toString();
}
@@ -466,7 +551,7 @@ private AtomicLong getHandlerFailureCounter(ServiceUnitStateData data) {
}
private AtomicLong getHandlerCounter(ServiceUnitStateData data, boolean total) {
- var state = data == null ? Free : data.state();
+ var state = state(data);
var counter = total
? handlerCounters.get(state).getTotal() : handlerCounters.get(state).getFailure();
if (counter == null) {
@@ -512,22 +597,30 @@ private void handleOwnEvent(String serviceUnit, ServiceUnitStateData data) {
}
+ /**
+ * Handles an Assigning event for a service unit.
+ * When isTargetBroker(data.broker()) holds, publishes the next state for the same broker pair:
+ * Releasing if the data is a transfer command, Owned otherwise. The publish outcome is logged.
+ */
private void handleAssignEvent(String serviceUnit, ServiceUnitStateData data) {
- deferGetOwnerRequest(serviceUnit);
if (isTargetBroker(data.broker())) {
ServiceUnitStateData next = new ServiceUnitStateData(
- isTransferCommand(data) ? Released : Owned, data.broker(), data.sourceBroker());
+ isTransferCommand(data) ? Releasing : Owned, data.broker(), data.sourceBroker());
pubAsync(serviceUnit, next)
.whenComplete((__, e) -> log(e, serviceUnit, data, next));
}
}
+ /**
+ * Handles a Releasing event for a service unit.
+ * Transfer command: when isTargetBroker(data.sourceBroker()) holds, closes the service unit and
+ * then publishes Owned for data.broker(), keeping the source broker in the data.
+ * Otherwise: when isTargetBroker(data.broker()) holds, closes the service unit and then publishes Free.
+ * In both cases the publish only happens after closeServiceUnit completes, and the outcome is logged.
+ */
private void handleReleaseEvent(String serviceUnit, ServiceUnitStateData data) {
- if (isTargetBroker(data.sourceBroker())) {
- ServiceUnitStateData next = new ServiceUnitStateData(Owned, data.broker(), data.sourceBroker());
- // TODO: when close, pass message to clients to connect to the new broker
- closeServiceUnit(serviceUnit)
- .thenCompose(__ -> pubAsync(serviceUnit, next))
- .whenComplete((__, e) -> log(e, serviceUnit, data, next));
+ if (isTransferCommand(data)) {
+ if (isTargetBroker(data.sourceBroker())) {
+ ServiceUnitStateData next = new ServiceUnitStateData(Owned, data.broker(), data.sourceBroker());
+ // TODO: when close, pass message to clients to connect to the new broker
+ closeServiceUnit(serviceUnit)
+ .thenCompose(__ -> pubAsync(serviceUnit, next))
+ .whenComplete((__, e) -> log(e, serviceUnit, data, next));
+ }
+ } else {
+ if (isTargetBroker(data.broker())) {
+ ServiceUnitStateData next = new ServiceUnitStateData(Free, data.broker());
+ closeServiceUnit(serviceUnit)
+ .thenCompose(__ -> pubAsync(serviceUnit, next))
+ .whenComplete((__, e) -> log(e, serviceUnit, data, next));
+ }
}
}
@@ -538,16 +631,32 @@ private void handleSplitEvent(String serviceUnit, ServiceUnitStateData data) {
}
}
- private void handleFreeEvent(String serviceUnit) {
- closeServiceUnit(serviceUnit)
- .thenAccept(__ -> {
- var request = getOwnerRequests.remove(serviceUnit);
- if (request != null) {
- request.completeExceptionally(new IllegalStateException("The ownership has been unloaded. "
- + "No owner is found for serviceUnit: " + serviceUnit));
- }
- })
- .whenComplete((__, e) -> log(e, serviceUnit, null, null));
+ /**
+ * Handles a Free event: completes any deferred owner lookup for the service unit with a
+ * null owner, then logs the event when isTargetBroker(data.broker()) holds.
+ */
+ private void handleFreeEvent(String serviceUnit, ServiceUnitStateData data) {
+ var pendingLookup = getOwnerRequests.remove(serviceUnit);
+ if (pendingLookup != null) {
+ pendingLookup.complete(null);
+ }
+ if (!isTargetBroker(data.broker())) {
+ return;
+ }
+ log(null, serviceUnit, data, null);
+ }
+
+ /**
+ * Handles a Deleted event: fails any deferred owner lookup for the service unit with an
+ * IllegalStateException, then logs the event when isTargetBroker(data.broker()) holds.
+ */
+ private void handleDeleteEvent(String serviceUnit, ServiceUnitStateData data) {
+ var getOwnerRequest = getOwnerRequests.remove(serviceUnit);
+ if (getOwnerRequest != null) {
+ // Leading space keeps the service unit name separated from the rest of the message.
+ getOwnerRequest.completeExceptionally(new IllegalStateException(serviceUnit + " has been deleted."));
+ }
+ if (isTargetBroker(data.broker())) {
+ log(null, serviceUnit, data, null);
+ }
+ }
+
+ /**
+ * Handles an Init event (the state used when no data is present): completes any deferred
+ * owner lookup for the service unit with a null owner and logs the event.
+ */
+ private void handleInitEvent(String serviceUnit) {
+ var pendingLookup = getOwnerRequests.remove(serviceUnit);
+ boolean lookupWasDeferred = pendingLookup != null;
+ if (lookupWasDeferred) {
+ pendingLookup.complete(null);
+ }
+ log(null, serviceUnit, null, null);
}
private CompletableFuture pubAsync(String serviceUnit, ServiceUnitStateData data) {
@@ -702,8 +811,8 @@ protected void splitServiceUnitOnceAndRetry(NamespaceService namespaceService,
});
updateFuture.thenAccept(r -> {
- // Free the old bundle
- tombstoneAsync(serviceUnit).thenRun(() -> {
+ // Delete the old bundle
+ pubAsync(serviceUnit, new ServiceUnitStateData(Deleted, data.broker())).thenRun(() -> {
// Update bundled_topic cache for load-report-generation
pulsar.getBrokerService().refreshTopicToStatsMaps(bundle);
// TODO: Update the load data immediately if needed.
@@ -723,6 +832,8 @@ protected void splitServiceUnitOnceAndRetry(NamespaceService namespaceService,
Throwable throwable = FutureUtil.unwrapCompletionException(ex);
if ((throwable instanceof MetadataStoreException.BadVersionException)
&& (counter.incrementAndGet() < NamespaceService.BUNDLE_SPLIT_RETRY_LIMIT)) {
+ log.warn("Failed to update bundle range in metadata store. Retrying {} th / {} limit",
+ counter.get(), NamespaceService.BUNDLE_SPLIT_RETRY_LIMIT, ex);
pulsar.getExecutor().schedule(() -> splitServiceUnitOnceAndRetry(namespaceService, bundleFactory,
bundle, serviceUnit, data, counter, startTime, completionFuture), 100, MILLISECONDS);
} else if (throwable instanceof IllegalArgumentException) {
@@ -748,8 +859,10 @@ public void handleMetadataSessionEvent(SessionEvent e) {
+ /**
+ * Dispatches a broker registry notification: Created goes to handleBrokerCreationEvent and
+ * Deleted goes to handleBrokerDeletionEvent, each after an info log. Other types are ignored.
+ */
public void handleBrokerRegistrationEvent(String broker, NotificationType type) {
if (type == NotificationType.Created) {
+ log.info("BrokerRegistry detected the broker:{} registry has been created.", broker);
handleBrokerCreationEvent(broker);
} else if (type == NotificationType.Deleted) {
+ log.info("BrokerRegistry detected the broker:{} registry has been deleted.", broker);
handleBrokerDeletionEvent(broker);
}
}
@@ -769,7 +882,7 @@ private void handleBrokerCreationEvent(String broker) {
CompletableFuture future = cleanupJobs.remove(broker);
if (future != null) {
future.cancel(false);
- totalCleanupCancelledCnt++;
+ totalInactiveBrokerCleanupCancelledCnt++;
log.info("Successfully cancelled the ownership cleanup for broker:{}."
+ " Active cleanup job count:{}",
broker, cleanupJobs.size());
@@ -792,7 +905,7 @@ private void handleBrokerDeletionEvent(String broker) {
case Stable -> scheduleCleanup(broker, minCleanupDelayTimeInSecs);
case Jittery -> scheduleCleanup(broker, maxCleanupDelayTimeInSecs);
case Unstable -> {
- totalCleanupIgnoredCnt++;
+ totalInactiveBrokerCleanupIgnoredCnt++;
log.error("MetadataState state is unstable. "
+ "Ignoring the ownership cleanup request for the reported broker :{}", broker);
}
@@ -803,7 +916,7 @@ private void scheduleCleanup(String broker, long delayInSecs) {
cleanupJobs.computeIfAbsent(broker, k -> {
Executor delayed = CompletableFuture
.delayedExecutor(delayInSecs, TimeUnit.SECONDS, pulsar.getLoadManagerExecutor());
- totalCleanupScheduledCnt++;
+ totalInactiveBrokerCleanupScheduledCnt++;
return CompletableFuture
.runAsync(() -> {
try {
@@ -821,27 +934,48 @@ private void scheduleCleanup(String broker, long delayInSecs) {
broker, delayInSecs, cleanupJobs.size());
}
+ /**
+ * Publishes a forced Owned state for the service unit, owned by a broker that brokerSelector
+ * picks from availableBrokers. When no broker is selected, logs an error and publishes nothing.
+ * A failed publish is logged; it is not retried here.
+ */
+ private void overrideOwnership(String serviceUnit, ServiceUnitStateData orphanData, Set availableBrokers) {
+ var chosenBroker = brokerSelector.select(availableBrokers, null, getContext());
+ if (!chosenBroker.isPresent()) {
+ log.error("Failed to override the ownership serviceUnit:{} orphanData:{}. Empty selected broker.",
+ serviceUnit, orphanData);
+ return;
+ }
+ var override = new ServiceUnitStateData(Owned, chosenBroker.get(), true);
+ log.info("Overriding ownership serviceUnit:{} from orphanData:{} to overrideData:{}",
+ serviceUnit, orphanData, override);
+ pubAsync(serviceUnit, override).whenComplete((ignored, failure) -> {
+ if (failure == null) {
+ return;
+ }
+ log.error("Failed to override serviceUnit:{} from orphanData:{} to overrideData:{}",
+ serviceUnit, orphanData, override, failure);
+ });
+ }
+
- private void doCleanup(String broker) {
+ private void doCleanup(String broker) throws ExecutionException, InterruptedException, TimeoutException {
long startTime = System.nanoTime();
log.info("Started ownership cleanup for the inactive broker:{}", broker);
- int serviceUnitTombstoneCnt = 0;
+ int orphanServiceUnitCleanupCnt = 0;
long totalCleanupErrorCntStart = totalCleanupErrorCnt.get();
- for (Map.Entry etr : tableview.entrySet()) {
- ServiceUnitStateData stateData = etr.getValue();
- String serviceUnit = etr.getKey();
- if (StringUtils.equals(broker, stateData.broker())
- || StringUtils.equals(broker, stateData.sourceBroker())) {
- log.info("Cleaning ownership serviceUnit:{}, stateData:{}.", serviceUnit, stateData);
- tombstoneAsync(serviceUnit).whenComplete((__, e) -> {
- if (e != null) {
- log.error("Failed cleaning the ownership serviceUnit:{}, stateData:{}, "
- + "cleanupErrorCnt:{}.",
- serviceUnit, stateData,
- totalCleanupErrorCnt.incrementAndGet() - totalCleanupErrorCntStart);
- }
- });
- serviceUnitTombstoneCnt++;
+ var availableBrokers = new HashSet<>(brokerRegistry.getAvailableBrokersAsync()
+ .get(inFlightStateWaitingTimeInMillis, MILLISECONDS));
+ for (var etr : tableview.entrySet()) {
+ var stateData = etr.getValue();
+ var serviceUnit = etr.getKey();
+ var state = state(stateData);
+ if (StringUtils.equals(broker, stateData.broker())) {
+ if (ServiceUnitState.isInFlightState(state) || state == Owned) {
+ overrideOwnership(serviceUnit, stateData, availableBrokers);
+ orphanServiceUnitCleanupCnt++;
+ }
+
+ } else if (StringUtils.equals(broker, stateData.sourceBroker())) {
+ if (ServiceUnitState.isInFlightState(state)) {
+ overrideOwnership(serviceUnit, stateData, availableBrokers);
+ orphanServiceUnitCleanupCnt++;
+ }
}
}
@@ -851,28 +985,51 @@ private void doCleanup(String broker) {
log.error("Failed to flush the in-flight messages.", e);
}
- if (serviceUnitTombstoneCnt > 0) {
- this.totalCleanupCnt++;
- this.totalServiceUnitCleanupTombstoneCnt += serviceUnitTombstoneCnt;
- this.totalBrokerCleanupTombstoneCnt++;
+ if (orphanServiceUnitCleanupCnt > 0) {
+ this.totalOrphanServiceUnitCleanupCnt += orphanServiceUnitCleanupCnt;
+ this.totalInactiveBrokerCleanupCnt++;
}
double cleanupTime = TimeUnit.NANOSECONDS
.toMillis((System.nanoTime() - startTime));
// TODO: clean load data stores
log.info("Completed a cleanup for the inactive broker:{} in {} ms. "
- + "Published tombstone for orphan service units: serviceUnitTombstoneCnt:{}, "
+ + "Cleaned up orphan service units: orphanServiceUnitCleanupCnt:{}, "
+ "approximate cleanupErrorCnt:{}, metrics:{} ",
broker,
cleanupTime,
- serviceUnitTombstoneCnt,
+ orphanServiceUnitCleanupCnt,
totalCleanupErrorCntStart - totalCleanupErrorCnt.get(),
printCleanupMetrics());
cleanupJobs.remove(broker);
}
- // TODO: integrate this monitor logic when broker registry is added
- private void monitorOwnerships(List brokers) {
+ /**
+ * Builds the forced Owned state that should replace an orphan service unit state.
+ * A transfer command goes back to its source broker, an Assigning state goes to a broker
+ * picked by brokerSelector, and a Splitting or Releasing state stays with its current broker.
+ *
+ * @return the override data, or empty when no broker could be selected for an Assigning state
+ * @throws IllegalStateException when the orphan state is none of the states handled above
+ */
+ private Optional getOverrideStateData(String serviceUnit, ServiceUnitStateData orphanData,
+ Set availableBrokers,
+ LoadManagerContext context) {
+ if (isTransferCommand(orphanData)) {
+ // rollback to the src
+ return Optional.of(new ServiceUnitStateData(Owned, orphanData.sourceBroker(), true));
+ }
+ switch (orphanData.state()) {
+ case Assigning -> {
+ // roll-forward to another broker
+ var candidate = brokerSelector.select(availableBrokers, null, context);
+ if (candidate.isPresent()) {
+ return Optional.of(new ServiceUnitStateData(Owned, candidate.get(), true));
+ }
+ return Optional.empty();
+ }
+ case Splitting, Releasing -> {
+ // rollback to the target broker for split and unload
+ return Optional.of(new ServiceUnitStateData(Owned, orphanData.broker(), true));
+ }
+ default -> {
+ var msg = String.format("Failed to get the overrideStateData from serviceUnit=%s, orphanData=%s",
+ serviceUnit, orphanData);
+ log.error(msg);
+ throw new IllegalStateException(msg);
+ }
+ }
+ }
+
+ @VisibleForTesting
+ protected void monitorOwnerships(List brokers) {
if (!isChannelOwner()) {
log.warn("This broker is not the leader now. Skipping ownership monitor");
return;
@@ -886,34 +1043,69 @@ private void monitorOwnerships(List brokers) {
long startTime = System.nanoTime();
Set inactiveBrokers = new HashSet<>();
Set activeBrokers = new HashSet<>(brokers);
- int serviceUnitTombstoneCnt = 0;
+ Map orphanServiceUnits = new HashMap<>();
+ int serviceUnitTombstoneCleanupCnt = 0;
+ int orphanServiceUnitCleanupCnt = 0;
long totalCleanupErrorCntStart = totalCleanupErrorCnt.get();
long now = System.currentTimeMillis();
for (Map.Entry etr : tableview.entrySet()) {
String serviceUnit = etr.getKey();
ServiceUnitStateData stateData = etr.getValue();
String broker = stateData.broker();
+ var state = stateData.state();
if (!activeBrokers.contains(broker)) {
inactiveBrokers.add(stateData.broker());
- } else if (stateData.state() != Owned
+ } else if (state != Owned
&& now - stateData.timestamp() > inFlightStateWaitingTimeInMillis) {
- log.warn("Found long-running orphan(in-flight) serviceUnit:{}, stateData:{}",
- serviceUnit, stateData);
-
- tombstoneAsync(serviceUnit).whenComplete((__, e) -> {
- if (e != null) {
- log.error("Failed cleaning the ownership serviceUnit:{}, stateData:{}, "
- + "cleanupErrorCnt:{}.",
- serviceUnit, stateData,
- totalCleanupErrorCnt.incrementAndGet() - totalCleanupErrorCntStart);
+ if (state == Deleted || state == Free) {
+ if (now - stateData.timestamp()
+ > semiTerminalStateWaitingTimeInMillis) {
+ log.info("Found semi-terminal states to tombstone"
+ + " serviceUnit:{}, stateData:{}", serviceUnit, stateData);
+ tombstoneAsync(serviceUnit).whenComplete((__, e) -> {
+ if (e != null) {
+ log.error("Failed cleaning the ownership serviceUnit:{}, stateData:{}, "
+ + "cleanupErrorCnt:{}.",
+ serviceUnit, stateData,
+ totalCleanupErrorCnt.incrementAndGet() - totalCleanupErrorCntStart, e);
+ }
+ });
+ serviceUnitTombstoneCleanupCnt++;
}
- });
- serviceUnitTombstoneCnt++;
+ } else {
+ log.warn("Found orphan serviceUnit:{}, stateData:{}", serviceUnit, stateData);
+ orphanServiceUnits.put(serviceUnit, stateData);
+ }
}
}
- for (String inactiveBroker : inactiveBrokers) {
- handleBrokerDeletionEvent(inactiveBroker);
+ // Skip cleaning orphan bundles if inactiveBrokers exist. This is a bigger problem.
+ if (!inactiveBrokers.isEmpty()) {
+ for (String inactiveBroker : inactiveBrokers) {
+ handleBrokerDeletionEvent(inactiveBroker);
+ }
+ } else if (!orphanServiceUnits.isEmpty()) {
+ var context = getContext();
+ for (var etr : orphanServiceUnits.entrySet()) {
+ var orphanServiceUnit = etr.getKey();
+ var orphanData = etr.getValue();
+ var overrideData = getOverrideStateData(
+ orphanServiceUnit, orphanData, activeBrokers, context);
+ if (overrideData.isPresent()) {
+ pubAsync(orphanServiceUnit, overrideData.get()).whenComplete((__, e) -> {
+ if (e != null) {
+ log.error("Failed cleaning the ownership orphanServiceUnit:{}, orphanData:{}, "
+ + "cleanupErrorCnt:{}.",
+ orphanServiceUnit, orphanData,
+ totalCleanupErrorCnt.incrementAndGet() - totalCleanupErrorCntStart, e);
+ }
+ });
+ orphanServiceUnitCleanupCnt++;
+ } else {
+ log.warn("Failed get the overrideStateData from orphanServiceUnit:{}, orphanData:{}. will retry..",
+ orphanServiceUnit, orphanData);
+ }
+ }
}
try {
@@ -922,20 +1114,25 @@ private void monitorOwnerships(List brokers) {
log.error("Failed to flush the in-flight messages.", e);
}
- if (serviceUnitTombstoneCnt > 0) {
- this.totalServiceUnitCleanupTombstoneCnt += serviceUnitTombstoneCnt;
+ if (serviceUnitTombstoneCleanupCnt > 0) {
+ this.totalServiceUnitTombstoneCleanupCnt += serviceUnitTombstoneCleanupCnt;
+ }
+
+ if (orphanServiceUnitCleanupCnt > 0) {
+ this.totalOrphanServiceUnitCleanupCnt += orphanServiceUnitCleanupCnt;
}
double monitorTime = TimeUnit.NANOSECONDS
.toMillis((System.nanoTime() - startTime));
log.info("Completed the ownership monitor run in {} ms. "
- + "Scheduled cleanups for inactiveBrokers:{}. inactiveBrokerCount:{}. "
- + "Published tombstone for orphan service units: serviceUnitTombstoneCnt:{}, "
- + "approximate cleanupErrorCnt:{}, metrics:{} ",
+ + "Scheduled cleanups for inactive brokers:{}. inactiveBrokerCount:{}. "
+ + "Published cleanups for orphan service units, orphanServiceUnitCleanupCnt:{}. "
+ + "Tombstoned semi-terminal state service units, serviceUnitTombstoneCleanupCnt:{}. "
+ + "Approximate cleanupErrorCnt:{}, metrics:{}. ",
monitorTime,
- inactiveBrokers,
- inactiveBrokers.size(),
- serviceUnitTombstoneCnt,
+ inactiveBrokers, inactiveBrokers.size(),
+ orphanServiceUnitCleanupCnt,
+ serviceUnitTombstoneCleanupCnt,
totalCleanupErrorCntStart - totalCleanupErrorCnt.get(),
printCleanupMetrics());
@@ -943,17 +1140,19 @@ private void monitorOwnerships(List brokers) {
+ /**
+ * Formats the cleanup counters and the number of active cleanup jobs as a single
+ * "{name:value, ...}" string for log messages.
+ */
private String printCleanupMetrics() {
return String.format(
- "{totalCleanupCnt:%d, totalBrokerCleanupTombstoneCnt:%d, "
- + "totalServiceUnitCleanupTombstoneCnt:%d, totalCleanupErrorCnt:%d, "
- + "totalCleanupScheduledCnt%d, totalCleanupIgnoredCnt:%d, totalCleanupCancelledCnt:%d, "
+ "{totalInactiveBrokerCleanupCnt:%d, "
+ + "totalServiceUnitTombstoneCleanupCnt:%d, totalOrphanServiceUnitCleanupCnt:%d, "
+ + "totalCleanupErrorCnt:%d, "
+ + "totalInactiveBrokerCleanupScheduledCnt:%d, totalInactiveBrokerCleanupIgnoredCnt:%d, "
+ + "totalInactiveBrokerCleanupCancelledCnt:%d, "
+ " activeCleanupJobs:%d}",
- totalCleanupCnt,
- totalBrokerCleanupTombstoneCnt,
- totalServiceUnitCleanupTombstoneCnt,
+ totalInactiveBrokerCleanupCnt,
+ totalServiceUnitTombstoneCleanupCnt,
+ totalOrphanServiceUnitCleanupCnt,
totalCleanupErrorCnt.get(),
- totalCleanupScheduledCnt,
- totalCleanupIgnoredCnt,
- totalCleanupCancelledCnt,
+ totalInactiveBrokerCleanupScheduledCnt,
+ totalInactiveBrokerCleanupIgnoredCnt,
+ totalInactiveBrokerCleanupCancelledCnt,
cleanupJobs.size()
);
}
@@ -1018,15 +1217,6 @@ public List getMetrics() {
}
}
-
- {
- var dim = new HashMap<>(dimensions);
- dim.put("result", "Total");
- var metric = Metrics.create(dim);
- metric.put("brk_sunit_state_chn_cleanup_ops_total", totalCleanupCnt);
- metrics.add(metric);
- }
-
{
var dim = new HashMap<>(dimensions);
dim.put("result", "Failure");
@@ -1039,7 +1229,7 @@ public List getMetrics() {
var dim = new HashMap<>(dimensions);
dim.put("result", "Skip");
var metric = Metrics.create(dim);
- metric.put("brk_sunit_state_chn_cleanup_ops_total", totalCleanupIgnoredCnt);
+ metric.put("brk_sunit_state_chn_inactive_broker_cleanup_ops_total", totalInactiveBrokerCleanupIgnoredCnt);
metrics.add(metric);
}
@@ -1047,7 +1237,7 @@ public List getMetrics() {
var dim = new HashMap<>(dimensions);
dim.put("result", "Cancel");
var metric = Metrics.create(dim);
- metric.put("brk_sunit_state_chn_cleanup_ops_total", totalCleanupCancelledCnt);
+ metric.put("brk_sunit_state_chn_inactive_broker_cleanup_ops_total", totalInactiveBrokerCleanupCancelledCnt);
metrics.add(metric);
}
@@ -1055,13 +1245,14 @@ public List getMetrics() {
var dim = new HashMap<>(dimensions);
dim.put("result", "Schedule");
var metric = Metrics.create(dim);
- metric.put("brk_sunit_state_chn_cleanup_ops_total", totalCleanupScheduledCnt);
+ metric.put("brk_sunit_state_chn_inactive_broker_cleanup_ops_total", totalInactiveBrokerCleanupScheduledCnt);
metrics.add(metric);
}
var metric = Metrics.create(dimensions);
- metric.put("brk_sunit_state_chn_broker_cleanup_ops_total", totalBrokerCleanupTombstoneCnt);
- metric.put("brk_sunit_state_chn_su_cleanup_ops_total", totalServiceUnitCleanupTombstoneCnt);
+ metric.put("brk_sunit_state_chn_inactive_broker_cleanup_ops_total", totalInactiveBrokerCleanupCnt);
+ metric.put("brk_sunit_state_chn_orphan_su_cleanup_ops_total", totalOrphanServiceUnitCleanupCnt);
+ metric.put("brk_sunit_state_chn_su_tombstone_cleanup_ops_total", totalServiceUnitTombstoneCleanupCnt);
metrics.add(metric);
return metrics;
diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/loadbalance/extensions/channel/ServiceUnitStateCompactionStrategy.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/loadbalance/extensions/channel/ServiceUnitStateCompactionStrategy.java
index 2b21f830dda92..d2a585af9d9d5 100644
--- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/loadbalance/extensions/channel/ServiceUnitStateCompactionStrategy.java
+++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/loadbalance/extensions/channel/ServiceUnitStateCompactionStrategy.java
@@ -18,13 +18,11 @@
*/
package org.apache.pulsar.broker.loadbalance.extensions.channel;
-import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitState.Assigned;
-import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitState.Free;
-import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitState.Owned;
-import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitState.Released;
-import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitState.Splitting;
+import static org.apache.commons.lang3.StringUtils.isBlank;
+import static org.apache.commons.lang3.StringUtils.isNotBlank;
+import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitStateData.state;
import com.google.common.annotations.VisibleForTesting;
-import org.apache.commons.lang.StringUtils;
+import org.apache.commons.lang3.StringUtils;
import org.apache.pulsar.client.api.Schema;
import org.apache.pulsar.common.topics.TopicCompactionStrategy;
@@ -50,40 +48,69 @@ public void checkBrokers(boolean check) {
+ /**
+ * Decides whether the existing value {@code from} is kept and the incoming value {@code to} is dropped.
+ * Returns false when {@code to} is null or forced. Returns true when the state transition is not
+ * accepted by ServiceUnitState.isValidTransition. When checkBrokers is on, it also returns true
+ * if the brokers in {@code to} are inconsistent with {@code from} for that transition.
+ * Any transition without a broker rule returns false.
+ */
@Override
public boolean shouldKeepLeft(ServiceUnitStateData from, ServiceUnitStateData to) {
- ServiceUnitState prevState = from == null ? Free : from.state();
- ServiceUnitState state = to == null ? Free : to.state();
+ if (to == null) {
+ return false;
+ } else if (to.force()) {
+ return false;
+ }
+
+
+ ServiceUnitState prevState = state(from);
+ ServiceUnitState state = state(to);
+
if (!ServiceUnitState.isValidTransition(prevState, state)) {
return true;
}
if (checkBrokers) {
- if (prevState == Free && (state == Assigned || state == Owned)) {
- // Free -> Assigned || Owned broker check
- return StringUtils.isBlank(to.broker());
- } else if (prevState == Owned && state == Assigned) {
- // Owned -> Assigned(transfer) broker check
- return !StringUtils.equals(from.broker(), to.sourceBroker())
- || StringUtils.isBlank(to.broker())
- || StringUtils.equals(from.broker(), to.broker());
- } else if (prevState == Assigned && state == Released) {
- // Assigned -> Released(transfer) broker check
- return !StringUtils.equals(from.broker(), to.broker())
- || !StringUtils.equals(from.sourceBroker(), to.sourceBroker());
- } else if (prevState == Released && state == Owned) {
- // Released -> Owned(transfer) broker check
- return !StringUtils.equals(from.broker(), to.broker())
- || !StringUtils.equals(from.sourceBroker(), to.sourceBroker());
- } else if (prevState == Assigned && state == Owned) {
- // Assigned -> Owned broker check
- return !StringUtils.equals(from.broker(), to.broker())
- || !StringUtils.equals(from.sourceBroker(), to.sourceBroker());
- } else if (prevState == Owned && state == Splitting) {
- // Owned -> Splitting broker check
- return !StringUtils.equals(from.broker(), to.broker());
+ // Each prevState case ends with an explicit break: a target state that has no broker rule
+ // must not fall through into the rules of the next prevState.
+ switch (prevState) {
+ case Owned:
+ switch (state) {
+ case Assigning:
+ return invalidTransfer(from, to);
+ case Splitting:
+ case Releasing:
+ return isNotBlank(to.sourceBroker()) || targetNotEquals(from, to);
+ }
+ break;
+ case Assigning:
+ switch (state) {
+ case Releasing:
+ return isBlank(to.sourceBroker()) || notEquals(from, to);
+ case Owned:
+ return isNotBlank(to.sourceBroker()) || targetNotEquals(from, to);
+ }
+ break;
+ case Releasing:
+ switch (state) {
+ case Owned:
+ case Free:
+ return notEquals(from, to);
+ }
+ break;
+ case Splitting:
+ switch (state) {
+ case Deleted:
+ return notEquals(from, to);
+ }
+ break;
+ case Free:
+ switch (state) {
+ case Assigning:
+ return isNotBlank(to.sourceBroker());
+ }
+ break;
+ }
}
-
return false;
}
-}
+ /** Returns true when the two data entries name different brokers. */
+ private boolean targetNotEquals(ServiceUnitStateData from, ServiceUnitStateData to) {
+ String previousBroker = from.broker();
+ String incomingBroker = to.broker();
+ return !previousBroker.equals(incomingBroker);
+ }
+
+ /** Returns true unless both the broker and the source broker are the same in the two data entries. */
+ private boolean notEquals(ServiceUnitStateData from, ServiceUnitStateData to) {
+ return !(from.broker().equals(to.broker())
+ && StringUtils.equals(from.sourceBroker(), to.sourceBroker()));
+ }
+
+ /**
+ * Returns true when {@code to} is not a valid transfer away from {@code from}: its source broker
+ * is not the broker in {@code from}, or its destination is that same broker.
+ */
+ private boolean invalidTransfer(ServiceUnitStateData from, ServiceUnitStateData to) {
+ String currentBroker = from.broker();
+ if (!currentBroker.equals(to.sourceBroker())) {
+ return true;
+ }
+ return currentBroker.equals(to.broker());
+ }
+}
\ No newline at end of file
diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/loadbalance/extensions/channel/ServiceUnitStateData.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/loadbalance/extensions/channel/ServiceUnitStateData.java
index cba459b7875f7..6a04431de64d5 100644
--- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/loadbalance/extensions/channel/ServiceUnitStateData.java
+++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/loadbalance/extensions/channel/ServiceUnitStateData.java
@@ -20,24 +20,36 @@
import java.util.Objects;
+import org.apache.commons.lang3.StringUtils;
/**
* Defines data for the service unit state changes.
* This data will be broadcast in ServiceUnitStateChannel.
*/
-public record ServiceUnitStateData(ServiceUnitState state, String broker, String sourceBroker, long timestamp) {
+public record ServiceUnitStateData(
+ ServiceUnitState state, String broker, String sourceBroker, boolean force, long timestamp) {
+ // Compact canonical constructor: rejects a null state and a null, empty or blank broker.
public ServiceUnitStateData {
Objects.requireNonNull(state);
- Objects.requireNonNull(broker);
+ if (StringUtils.isBlank(broker)) {
+ throw new IllegalArgumentException("Empty broker");
+ }
}
+ // Non-forced data with an explicit source broker, stamped with the current time.
public ServiceUnitStateData(ServiceUnitState state, String broker, String sourceBroker) {
- this(state, broker, sourceBroker, System.currentTimeMillis());
+ this(state, broker, sourceBroker, false, System.currentTimeMillis());
}
+ // Non-forced data without a source broker, stamped with the current time.
public ServiceUnitStateData(ServiceUnitState state, String broker) {
- this(state, broker, null, System.currentTimeMillis());
+ this(state, broker, null, false, System.currentTimeMillis());
+ }
+
+ // Data without a source broker and with an explicit force flag, stamped with the current time.
+ public ServiceUnitStateData(ServiceUnitState state, String broker, boolean force) {
+ this(state, broker, null, force, System.currentTimeMillis());
+ }
+
+ // Null-safe state accessor: missing data is reported as Init.
+ public static ServiceUnitState state(ServiceUnitStateData data) {
+ return data == null ? ServiceUnitState.Init : data.state();
}
}
diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/compaction/StrategicTwoPhaseCompactor.java b/pulsar-broker/src/main/java/org/apache/pulsar/compaction/StrategicTwoPhaseCompactor.java
index 9dc4ec649b62b..37b03e275d6bf 100644
--- a/pulsar-broker/src/main/java/org/apache/pulsar/compaction/StrategicTwoPhaseCompactor.java
+++ b/pulsar-broker/src/main/java/org/apache/pulsar/compaction/StrategicTwoPhaseCompactor.java
@@ -38,7 +38,6 @@
import org.apache.pulsar.client.api.Message;
import org.apache.pulsar.client.api.MessageId;
import org.apache.pulsar.client.api.PulsarClient;
-import org.apache.pulsar.client.api.PulsarClientException;
import org.apache.pulsar.client.api.Reader;
import org.apache.pulsar.client.impl.BatchMessageIdImpl;
import org.apache.pulsar.client.impl.CompactionReaderImpl;
@@ -63,6 +62,7 @@
public class StrategicTwoPhaseCompactor extends TwoPhaseCompactor {
private static final Logger log = LoggerFactory.getLogger(StrategicTwoPhaseCompactor.class);
private static final int MAX_OUTSTANDING = 500;
+ private static final int MAX_READER_RECONNECT_WAITING_TIME_IN_MILLIS = 20 * 1000;
private final Duration phaseOneLoopReadTimeout;
private final RawBatchMessageContainerImpl batchMessageContainer;
@@ -110,7 +110,7 @@ CompletableFuture doCompaction(Reader reader, TopicCompactionStrate
if (!(reader instanceof CompactionReaderImpl)) {
return CompletableFuture.failedFuture(
- new IllegalStateException("reader has to be DelayedAckReaderImpl"));
+ new IllegalStateException("reader has to be CompactionReaderImpl"));
}
return reader.hasMessageAvailableAsync()
.thenCompose(available -> {
@@ -284,9 +284,12 @@ private void phaseOneLoop(Reader reader, CompletableFuture void phaseOneLoop(Reader reader, CompletableFuture void waitForReconnection(Reader reader) {
+ // Blocks until reader.isConnected() is true, polling every 100 ms. Gives up with a
+ // RuntimeException once MAX_READER_RECONNECT_WAITING_TIME_IN_MILLIS has elapsed.
+ long started = System.currentTimeMillis();
+ // An interrupt does not cancel the wait, but it must not be lost either: remember it here
+ // and restore the thread's interrupt status on the way out.
+ boolean interrupted = false;
+ try {
+ // initial sleep
+ try {
+ Thread.sleep(100);
+ } catch (InterruptedException e) {
+ interrupted = true;
+ }
+ while (!reader.isConnected()) {
+ long now = System.currentTimeMillis();
+ if (now - started > MAX_READER_RECONNECT_WAITING_TIME_IN_MILLIS) {
+ String errorMsg = String.format(
+ "Reader has not been reconnected for %d secs. Stopping the compaction.",
+ MAX_READER_RECONNECT_WAITING_TIME_IN_MILLIS / 1000);
+ log.error(errorMsg);
+ throw new RuntimeException(errorMsg);
+ }
+ log.warn(
+ "Reader has not been reconnected after the cursor reset. elapsed :{} ms. Retrying "
+ + "soon.", now - started);
+ try {
+ Thread.sleep(100);
+ } catch (InterruptedException e) {
+ interrupted = true;
+ log.warn("The thread got interrupted while waiting. continuing", e);
+ }
+ }
+ } finally {
+ if (interrupted) {
+ Thread.currentThread().interrupt();
+ }
+ }
+ }
+
private CompletableFuture phaseTwo(PhaseOneResult phaseOneResult, Reader reader, BookKeeper bk) {
log.info("Completed phase one. Result:{}. ", phaseOneResult);
Map metadata =
diff --git a/pulsar-broker/src/test/java/org/apache/pulsar/broker/loadbalance/extensions/ExtensibleLoadManagerImplTest.java b/pulsar-broker/src/test/java/org/apache/pulsar/broker/loadbalance/extensions/ExtensibleLoadManagerImplTest.java
index 1ef4f660e4af3..001aac34a4ba2 100644
--- a/pulsar-broker/src/test/java/org/apache/pulsar/broker/loadbalance/extensions/ExtensibleLoadManagerImplTest.java
+++ b/pulsar-broker/src/test/java/org/apache/pulsar/broker/loadbalance/extensions/ExtensibleLoadManagerImplTest.java
@@ -18,14 +18,6 @@
*/
package org.apache.pulsar.broker.loadbalance.extensions;
-import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitState.Assigned;
-import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitState.Free;
-import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitState.Owned;
-import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitState.Released;
-import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitState.Splitting;
-import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitStateChannelImpl.EventType.Assign;
-import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitStateChannelImpl.EventType.Split;
-import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitStateChannelImpl.EventType.Unload;
import static org.apache.pulsar.broker.loadbalance.extensions.models.SplitDecision.Reason.Admin;
import static org.apache.pulsar.broker.loadbalance.extensions.models.SplitDecision.Reason.Bandwidth;
import static org.apache.pulsar.broker.loadbalance.extensions.models.SplitDecision.Reason.MsgRate;
@@ -53,6 +45,7 @@
import static org.testng.Assert.assertTrue;
import com.google.common.collect.Sets;
+import java.util.LinkedHashMap;
import java.util.Set;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference;
@@ -73,6 +66,7 @@
import org.apache.pulsar.broker.loadbalance.BrokerFilterException;
import org.apache.pulsar.broker.loadbalance.LeaderBroker;
import org.apache.pulsar.broker.loadbalance.LeaderElectionService;
+import org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitState;
import org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitStateChannel;
import org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitStateChannelImpl;
import org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitStateData;
@@ -342,17 +336,19 @@ public void testGetMetrics() throws Exception {
FieldUtils.writeDeclaredField(unloadCounter, "loadAvg", 1.5, true);
FieldUtils.writeDeclaredField(unloadCounter, "loadStd", 0.3, true);
FieldUtils.writeDeclaredField(unloadCounter, "breakdownCounters", Map.of(
- Success, Map.of(
- Overloaded, new MutableLong(1),
- Underloaded, new MutableLong(2)),
- Skip, Map.of(
- Balanced, new MutableLong(3),
- NoBundles, new MutableLong(4),
- CoolDown, new MutableLong(5),
- OutDatedData, new MutableLong(6),
- NoLoadData, new MutableLong(7),
- NoBrokers, new MutableLong(8),
- Unknown, new MutableLong(9)),
+ Success, new LinkedHashMap<>() {{
+ put(Overloaded, new MutableLong(1));
+ put(Underloaded, new MutableLong(2));
+ }},
+ Skip, new LinkedHashMap<>() {{
+ put(Balanced, new MutableLong(3));
+ put(NoBundles, new MutableLong(4));
+ put(CoolDown, new MutableLong(5));
+ put(OutDatedData, new MutableLong(6));
+ put(NoLoadData, new MutableLong(7));
+ put(NoBrokers, new MutableLong(8));
+ put(Unknown, new MutableLong(9));
+ }},
Failure, Map.of(
Unknown, new MutableLong(10))
), true);
@@ -363,19 +359,24 @@ Unknown, new MutableLong(10))
FieldUtils.readDeclaredField(primaryLoadManager, "splitMetrics", true);
SplitCounter splitCounter = new SplitCounter();
FieldUtils.writeDeclaredField(splitCounter, "splitCount", 35l, true);
- FieldUtils.writeDeclaredField(splitCounter, "breakdownCounters", Map.of(
- SplitDecision.Label.Success, Map.of(
- Topics, new MutableLong(1),
- Sessions, new MutableLong(2),
- MsgRate, new MutableLong(3),
- Bandwidth, new MutableLong(4),
- Admin, new MutableLong(5)),
- SplitDecision.Label.Skip, Map.of(
+ FieldUtils.writeDeclaredField(splitCounter, "breakdownCounters", new LinkedHashMap<>() {
+ {
+ put(SplitDecision.Label.Success, new LinkedHashMap<>() {
+ {
+ put(Topics, new MutableLong(1));
+ put(Sessions, new MutableLong(2));
+ put(MsgRate, new MutableLong(3));
+ put(Bandwidth, new MutableLong(4));
+ put(Admin, new MutableLong(5));
+ }
+ });
+ put(SplitDecision.Label.Skip, Map.of(
SplitDecision.Reason.Balanced, new MutableLong(6)
- ),
- SplitDecision.Label.Failure, Map.of(
- SplitDecision.Reason.Unknown, new MutableLong(7))
- ), true);
+ ));
+ put(SplitDecision.Label.Failure, Map.of(
+ SplitDecision.Reason.Unknown, new MutableLong(7)));
+ }
+ }, true);
splitMetrics.set(splitCounter.toMetrics(pulsar.getAdvertisedAddress()));
}
@@ -391,34 +392,39 @@ SplitDecision.Reason.Unknown, new MutableLong(7))
}
{
- FieldUtils.writeDeclaredField(channel1, "totalCleanupCnt", 1, true);
- FieldUtils.writeDeclaredField(channel1, "totalBrokerCleanupTombstoneCnt", 2, true);
- FieldUtils.writeDeclaredField(channel1, "totalServiceUnitCleanupTombstoneCnt", 3, true);
- FieldUtils.writeDeclaredField(channel1, "totalCleanupErrorCnt", new AtomicLong(4), true);
- FieldUtils.writeDeclaredField(channel1, "totalCleanupScheduledCnt", 5, true);
- FieldUtils.writeDeclaredField(channel1, "totalCleanupIgnoredCnt", 6, true);
- FieldUtils.writeDeclaredField(channel1, "totalCleanupCancelledCnt", 7, true);
- FieldUtils.writeDeclaredField(channel1, "ownerLookUpCounters", Map.of(
- Owned, new AtomicLong(1),
- Assigned, new AtomicLong(2),
- Released, new AtomicLong(3),
- Splitting, new AtomicLong(4),
- Free, new AtomicLong(5)
- ), true);
- FieldUtils.writeDeclaredField(channel1, "eventCounters", Map.of(
- Assign, new ServiceUnitStateChannelImpl.Counters(new AtomicLong(1), new AtomicLong(2)),
- Split, new ServiceUnitStateChannelImpl.Counters(new AtomicLong(3), new AtomicLong(4)),
- Unload, new ServiceUnitStateChannelImpl.Counters(new AtomicLong(5), new AtomicLong(6))
- ), true);
-
- FieldUtils.writeDeclaredField(channel1, "handlerCounters", Map.of(
- Owned, new ServiceUnitStateChannelImpl.Counters(new AtomicLong(1), new AtomicLong(2)),
- Assigned, new ServiceUnitStateChannelImpl.Counters(new AtomicLong(3), new AtomicLong(4)),
- Released, new ServiceUnitStateChannelImpl.Counters(new AtomicLong(5), new AtomicLong(6)),
- Splitting, new ServiceUnitStateChannelImpl.Counters(new AtomicLong(7), new AtomicLong(8)),
- Free, new ServiceUnitStateChannelImpl.Counters(new AtomicLong(9), new AtomicLong(10))
- ), true);
+ FieldUtils.writeDeclaredField(channel1, "totalInactiveBrokerCleanupCnt", 1, true);
+ FieldUtils.writeDeclaredField(channel1, "totalServiceUnitTombstoneCleanupCnt", 2, true);
+ FieldUtils.writeDeclaredField(channel1, "totalOrphanServiceUnitCleanupCnt", 3, true);
+ FieldUtils.writeDeclaredField(channel1, "totalCleanupErrorCnt", new AtomicLong(4), true);
+ FieldUtils.writeDeclaredField(channel1, "totalInactiveBrokerCleanupScheduledCnt", 5, true);
+ FieldUtils.writeDeclaredField(channel1, "totalInactiveBrokerCleanupIgnoredCnt", 6, true);
+ FieldUtils.writeDeclaredField(channel1, "totalInactiveBrokerCleanupCancelledCnt", 7, true);
+
+ Map ownerLookUpCounters = new LinkedHashMap<>();
+ Map handlerCounters = new LinkedHashMap<>();
+ Map eventCounters =
+ new LinkedHashMap<>();
+ int i = 1;
+ int j = 0;
+ for (var state : ServiceUnitState.values()) {
+ ownerLookUpCounters.put(state, new AtomicLong(i));
+ handlerCounters.put(state,
+ new ServiceUnitStateChannelImpl.Counters(
+ new AtomicLong(j + 1), new AtomicLong(j + 2)));
+ i++;
+ j += 2;
+ }
+ i = 0;
+ for (var type : ServiceUnitStateChannelImpl.EventType.values()) {
+ eventCounters.put(type,
+ new ServiceUnitStateChannelImpl.Counters(
+ new AtomicLong(i + 1), new AtomicLong(i + 2)));
+ i += 2;
+ }
+ FieldUtils.writeDeclaredField(channel1, "ownerLookUpCounters", ownerLookUpCounters, true);
+ FieldUtils.writeDeclaredField(channel1, "eventCounters", eventCounters, true);
+ FieldUtils.writeDeclaredField(channel1, "handlerCounters", handlerCounters, true);
}
var expected = Set.of(
@@ -428,55 +434,60 @@ Free, new AtomicLong(5)
dimensions=[{broker=localhost, feature=max, metric=loadBalancing}], metrics=[{brk_lb_resource_usage=0.04}]
dimensions=[{broker=localhost, metric=bundleUnloading}], metrics=[{brk_lb_unload_broker_total=2, brk_lb_unload_bundle_total=3}]
dimensions=[{broker=localhost, metric=bundleUnloading, reason=Unknown, result=Failure}], metrics=[{brk_lb_unload_broker_breakdown_total=10}]
+ dimensions=[{broker=localhost, metric=bundleUnloading, reason=Balanced, result=Skip}], metrics=[{brk_lb_unload_broker_breakdown_total=3}]
+ dimensions=[{broker=localhost, metric=bundleUnloading, reason=NoBundles, result=Skip}], metrics=[{brk_lb_unload_broker_breakdown_total=4}]
dimensions=[{broker=localhost, metric=bundleUnloading, reason=CoolDown, result=Skip}], metrics=[{brk_lb_unload_broker_breakdown_total=5}]
dimensions=[{broker=localhost, metric=bundleUnloading, reason=OutDatedData, result=Skip}], metrics=[{brk_lb_unload_broker_breakdown_total=6}]
- dimensions=[{broker=localhost, metric=bundleUnloading, reason=NoBundles, result=Skip}], metrics=[{brk_lb_unload_broker_breakdown_total=4}]
dimensions=[{broker=localhost, metric=bundleUnloading, reason=NoLoadData, result=Skip}], metrics=[{brk_lb_unload_broker_breakdown_total=7}]
dimensions=[{broker=localhost, metric=bundleUnloading, reason=NoBrokers, result=Skip}], metrics=[{brk_lb_unload_broker_breakdown_total=8}]
dimensions=[{broker=localhost, metric=bundleUnloading, reason=Unknown, result=Skip}], metrics=[{brk_lb_unload_broker_breakdown_total=9}]
- dimensions=[{broker=localhost, metric=bundleUnloading, reason=Balanced, result=Skip}], metrics=[{brk_lb_unload_broker_breakdown_total=3}]
- dimensions=[{broker=localhost, metric=bundleUnloading, reason=Underloaded, result=Success}], metrics=[{brk_lb_unload_broker_breakdown_total=2}]
dimensions=[{broker=localhost, metric=bundleUnloading, reason=Overloaded, result=Success}], metrics=[{brk_lb_unload_broker_breakdown_total=1}]
+ dimensions=[{broker=localhost, metric=bundleUnloading, reason=Underloaded, result=Success}], metrics=[{brk_lb_unload_broker_breakdown_total=2}]
dimensions=[{broker=localhost, feature=max_ema, metric=bundleUnloading, stat=avg}], metrics=[{brk_lb_resource_usage_stats=1.5}]
dimensions=[{broker=localhost, feature=max_ema, metric=bundleUnloading, stat=std}], metrics=[{brk_lb_resource_usage_stats=0.3}]
dimensions=[{broker=localhost, metric=bundlesSplit}], metrics=[{brk_lb_bundles_split_total=35}]
- dimensions=[{broker=localhost, metric=bundlesSplit, reason=Bandwidth, result=Success}], metrics=[{brk_lb_bundles_split_breakdown_total=4}]
+ dimensions=[{broker=localhost, metric=bundlesSplit, reason=Topics, result=Success}], metrics=[{brk_lb_bundles_split_breakdown_total=1}]
dimensions=[{broker=localhost, metric=bundlesSplit, reason=Sessions, result=Success}], metrics=[{brk_lb_bundles_split_breakdown_total=2}]
dimensions=[{broker=localhost, metric=bundlesSplit, reason=MsgRate, result=Success}], metrics=[{brk_lb_bundles_split_breakdown_total=3}]
+ dimensions=[{broker=localhost, metric=bundlesSplit, reason=Bandwidth, result=Success}], metrics=[{brk_lb_bundles_split_breakdown_total=4}]
dimensions=[{broker=localhost, metric=bundlesSplit, reason=Admin, result=Success}], metrics=[{brk_lb_bundles_split_breakdown_total=5}]
- dimensions=[{broker=localhost, metric=bundlesSplit, reason=Topics, result=Success}], metrics=[{brk_lb_bundles_split_breakdown_total=1}]
dimensions=[{broker=localhost, metric=bundlesSplit, reason=Balanced, result=Skip}], metrics=[{brk_lb_bundles_split_breakdown_total=6}]
dimensions=[{broker=localhost, metric=bundlesSplit, reason=Unknown, result=Failure}], metrics=[{brk_lb_bundles_split_breakdown_total=7}]
dimensions=[{broker=localhost, metric=assign, result=Empty}], metrics=[{brk_lb_assign_broker_breakdown_total=2}]
- dimensions=[{broker=localhost, metric=assign, result=Success}], metrics=[{brk_lb_assign_broker_breakdown_total=1}]
dimensions=[{broker=localhost, metric=assign, result=Skip}], metrics=[{brk_lb_assign_broker_breakdown_total=3}]
- dimensions=[{broker=localhost, metric=sunitStateChn, state=Splitting}], metrics=[{brk_sunit_state_chn_owner_lookup_total=4}]
- dimensions=[{broker=localhost, metric=sunitStateChn, state=Owned}], metrics=[{brk_sunit_state_chn_owner_lookup_total=1}]
- dimensions=[{broker=localhost, metric=sunitStateChn, state=Released}], metrics=[{brk_sunit_state_chn_owner_lookup_total=3}]
- dimensions=[{broker=localhost, metric=sunitStateChn, state=Free}], metrics=[{brk_sunit_state_chn_owner_lookup_total=5}]
- dimensions=[{broker=localhost, metric=sunitStateChn, state=Assigned}], metrics=[{brk_sunit_state_chn_owner_lookup_total=2}]
- dimensions=[{broker=localhost, event=Unload, metric=sunitStateChn, result=Total}], metrics=[{brk_sunit_state_chn_event_publish_ops_total=5}]
- dimensions=[{broker=localhost, event=Unload, metric=sunitStateChn, result=Failure}], metrics=[{brk_sunit_state_chn_event_publish_ops_total=6}]
- dimensions=[{broker=localhost, event=Split, metric=sunitStateChn, result=Total}], metrics=[{brk_sunit_state_chn_event_publish_ops_total=3}]
- dimensions=[{broker=localhost, event=Split, metric=sunitStateChn, result=Failure}], metrics=[{brk_sunit_state_chn_event_publish_ops_total=4}]
+ dimensions=[{broker=localhost, metric=assign, result=Success}], metrics=[{brk_lb_assign_broker_breakdown_total=1}]
+ dimensions=[{broker=localhost, metric=sunitStateChn, state=Init}], metrics=[{brk_sunit_state_chn_owner_lookup_total=1}]
+ dimensions=[{broker=localhost, metric=sunitStateChn, state=Free}], metrics=[{brk_sunit_state_chn_owner_lookup_total=2}]
+ dimensions=[{broker=localhost, metric=sunitStateChn, state=Owned}], metrics=[{brk_sunit_state_chn_owner_lookup_total=3}]
+ dimensions=[{broker=localhost, metric=sunitStateChn, state=Assigning}], metrics=[{brk_sunit_state_chn_owner_lookup_total=4}]
+ dimensions=[{broker=localhost, metric=sunitStateChn, state=Releasing}], metrics=[{brk_sunit_state_chn_owner_lookup_total=5}]
+ dimensions=[{broker=localhost, metric=sunitStateChn, state=Splitting}], metrics=[{brk_sunit_state_chn_owner_lookup_total=6}]
+ dimensions=[{broker=localhost, metric=sunitStateChn, state=Deleted}], metrics=[{brk_sunit_state_chn_owner_lookup_total=7}]
dimensions=[{broker=localhost, event=Assign, metric=sunitStateChn, result=Total}], metrics=[{brk_sunit_state_chn_event_publish_ops_total=1}]
dimensions=[{broker=localhost, event=Assign, metric=sunitStateChn, result=Failure}], metrics=[{brk_sunit_state_chn_event_publish_ops_total=2}]
- dimensions=[{broker=localhost, event=Splitting, metric=sunitStateChn, result=Total}], metrics=[{brk_sunit_state_chn_subscribe_ops_total=7}]
- dimensions=[{broker=localhost, event=Splitting, metric=sunitStateChn, result=Failure}], metrics=[{brk_sunit_state_chn_subscribe_ops_total=8}]
- dimensions=[{broker=localhost, event=Owned, metric=sunitStateChn, result=Total}], metrics=[{brk_sunit_state_chn_subscribe_ops_total=1}]
- dimensions=[{broker=localhost, event=Owned, metric=sunitStateChn, result=Failure}], metrics=[{brk_sunit_state_chn_subscribe_ops_total=2}]
- dimensions=[{broker=localhost, event=Released, metric=sunitStateChn, result=Total}], metrics=[{brk_sunit_state_chn_subscribe_ops_total=5}]
- dimensions=[{broker=localhost, event=Released, metric=sunitStateChn, result=Failure}], metrics=[{brk_sunit_state_chn_subscribe_ops_total=6}]
- dimensions=[{broker=localhost, event=Free, metric=sunitStateChn, result=Total}], metrics=[{brk_sunit_state_chn_subscribe_ops_total=9}]
- dimensions=[{broker=localhost, event=Free, metric=sunitStateChn, result=Failure}], metrics=[{brk_sunit_state_chn_subscribe_ops_total=10}]
- dimensions=[{broker=localhost, event=Assigned, metric=sunitStateChn, result=Total}], metrics=[{brk_sunit_state_chn_subscribe_ops_total=3}]
- dimensions=[{broker=localhost, event=Assigned, metric=sunitStateChn, result=Failure}], metrics=[{brk_sunit_state_chn_subscribe_ops_total=4}]
- dimensions=[{broker=localhost, metric=sunitStateChn, result=Total}], metrics=[{brk_sunit_state_chn_cleanup_ops_total=1}]
+ dimensions=[{broker=localhost, event=Split, metric=sunitStateChn, result=Total}], metrics=[{brk_sunit_state_chn_event_publish_ops_total=3}]
+ dimensions=[{broker=localhost, event=Split, metric=sunitStateChn, result=Failure}], metrics=[{brk_sunit_state_chn_event_publish_ops_total=4}]
+ dimensions=[{broker=localhost, event=Unload, metric=sunitStateChn, result=Total}], metrics=[{brk_sunit_state_chn_event_publish_ops_total=5}]
+ dimensions=[{broker=localhost, event=Unload, metric=sunitStateChn, result=Failure}], metrics=[{brk_sunit_state_chn_event_publish_ops_total=6}]
+ dimensions=[{broker=localhost, event=Init, metric=sunitStateChn, result=Total}], metrics=[{brk_sunit_state_chn_subscribe_ops_total=1}]
+ dimensions=[{broker=localhost, event=Init, metric=sunitStateChn, result=Failure}], metrics=[{brk_sunit_state_chn_subscribe_ops_total=2}]
+ dimensions=[{broker=localhost, event=Free, metric=sunitStateChn, result=Total}], metrics=[{brk_sunit_state_chn_subscribe_ops_total=3}]
+ dimensions=[{broker=localhost, event=Free, metric=sunitStateChn, result=Failure}], metrics=[{brk_sunit_state_chn_subscribe_ops_total=4}]
+ dimensions=[{broker=localhost, event=Owned, metric=sunitStateChn, result=Total}], metrics=[{brk_sunit_state_chn_subscribe_ops_total=5}]
+ dimensions=[{broker=localhost, event=Owned, metric=sunitStateChn, result=Failure}], metrics=[{brk_sunit_state_chn_subscribe_ops_total=6}]
+ dimensions=[{broker=localhost, event=Assigning, metric=sunitStateChn, result=Total}], metrics=[{brk_sunit_state_chn_subscribe_ops_total=7}]
+ dimensions=[{broker=localhost, event=Assigning, metric=sunitStateChn, result=Failure}], metrics=[{brk_sunit_state_chn_subscribe_ops_total=8}]
+ dimensions=[{broker=localhost, event=Releasing, metric=sunitStateChn, result=Total}], metrics=[{brk_sunit_state_chn_subscribe_ops_total=9}]
+ dimensions=[{broker=localhost, event=Releasing, metric=sunitStateChn, result=Failure}], metrics=[{brk_sunit_state_chn_subscribe_ops_total=10}]
+ dimensions=[{broker=localhost, event=Splitting, metric=sunitStateChn, result=Total}], metrics=[{brk_sunit_state_chn_subscribe_ops_total=11}]
+ dimensions=[{broker=localhost, event=Splitting, metric=sunitStateChn, result=Failure}], metrics=[{brk_sunit_state_chn_subscribe_ops_total=12}]
+ dimensions=[{broker=localhost, event=Deleted, metric=sunitStateChn, result=Total}], metrics=[{brk_sunit_state_chn_subscribe_ops_total=13}]
+ dimensions=[{broker=localhost, event=Deleted, metric=sunitStateChn, result=Failure}], metrics=[{brk_sunit_state_chn_subscribe_ops_total=14}]
dimensions=[{broker=localhost, metric=sunitStateChn, result=Failure}], metrics=[{brk_sunit_state_chn_cleanup_ops_total=4}]
- dimensions=[{broker=localhost, metric=sunitStateChn, result=Skip}], metrics=[{brk_sunit_state_chn_cleanup_ops_total=6}]
- dimensions=[{broker=localhost, metric=sunitStateChn, result=Cancel}], metrics=[{brk_sunit_state_chn_cleanup_ops_total=7}]
- dimensions=[{broker=localhost, metric=sunitStateChn, result=Schedule}], metrics=[{brk_sunit_state_chn_cleanup_ops_total=5}]
- dimensions=[{broker=localhost, metric=sunitStateChn}], metrics=[{brk_sunit_state_chn_broker_cleanup_ops_total=2, brk_sunit_state_chn_su_cleanup_ops_total=3}]
+ dimensions=[{broker=localhost, metric=sunitStateChn, result=Skip}], metrics=[{brk_sunit_state_chn_inactive_broker_cleanup_ops_total=6}]
+ dimensions=[{broker=localhost, metric=sunitStateChn, result=Cancel}], metrics=[{brk_sunit_state_chn_inactive_broker_cleanup_ops_total=7}]
+ dimensions=[{broker=localhost, metric=sunitStateChn, result=Schedule}], metrics=[{brk_sunit_state_chn_inactive_broker_cleanup_ops_total=5}]
+ dimensions=[{broker=localhost, metric=sunitStateChn}], metrics=[{brk_sunit_state_chn_inactive_broker_cleanup_ops_total=1, brk_sunit_state_chn_orphan_su_cleanup_ops_total=3, brk_sunit_state_chn_su_tombstone_cleanup_ops_total=2}]
""".split("\n"));
var actual = primaryLoadManager.getMetrics().stream().map(m -> m.toString()).collect(Collectors.toSet());
assertEquals(actual, expected);
diff --git a/pulsar-broker/src/test/java/org/apache/pulsar/broker/loadbalance/extensions/channel/ServiceUnitStateChannelTest.java b/pulsar-broker/src/test/java/org/apache/pulsar/broker/loadbalance/extensions/channel/ServiceUnitStateChannelTest.java
index 327afa3cb8891..49eee6ecb7aef 100644
--- a/pulsar-broker/src/test/java/org/apache/pulsar/broker/loadbalance/extensions/channel/ServiceUnitStateChannelTest.java
+++ b/pulsar-broker/src/test/java/org/apache/pulsar/broker/loadbalance/extensions/channel/ServiceUnitStateChannelTest.java
@@ -18,15 +18,18 @@
*/
package org.apache.pulsar.broker.loadbalance.extensions.channel;
-import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitState.Assigned;
+import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitState.Assigning;
+import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitState.Deleted;
import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitState.Free;
+import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitState.Init;
import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitState.Owned;
-import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitState.Released;
+import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitState.Releasing;
import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitState.Splitting;
import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitStateChannelImpl.EventType.Assign;
import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitStateChannelImpl.EventType.Split;
import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitStateChannelImpl.EventType.Unload;
import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitStateChannelImpl.MAX_CLEAN_UP_DELAY_TIME_IN_SECS;
+import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitStateData.state;
import static org.apache.pulsar.metadata.api.extended.SessionEvent.ConnectionLost;
import static org.apache.pulsar.metadata.api.extended.SessionEvent.Reconnected;
import static org.apache.pulsar.metadata.api.extended.SessionEvent.SessionLost;
@@ -52,6 +55,7 @@
import static org.testng.AssertJUnit.assertNotNull;
import java.lang.reflect.Field;
import java.util.HashMap;
+import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.CompletableFuture;
@@ -71,8 +75,11 @@
import org.apache.pulsar.broker.PulsarService;
import org.apache.pulsar.broker.auth.MockedPulsarServiceBaseTest;
import org.apache.pulsar.broker.loadbalance.LeaderElectionService;
+import org.apache.pulsar.broker.loadbalance.extensions.BrokerRegistryImpl;
+import org.apache.pulsar.broker.loadbalance.extensions.LoadManagerContext;
import org.apache.pulsar.broker.loadbalance.extensions.models.Split;
import org.apache.pulsar.broker.loadbalance.extensions.models.Unload;
+import org.apache.pulsar.broker.loadbalance.extensions.strategy.BrokerSelectionStrategy;
import org.apache.pulsar.broker.namespace.NamespaceService;
import org.apache.pulsar.broker.testcontext.PulsarTestContext;
import org.apache.pulsar.client.api.Producer;
@@ -104,6 +111,12 @@ public class ServiceUnitStateChannelTest extends MockedPulsarServiceBaseTest {
private String bundle2;
private PulsarTestContext additionalPulsarTestContext;
+ private LoadManagerContext loadManagerContext;
+
+ private BrokerRegistryImpl registry;
+
+ private BrokerSelectionStrategy brokerSelector;
+
@BeforeClass
@Override
protected void setup() throws Exception {
@@ -117,11 +130,16 @@ protected void setup() throws Exception {
admin.namespaces().createNamespace("public/default");
pulsar1 = pulsar;
+ registry = new BrokerRegistryImpl(pulsar);
+ loadManagerContext = mock(LoadManagerContext.class);
+ brokerSelector = mock(BrokerSelectionStrategy.class);
additionalPulsarTestContext = createAdditionalPulsarTestContext(getDefaultConf());
pulsar2 = additionalPulsarTestContext.getPulsarService();
- channel1 = spy(new ServiceUnitStateChannelImpl(pulsar1));
+
+ channel1 = createChannel(pulsar1);
channel1.start();
- channel2 = spy(new ServiceUnitStateChannelImpl(pulsar2));
+
+ channel2 = createChannel(pulsar2);
channel2.start();
lookupServiceAddress1 = (String)
FieldUtils.readDeclaredField(channel1, "lookupServiceAddress", true);
@@ -137,6 +155,8 @@ protected void setup() throws Exception {
protected void initTableViews() throws Exception {
cleanTableView(channel1, bundle);
cleanTableView(channel2, bundle);
+ cleanOwnershipMonitorCounters(channel1);
+ cleanOwnershipMonitorCounters(channel2);
cleanOpsCounters(channel1);
cleanOpsCounters(channel2);
}
@@ -187,7 +207,7 @@ public void channelOwnerTest() throws Exception {
public void channelValidationTest()
throws ExecutionException, InterruptedException, IllegalAccessException, PulsarServerException,
TimeoutException {
- var channel = new ServiceUnitStateChannelImpl(pulsar);
+ var channel = createChannel(pulsar);
int errorCnt = validateChannelStart(channel);
assertEquals(6, errorCnt);
ExecutorService executor = Executors.newSingleThreadExecutor();
@@ -333,8 +353,8 @@ public void assignmentTest()
assertEquals(getOwnerRequests1.size(), 0);
assertEquals(getOwnerRequests2.size(), 0);
- validateHandlerCounters(channel1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0);
- validateHandlerCounters(channel2, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0);
+ validateHandlerCounters(channel1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ validateHandlerCounters(channel2, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
validateEventCounters(channel1, 1, 0, 0, 0, 0, 0);
validateEventCounters(channel2, 1, 0, 0, 0, 0, 0);
}
@@ -379,7 +399,7 @@ public void assignmentTestWhenOneAssignmentFails()
}
@Test(priority = 4)
- public void unloadTest()
+ public void transferTest()
throws ExecutionException, InterruptedException, TimeoutException, IllegalAccessException {
var owner1 = channel1.getOwnerAsync(bundle);
@@ -409,14 +429,14 @@ public void unloadTest()
assertEquals(ownerAddr1, ownerAddr2);
assertEquals(ownerAddr1, Optional.of(lookupServiceAddress2));
- validateHandlerCounters(channel1, 2, 0, 2, 0, 1, 0, 0, 0, 0, 0);
- validateHandlerCounters(channel2, 2, 0, 2, 0, 1, 0, 0, 0, 0, 0);
+ validateHandlerCounters(channel1, 2, 0, 2, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ validateHandlerCounters(channel2, 2, 0, 2, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0);
validateEventCounters(channel1, 1, 0, 0, 0, 1, 0);
validateEventCounters(channel2, 0, 0, 0, 0, 0, 0);
}
@Test(priority = 5)
- public void unloadTestWhenDestBrokerFails()
+ public void transferTestWhenDestBrokerFails()
throws ExecutionException, InterruptedException, IllegalAccessException {
var getOwnerRequests1 = getOwnerRequests(channel1);
@@ -450,8 +470,8 @@ public void unloadTestWhenDestBrokerFails()
Unload unload = new Unload(lookupServiceAddress1, bundle, Optional.of(lookupServiceAddress2));
channel1.publishUnloadEventAsync(unload);
// channel1 is broken. the ownership transfer won't be complete.
- waitUntilNewState(channel1, bundle);
- waitUntilNewState(channel2, bundle);
+ waitUntilState(channel1, bundle);
+ waitUntilState(channel2, bundle);
var owner1 = channel1.getOwnerAsync(bundle);
var owner2 = channel2.getOwnerAsync(bundle);
@@ -461,7 +481,7 @@ public void unloadTestWhenDestBrokerFails()
assertEquals(1, getOwnerRequests1.size());
assertEquals(1, getOwnerRequests2.size());
- // In 10 secs, the getOwnerAsync requests(lookup requests) should time out.
+ // In 5 secs, the getOwnerAsync requests(lookup requests) should time out.
Awaitility.await().atMost(5, TimeUnit.SECONDS)
.untilAsserted(() -> assertTrue(owner1.isCompletedExceptionally()));
Awaitility.await().atMost(5, TimeUnit.SECONDS)
@@ -470,19 +490,42 @@ public void unloadTestWhenDestBrokerFails()
assertEquals(0, getOwnerRequests1.size());
assertEquals(0, getOwnerRequests2.size());
+ // recovered; check that the monitor moves the stuck in-flight state to Owned
+ FieldUtils.writeDeclaredField(channel2, "producer", producer, true);
+ FieldUtils.writeDeclaredField(channel1,
+ "inFlightStateWaitingTimeInMillis", 1 , true);
+ FieldUtils.writeDeclaredField(channel2,
+ "inFlightStateWaitingTimeInMillis", 1 , true);
+
+ ((ServiceUnitStateChannelImpl) channel1).monitorOwnerships(
+ List.of(lookupServiceAddress1, lookupServiceAddress2));
+ ((ServiceUnitStateChannelImpl) channel2).monitorOwnerships(
+ List.of(lookupServiceAddress1, lookupServiceAddress2));
+
+
+ waitUntilNewOwner(channel1, bundle, lookupServiceAddress1);
+ waitUntilNewOwner(channel2, bundle, lookupServiceAddress1);
+ ownerAddr1 = channel1.getOwnerAsync(bundle).get();
+ ownerAddr2 = channel2.getOwnerAsync(bundle).get();
+
+ assertEquals(ownerAddr1, ownerAddr2);
+ assertEquals(ownerAddr1, Optional.of(lookupServiceAddress1));
+
+ var leader = channel1.isChannelOwnerAsync().get() ? channel1 : channel2;
+ validateMonitorCounters(leader,
+ 0,
+ 0,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0);
- // TODO: retry lookups and assert that the monitor cleans up the stuck assignments
- /*
- owner1 = channel1.getOwnerAsync(bundle);
- owner2 = channel2.getOwnerAsync(bundle);
- assertFalse(channel1.getOwnerAsync(bundle).isDone());
- assertFalse(channel1.getOwnerAsync(bundle).isDone());
- */
FieldUtils.writeDeclaredField(channel1,
"inFlightStateWaitingTimeInMillis", 30 * 1000, true);
FieldUtils.writeDeclaredField(channel2,
"inFlightStateWaitingTimeInMillis", 30 * 1000, true);
- FieldUtils.writeDeclaredField(channel2, "producer", producer, true);
+
}
@Test(priority = 6)
@@ -514,11 +557,11 @@ public void splitAndRetryTest() throws Exception {
Split split = new Split(bundle, ownerAddr1.get(), new HashMap<>());
channel1.publishSplitEventAsync(split);
- waitUntilNewOwner(channel1, bundle, null);
- waitUntilNewOwner(channel2, bundle, null);
+ waitUntilState(channel1, bundle, Deleted);
+ waitUntilState(channel2, bundle, Deleted);
- validateHandlerCounters(channel1, 1, 0, 9, 0, 0, 0, 1, 0, 7, 0);
- validateHandlerCounters(channel2, 1, 0, 9, 0, 0, 0, 1, 0, 7, 0);
+ validateHandlerCounters(channel1, 1, 0, 9, 0, 0, 0, 1, 0, 0, 0, 6, 0, 1, 0);
+ validateHandlerCounters(channel2, 1, 0, 9, 0, 0, 0, 1, 0, 0, 0, 6, 0, 1, 0);
validateEventCounters(channel1, 1, 0, 1, 0, 0, 0);
validateEventCounters(channel2, 0, 0, 0, 0, 0, 0);
// Verify the retry count
@@ -538,10 +581,49 @@ public void splitAndRetryTest() throws Exception {
assertEquals(Optional.of(lookupServiceAddress1), channel2.getOwnerAsync(childBundle1).get());
assertEquals(Optional.of(lookupServiceAddress1), channel2.getOwnerAsync(childBundle2).get());
+
+ // run the monitor and check that it moves the state `Deleted` -> `Init`
+ FieldUtils.writeDeclaredField(channel1,
+ "inFlightStateWaitingTimeInMillis", 1 , true);
+ FieldUtils.writeDeclaredField(channel1,
+ "semiTerminalStateWaitingTimeInMillis", 1, true);
+
+ FieldUtils.writeDeclaredField(channel2,
+ "inFlightStateWaitingTimeInMillis", 1 , true);
+ FieldUtils.writeDeclaredField(channel2,
+ "semiTerminalStateWaitingTimeInMillis", 1, true);
+
+ ((ServiceUnitStateChannelImpl) channel1).monitorOwnerships(
+ List.of(lookupServiceAddress1, lookupServiceAddress2));
+ ((ServiceUnitStateChannelImpl) channel2).monitorOwnerships(
+ List.of(lookupServiceAddress1, lookupServiceAddress2));
+ waitUntilState(channel1, bundle, Init);
+ waitUntilState(channel2, bundle, Init);
+
+ var leader = channel1.isChannelOwnerAsync().get() ? channel1 : channel2;
+ validateMonitorCounters(leader,
+ 0,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0);
+
cleanTableView(channel1, childBundle1);
cleanTableView(channel2, childBundle1);
cleanTableView(channel1, childBundle2);
cleanTableView(channel2, childBundle2);
+
+ FieldUtils.writeDeclaredField(channel1,
+ "inFlightStateWaitingTimeInMillis", 30 * 1000, true);
+ FieldUtils.writeDeclaredField(channel1,
+ "semiTerminalStateWaitingTimeInMillis", 300 * 1000, true);
+
+ FieldUtils.writeDeclaredField(channel2,
+ "inFlightStateWaitingTimeInMillis", 30 * 1000, true);
+ FieldUtils.writeDeclaredField(channel2,
+ "semiTerminalStateWaitingTimeInMillis", 300 * 1000, true);
}
@Test(priority = 7)
@@ -622,6 +704,7 @@ public void handleBrokerDeletionEventTest()
var owner1 = channel1.getOwnerAsync(bundle1);
var owner2 = channel2.getOwnerAsync(bundle2);
+ doReturn(Optional.of(lookupServiceAddress2)).when(brokerSelector).select(any(), any(), any());
assertTrue(owner1.get().isEmpty());
assertTrue(owner2.get().isEmpty());
@@ -634,6 +717,10 @@ public void handleBrokerDeletionEventTest()
waitUntilNewOwner(channel1, bundle2, broker);
waitUntilNewOwner(channel2, bundle2, broker);
+ channel1.publishUnloadEventAsync(new Unload(broker, bundle1, Optional.of(lookupServiceAddress2)));
+ waitUntilNewOwner(channel1, bundle1, lookupServiceAddress2);
+ waitUntilNewOwner(channel2, bundle1, lookupServiceAddress2);
+
// test stable metadata state
leaderChannel.handleMetadataSessionEvent(SessionReestablished);
followerChannel.handleMetadataSessionEvent(SessionReestablished);
@@ -644,26 +731,30 @@ public void handleBrokerDeletionEventTest()
leaderChannel.handleBrokerRegistrationEvent(broker, NotificationType.Deleted);
followerChannel.handleBrokerRegistrationEvent(broker, NotificationType.Deleted);
- waitUntilNewOwner(channel1, bundle1, null);
- waitUntilNewOwner(channel2, bundle1, null);
- waitUntilNewOwner(channel1, bundle2, null);
- waitUntilNewOwner(channel2, bundle2, null);
+ waitUntilNewOwner(channel1, bundle1, lookupServiceAddress2);
+ waitUntilNewOwner(channel2, bundle1, lookupServiceAddress2);
+ waitUntilNewOwner(channel1, bundle2, lookupServiceAddress2);
+ waitUntilNewOwner(channel2, bundle2, lookupServiceAddress2);
verify(leaderCleanupJobs, times(1)).computeIfAbsent(eq(broker), any());
verify(followerCleanupJobs, times(0)).computeIfAbsent(eq(broker), any());
+
+
assertEquals(0, leaderCleanupJobs.size());
assertEquals(0, followerCleanupJobs.size());
- assertEquals(1, getCleanupMetric(leaderChannel, "totalCleanupCnt"));
- assertEquals(1, getCleanupMetric(leaderChannel, "totalBrokerCleanupTombstoneCnt"));
- assertEquals(2, getCleanupMetric(leaderChannel, "totalServiceUnitCleanupTombstoneCnt"));
- assertEquals(0, getCleanupMetric(leaderChannel, "totalCleanupErrorCnt"));
- assertEquals(1, getCleanupMetric(leaderChannel, "totalCleanupScheduledCnt"));
- assertEquals(0, getCleanupMetric(leaderChannel, "totalCleanupIgnoredCnt"));
- assertEquals(0, getCleanupMetric(leaderChannel, "totalCleanupCancelledCnt"));
+ validateMonitorCounters(leaderChannel,
+ 1,
+ 0,
+ 1,
+ 0,
+ 1,
+ 0,
+ 0);
+
// test jittery metadata state
- channel1.publishAssignEventAsync(bundle1, broker);
- channel2.publishAssignEventAsync(bundle2, broker);
+ channel1.publishUnloadEventAsync(new Unload(lookupServiceAddress2, bundle1, Optional.of(broker)));
+ channel1.publishUnloadEventAsync(new Unload(lookupServiceAddress2, bundle2, Optional.of(broker)));
waitUntilNewOwner(channel1, bundle1, broker);
waitUntilNewOwner(channel2, bundle1, broker);
waitUntilNewOwner(channel1, bundle2, broker);
@@ -678,13 +769,14 @@ public void handleBrokerDeletionEventTest()
verify(followerCleanupJobs, times(0)).computeIfAbsent(eq(broker), any());
assertEquals(1, leaderCleanupJobs.size());
assertEquals(0, followerCleanupJobs.size());
- assertEquals(1, getCleanupMetric(leaderChannel, "totalCleanupCnt"));
- assertEquals(1, getCleanupMetric(leaderChannel, "totalBrokerCleanupTombstoneCnt"));
- assertEquals(2, getCleanupMetric(leaderChannel, "totalServiceUnitCleanupTombstoneCnt"));
- assertEquals(0, getCleanupMetric(leaderChannel, "totalCleanupErrorCnt"));
- assertEquals(2, getCleanupMetric(leaderChannel, "totalCleanupScheduledCnt"));
- assertEquals(0, getCleanupMetric(leaderChannel, "totalCleanupIgnoredCnt"));
- assertEquals(0, getCleanupMetric(leaderChannel, "totalCleanupCancelledCnt"));
+ validateMonitorCounters(leaderChannel,
+ 1,
+ 0,
+ 1,
+ 0,
+ 2,
+ 0,
+ 0);
// broker is back online
leaderChannel.handleBrokerRegistrationEvent(broker, NotificationType.Created);
@@ -694,13 +786,14 @@ public void handleBrokerDeletionEventTest()
verify(followerCleanupJobs, times(0)).computeIfAbsent(eq(broker), any());
assertEquals(0, leaderCleanupJobs.size());
assertEquals(0, followerCleanupJobs.size());
- assertEquals(1, getCleanupMetric(leaderChannel, "totalCleanupCnt"));
- assertEquals(1, getCleanupMetric(leaderChannel, "totalBrokerCleanupTombstoneCnt"));
- assertEquals(2, getCleanupMetric(leaderChannel, "totalServiceUnitCleanupTombstoneCnt"));
- assertEquals(0, getCleanupMetric(leaderChannel, "totalCleanupErrorCnt"));
- assertEquals(2, getCleanupMetric(leaderChannel, "totalCleanupScheduledCnt"));
- assertEquals(0, getCleanupMetric(leaderChannel, "totalCleanupIgnoredCnt"));
- assertEquals(1, getCleanupMetric(leaderChannel, "totalCleanupCancelledCnt"));
+ validateMonitorCounters(leaderChannel,
+ 1,
+ 0,
+ 1,
+ 0,
+ 2,
+ 0,
+ 1);
// broker is offline again
@@ -712,35 +805,37 @@ public void handleBrokerDeletionEventTest()
verify(followerCleanupJobs, times(0)).computeIfAbsent(eq(broker), any());
assertEquals(1, leaderCleanupJobs.size());
assertEquals(0, followerCleanupJobs.size());
- assertEquals(1, getCleanupMetric(leaderChannel, "totalCleanupCnt"));
- assertEquals(1, getCleanupMetric(leaderChannel, "totalBrokerCleanupTombstoneCnt"));
- assertEquals(2, getCleanupMetric(leaderChannel, "totalServiceUnitCleanupTombstoneCnt"));
- assertEquals(0, getCleanupMetric(leaderChannel, "totalCleanupErrorCnt"));
- assertEquals(3, getCleanupMetric(leaderChannel, "totalCleanupScheduledCnt"));
- assertEquals(0, getCleanupMetric(leaderChannel, "totalCleanupIgnoredCnt"));
- assertEquals(1, getCleanupMetric(leaderChannel, "totalCleanupCancelledCnt"));
+ validateMonitorCounters(leaderChannel,
+ 1,
+ 0,
+ 1,
+ 0,
+ 3,
+ 0,
+ 1);
// finally cleanup
- waitUntilNewOwner(channel1, bundle1, null);
- waitUntilNewOwner(channel2, bundle1, null);
- waitUntilNewOwner(channel1, bundle2, null);
- waitUntilNewOwner(channel2, bundle2, null);
+ waitUntilNewOwner(channel1, bundle1, lookupServiceAddress2);
+ waitUntilNewOwner(channel2, bundle1, lookupServiceAddress2);
+ waitUntilNewOwner(channel1, bundle2, lookupServiceAddress2);
+ waitUntilNewOwner(channel2, bundle2, lookupServiceAddress2);
verify(leaderCleanupJobs, times(3)).computeIfAbsent(eq(broker), any());
verify(followerCleanupJobs, times(0)).computeIfAbsent(eq(broker), any());
assertEquals(0, leaderCleanupJobs.size());
assertEquals(0, followerCleanupJobs.size());
- assertEquals(2, getCleanupMetric(leaderChannel, "totalCleanupCnt"));
- assertEquals(2, getCleanupMetric(leaderChannel, "totalBrokerCleanupTombstoneCnt"));
- assertEquals(4, getCleanupMetric(leaderChannel, "totalServiceUnitCleanupTombstoneCnt"));
- assertEquals(0, getCleanupMetric(leaderChannel, "totalCleanupErrorCnt"));
- assertEquals(3, getCleanupMetric(leaderChannel, "totalCleanupScheduledCnt"));
- assertEquals(0, getCleanupMetric(leaderChannel, "totalCleanupIgnoredCnt"));
- assertEquals(1, getCleanupMetric(leaderChannel, "totalCleanupCancelledCnt"));
+ validateMonitorCounters(leaderChannel,
+ 2,
+ 0,
+ 3,
+ 0,
+ 3,
+ 0,
+ 1);
// test unstable state
- channel1.publishAssignEventAsync(bundle1, broker);
- channel2.publishAssignEventAsync(bundle2, broker);
+ channel1.publishUnloadEventAsync(new Unload(lookupServiceAddress2, bundle1, Optional.of(broker)));
+ channel1.publishUnloadEventAsync(new Unload(lookupServiceAddress2, bundle2, Optional.of(broker)));
waitUntilNewOwner(channel1, bundle1, broker);
waitUntilNewOwner(channel2, bundle1, broker);
waitUntilNewOwner(channel1, bundle2, broker);
@@ -755,13 +850,14 @@ public void handleBrokerDeletionEventTest()
verify(followerCleanupJobs, times(0)).computeIfAbsent(eq(broker), any());
assertEquals(0, leaderCleanupJobs.size());
assertEquals(0, followerCleanupJobs.size());
- assertEquals(2, getCleanupMetric(leaderChannel, "totalCleanupCnt"));
- assertEquals(2, getCleanupMetric(leaderChannel, "totalBrokerCleanupTombstoneCnt"));
- assertEquals(4, getCleanupMetric(leaderChannel, "totalServiceUnitCleanupTombstoneCnt"));
- assertEquals(0, getCleanupMetric(leaderChannel, "totalCleanupErrorCnt"));
- assertEquals(3, getCleanupMetric(leaderChannel, "totalCleanupScheduledCnt"));
- assertEquals(1, getCleanupMetric(leaderChannel, "totalCleanupIgnoredCnt"));
- assertEquals(1, getCleanupMetric(leaderChannel, "totalCleanupCancelledCnt"));
+ validateMonitorCounters(leaderChannel,
+ 2,
+ 0,
+ 3,
+ 0,
+ 3,
+ 1,
+ 1);
// clean-up
FieldUtils.writeDeclaredField(leaderChannel, "maxCleanupDelayTimeInSecs", 3 * 60, true);
@@ -774,9 +870,7 @@ public void handleBrokerDeletionEventTest()
@Test(priority = 10)
public void conflictAndCompactionTest() throws ExecutionException, InterruptedException, TimeoutException,
IllegalAccessException, PulsarClientException, PulsarServerException {
-
- var producer = (Producer) FieldUtils.readDeclaredField(channel1, "producer", true);
- producer.newMessage().key(bundle).send();
+ String bundle = String.format("%s/%s", "public/default", "0x0000000a_0xffffffff");
var owner1 = channel1.getOwnerAsync(bundle);
var owner2 = channel2.getOwnerAsync(bundle);
assertTrue(owner1.get().isEmpty());
@@ -815,7 +909,7 @@ public void conflictAndCompactionTest() throws ExecutionException, InterruptedEx
.untilAsserted(() -> verify(compactor, times(1))
.compact(eq(ServiceUnitStateChannelImpl.TOPIC), any()));
- var channel3 = new ServiceUnitStateChannelImpl(pulsar1);
+ var channel3 = createChannel(pulsar);
channel3.start();
Awaitility.await()
.pollInterval(200, TimeUnit.MILLISECONDS)
@@ -830,10 +924,7 @@ public void conflictAndCompactionTest() throws ExecutionException, InterruptedEx
@Test(priority = 11)
public void ownerLookupCountTests() throws IllegalAccessException {
- overrideTableView(channel1, bundle, null);
- channel1.getOwnerAsync(bundle);
-
- overrideTableView(channel1, bundle, new ServiceUnitStateData(Assigned, "b1"));
+ overrideTableView(channel1, bundle, new ServiceUnitStateData(Assigning, "b1"));
channel1.getOwnerAsync(bundle);
channel1.getOwnerAsync(bundle);
@@ -842,19 +933,273 @@ public void ownerLookupCountTests() throws IllegalAccessException {
channel1.getOwnerAsync(bundle);
channel1.getOwnerAsync(bundle);
- overrideTableView(channel1, bundle, new ServiceUnitStateData(Released, "b1"));
+ overrideTableView(channel1, bundle, new ServiceUnitStateData(Releasing, "b1"));
channel1.getOwnerAsync(bundle);
channel1.getOwnerAsync(bundle);
overrideTableView(channel1, bundle, new ServiceUnitStateData(Splitting, "b1"));
channel1.getOwnerAsync(bundle);
- validateOwnerLookUpCounters(channel1, 2, 3, 2, 1, 1);
+ overrideTableView(channel1, bundle, new ServiceUnitStateData(Free, "b1"));
+ channel1.getOwnerAsync(bundle);
+
+ overrideTableView(channel1, bundle, new ServiceUnitStateData(Deleted, "b1"));
+ channel1.getOwnerAsync(bundle);
+ channel1.getOwnerAsync(bundle);
+
+ overrideTableView(channel1, bundle, null);
+ channel1.getOwnerAsync(bundle);
+ channel1.getOwnerAsync(bundle);
+ channel1.getOwnerAsync(bundle);
+
+ validateOwnerLookUpCounters(channel1, 2, 3, 2, 1, 1, 2, 3);
+
+ }
+
+ @Test(priority = 12)
+ public void unloadTest()
+         throws ExecutionException, InterruptedException, IllegalAccessException {
+     // Assign the bundle to lookupServiceAddress1 and check that both channels report the same owner.
+     channel1.publishAssignEventAsync(bundle, lookupServiceAddress1);
+
+     waitUntilNewOwner(channel1, bundle, lookupServiceAddress1);
+     waitUntilNewOwner(channel2, bundle, lookupServiceAddress1);
+     var ownerAddr1 = channel1.getOwnerAsync(bundle).get();
+     var ownerAddr2 = channel2.getOwnerAsync(bundle).get();
+
+     assertEquals(ownerAddr1, ownerAddr2);
+     assertEquals(ownerAddr1, Optional.of(lookupServiceAddress1));
+     // Unload with no destination broker: the bundle should reach Free and have no owner.
+     Unload unload = new Unload(lookupServiceAddress1, bundle, Optional.empty());
+     channel1.publishUnloadEventAsync(unload);
+
+     waitUntilState(channel1, bundle, Free);
+     waitUntilState(channel2, bundle, Free);
+     var owner1 = channel1.getOwnerAsync(bundle);
+     var owner2 = channel2.getOwnerAsync(bundle);
+
+     assertEquals(Optional.empty(), owner1.get());
+     assertEquals(Optional.empty(), owner2.get());
+     // Repeat the assign/unload cycle, this time published through channel2 for lookupServiceAddress2.
+     channel2.publishAssignEventAsync(bundle, lookupServiceAddress2);
+
+     waitUntilNewOwner(channel1, bundle, lookupServiceAddress2);
+     waitUntilNewOwner(channel2, bundle, lookupServiceAddress2);
+
+     ownerAddr1 = channel1.getOwnerAsync(bundle).get();
+     ownerAddr2 = channel2.getOwnerAsync(bundle).get();
+
+     assertEquals(ownerAddr1, ownerAddr2);
+     assertEquals(ownerAddr1, Optional.of(lookupServiceAddress2));
+     Unload unload2 = new Unload(lookupServiceAddress2, bundle, Optional.empty());
+
+     channel2.publishUnloadEventAsync(unload2);
+
+     waitUntilState(channel1, bundle, Free);
+     waitUntilState(channel2, bundle, Free);
+
+     // Monitor check, Free -> Init: shrink the waiting times to 1 ms, then run the ownership monitor.
+     FieldUtils.writeDeclaredField(channel1,
+             "inFlightStateWaitingTimeInMillis", 1, true);
+     FieldUtils.writeDeclaredField(channel1,
+             "semiTerminalStateWaitingTimeInMillis", 1, true);
+
+     FieldUtils.writeDeclaredField(channel2,
+             "inFlightStateWaitingTimeInMillis", 1, true);
+     FieldUtils.writeDeclaredField(channel2,
+             "semiTerminalStateWaitingTimeInMillis", 1, true);
+
+     ((ServiceUnitStateChannelImpl) channel1).monitorOwnerships(
+             List.of(lookupServiceAddress1, lookupServiceAddress2));
+     ((ServiceUnitStateChannelImpl) channel2).monitorOwnerships(
+             List.of(lookupServiceAddress1, lookupServiceAddress2));
+     waitUntilState(channel1, bundle, Init);
+     waitUntilState(channel2, bundle, Init);
+
+     var leader = channel1.isChannelOwnerAsync().get() ? channel1 : channel2;
+     validateMonitorCounters(leader,
+             0, // totalInactiveBrokerCleanupCnt
+             1, // totalServiceUnitTombstoneCleanupCnt
+             0, // totalOrphanServiceUnitCleanupCnt
+             0, // totalCleanupErrorCnt
+             0, // totalInactiveBrokerCleanupScheduledCnt
+             0, // totalInactiveBrokerCleanupIgnoredCnt
+             0); // totalInactiveBrokerCleanupCancelledCnt
+
+     // Restore the same waiting times on both channels: 30 s in-flight, 300 s semi-terminal.
+     FieldUtils.writeDeclaredField(channel1,
+             "inFlightStateWaitingTimeInMillis", 30 * 1000, true);
+     FieldUtils.writeDeclaredField(channel1,
+             "semiTerminalStateWaitingTimeInMillis", 300 * 1000, true);
+
+     FieldUtils.writeDeclaredField(channel2,
+             "inFlightStateWaitingTimeInMillis", 30 * 1000, true);
+     FieldUtils.writeDeclaredField(channel2,
+             "semiTerminalStateWaitingTimeInMillis", 300 * 1000, true);
+ }
+
+ @Test(priority = 13)
+ public void assignTestWhenDestBrokerFails()
+         throws ExecutionException, InterruptedException, IllegalAccessException {
+     // Start from a Free bundle that has no owner on either channel.
+     Unload unload = new Unload(lookupServiceAddress1, bundle, Optional.empty());
+
+     channel1.publishUnloadEventAsync(unload);
+
+     waitUntilState(channel1, bundle, Free);
+     waitUntilState(channel2, bundle, Free);
+
+     assertEquals(Optional.empty(), channel1.getOwnerAsync(bundle).get());
+     assertEquals(Optional.empty(), channel2.getOwnerAsync(bundle).get());
+     // Keep channel2's own producer so it can be restored later; install a spy whose sends always fail.
+     var producer = (Producer) FieldUtils.readDeclaredField(channel2,
+             "producer", true);
+     var spyProducer = spy(producer);
+     var msg = mock(TypedMessageBuilder.class);
+     var future = CompletableFuture.failedFuture(new RuntimeException());
+     doReturn(msg).when(spyProducer).newMessage();
+     doReturn(msg).when(msg).key(any());
+     doReturn(msg).when(msg).value(any());
+     doReturn(future).when(msg).sendAsync();
+     FieldUtils.writeDeclaredField(channel2, "producer", spyProducer, true);
+     FieldUtils.writeDeclaredField(channel1,
+             "inFlightStateWaitingTimeInMillis", 3 * 1000, true);
+     FieldUtils.writeDeclaredField(channel2,
+             "inFlightStateWaitingTimeInMillis", 3 * 1000, true);
+     doReturn(Optional.of(lookupServiceAddress2)).when(brokerSelector).select(any(), any(), any());
+     channel1.publishAssignEventAsync(bundle, lookupServiceAddress2);
+     // channel2's producer is broken, so the assignment to lookupServiceAddress2 won't complete.
+     waitUntilState(channel1, bundle);
+     waitUntilState(channel2, bundle);
+     var owner1 = channel1.getOwnerAsync(bundle);
+     var owner2 = channel2.getOwnerAsync(bundle);
+
+     assertFalse(owner1.isDone());
+     assertFalse(owner2.isDone());
+
+     // Within 5 secs (in-flight waiting time is 3 secs), the pending lookup requests should time out.
+     Awaitility.await().atMost(5, TimeUnit.SECONDS)
+             .untilAsserted(() -> assertTrue(owner1.isCompletedExceptionally()));
+     Awaitility.await().atMost(5, TimeUnit.SECONDS)
+             .untilAsserted(() -> assertTrue(owner2.isCompletedExceptionally()));
+
+     // Recovered: put channel2's producer back and check the monitor moves the state Assigning -> Owned.
+     FieldUtils.writeDeclaredField(channel2, "producer", producer, true);
+     FieldUtils.writeDeclaredField(channel1,
+             "inFlightStateWaitingTimeInMillis", 1, true);
+     FieldUtils.writeDeclaredField(channel2,
+             "inFlightStateWaitingTimeInMillis", 1, true);
+
+     ((ServiceUnitStateChannelImpl) channel1).monitorOwnerships(
+             List.of(lookupServiceAddress1, lookupServiceAddress2));
+     ((ServiceUnitStateChannelImpl) channel2).monitorOwnerships(
+             List.of(lookupServiceAddress1, lookupServiceAddress2));
+
+     // After the monitor run, both channels should report lookupServiceAddress2 as the owner.
+     waitUntilNewOwner(channel1, bundle, lookupServiceAddress2);
+     waitUntilNewOwner(channel2, bundle, lookupServiceAddress2);
+     var ownerAddr1 = channel1.getOwnerAsync(bundle).get();
+     var ownerAddr2 = channel2.getOwnerAsync(bundle).get();
+
+     assertEquals(ownerAddr1, ownerAddr2);
+     assertEquals(ownerAddr1, Optional.of(lookupServiceAddress2));
+
+     var leader = channel1.isChannelOwnerAsync().get() ? channel1 : channel2;
+     validateMonitorCounters(leader,
+             0, // totalInactiveBrokerCleanupCnt
+             0, // totalServiceUnitTombstoneCleanupCnt
+             1, // totalOrphanServiceUnitCleanupCnt
+             0, // totalCleanupErrorCnt
+             0, // totalInactiveBrokerCleanupScheduledCnt
+             0, // totalInactiveBrokerCleanupIgnoredCnt
+             0); // totalInactiveBrokerCleanupCancelledCnt
+
+     FieldUtils.writeDeclaredField(channel1,
+             "inFlightStateWaitingTimeInMillis", 30 * 1000, true);
+     FieldUtils.writeDeclaredField(channel2,
+             "inFlightStateWaitingTimeInMillis", 30 * 1000, true);
}
+ @Test(priority = 14)
+ public void splitTestWhenDestBrokerFails()
+ throws ExecutionException, InterruptedException, IllegalAccessException {
+ // Scenario: a split is published while channel1's producer fails every send; the ownership
+ // monitor is then run after the producer is restored and the owner is checked again.
+ Unload unload = new Unload(lookupServiceAddress1, bundle, Optional.empty());
+
+ channel1.publishUnloadEventAsync(unload);
+
+ waitUntilState(channel1, bundle, Free);
+ waitUntilState(channel2, bundle, Free);
+
+ channel1.publishAssignEventAsync(bundle, lookupServiceAddress1);
+
+ waitUntilState(channel1, bundle, Owned);
+ waitUntilState(channel2, bundle, Owned);
+
+ assertEquals(lookupServiceAddress1, channel1.getOwnerAsync(bundle).get().get());
+ assertEquals(lookupServiceAddress1, channel2.getOwnerAsync(bundle).get().get());
+ // Swap channel1's producer for a spy whose sendAsync() always returns a failed future.
+ var producer = (Producer) FieldUtils.readDeclaredField(channel1,
+ "producer", true);
+ var spyProducer = spy(producer);
+ var msg = mock(TypedMessageBuilder.class);
+ var future = CompletableFuture.failedFuture(new RuntimeException());
+ doReturn(msg).when(spyProducer).newMessage();
+ doReturn(msg).when(msg).key(any());
+ doReturn(msg).when(msg).value(any());
+ doReturn(future).when(msg).sendAsync();
+ FieldUtils.writeDeclaredField(channel1, "producer", spyProducer, true);
+ FieldUtils.writeDeclaredField(channel1,
+ "inFlightStateWaitingTimeInMillis", 3 * 1000, true);
+ FieldUtils.writeDeclaredField(channel2,
+ "inFlightStateWaitingTimeInMillis", 3 * 1000, true);
+ channel2.publishSplitEventAsync(new Split(bundle, lookupServiceAddress1, null));
+ // channel1 is broken. the split won't be complete.
+ waitUntilState(channel1, bundle);
+ waitUntilState(channel2, bundle);
+ var owner1 = channel1.getOwnerAsync(bundle);
+ var owner2 = channel2.getOwnerAsync(bundle);
+ // NOTE(review): owner1/owner2 are never asserted in this test; confirm whether checks are missing.
+
+ // recovered, check the monitor update state : Splitting -> Owned
+ FieldUtils.writeDeclaredField(channel1, "producer", producer, true);
+ FieldUtils.writeDeclaredField(channel1,
+ "inFlightStateWaitingTimeInMillis", 1 , true);
+ FieldUtils.writeDeclaredField(channel2,
+ "inFlightStateWaitingTimeInMillis", 1 , true);
+
+ ((ServiceUnitStateChannelImpl) channel1).monitorOwnerships(
+ List.of(lookupServiceAddress1, lookupServiceAddress2));
+ ((ServiceUnitStateChannelImpl) channel2).monitorOwnerships(
+ List.of(lookupServiceAddress1, lookupServiceAddress2));
+
+ // After the monitor run, both channels are expected to report lookupServiceAddress1 as the owner.
+ waitUntilNewOwner(channel1, bundle, lookupServiceAddress1);
+ waitUntilNewOwner(channel2, bundle, lookupServiceAddress1);
+ var ownerAddr1 = channel1.getOwnerAsync(bundle).get();
+ var ownerAddr2 = channel2.getOwnerAsync(bundle).get();
+
+ assertEquals(ownerAddr1, ownerAddr2);
+ assertEquals(ownerAddr1, Optional.of(lookupServiceAddress1));
+
+ var leader = channel1.isChannelOwnerAsync().get() ? channel1 : channel2;
+ validateMonitorCounters(leader,
+ 0, // totalInactiveBrokerCleanupCnt
+ 0, // totalServiceUnitTombstoneCleanupCnt
+ 1, // totalOrphanServiceUnitCleanupCnt
+ 0, // totalCleanupErrorCnt
+ 0, // totalInactiveBrokerCleanupScheduledCnt
+ 0, // totalInactiveBrokerCleanupIgnoredCnt
+ 0); // totalInactiveBrokerCleanupCancelledCnt
+ // Set the in-flight waiting time to 30 seconds on both channels before leaving the test.
+ FieldUtils.writeDeclaredField(channel1,
+ "inFlightStateWaitingTimeInMillis", 30 * 1000, true);
+ FieldUtils.writeDeclaredField(channel2,
+ "inFlightStateWaitingTimeInMillis", 30 * 1000, true);
- // TODO: add the channel recovery test when broker registry is added.
+ }
private static ConcurrentOpenHashMap>> getOwnerRequests(
ServiceUnitStateChannel channel) throws IllegalAccessException {
@@ -926,7 +1271,7 @@ private static void waitUntilNewOwner(ServiceUnitStateChannel channel, String se
});
}
- private static void waitUntilNewState(ServiceUnitStateChannel channel, String key)
+ private static void waitUntilState(ServiceUnitStateChannel channel, String key)
throws IllegalAccessException {
TableViewImpl tv = (TableViewImpl)
FieldUtils.readField(channel, "tableview", true);
@@ -943,6 +1288,20 @@ private static void waitUntilNewState(ServiceUnitStateChannel channel, String ke
});
}
+ // Polls the channel's table view every 200 ms, for up to 10 s, until key's state equals expected.
+ private static void waitUntilState(ServiceUnitStateChannel channel, String key, ServiceUnitState expected)
+         throws IllegalAccessException {
+     TableViewImpl tableView = (TableViewImpl)
+             FieldUtils.readField(channel, "tableview", true);
+     Awaitility.await()
+             .pollInterval(200, TimeUnit.MILLISECONDS)
+             .atMost(10, TimeUnit.SECONDS)
+             .until(() -> {
+                 ServiceUnitStateData current = tableView.get(key);
+                 return state(current) == expected;
+             });
+ }
+
private static void cleanTableView(ServiceUnitStateChannel channel, String serviceUnit)
throws IllegalAccessException {
var tv = (TableViewImpl)
@@ -994,6 +1353,16 @@ private static void cleanOpsCounters(ServiceUnitStateChannel channel)
}
}
+ // Resets the channel's ownership-monitor counters through reflection; the error counter gets a new AtomicLong.
+ private void cleanOwnershipMonitorCounters(ServiceUnitStateChannel channel) throws IllegalAccessException {
+     for (String field : List.of("totalInactiveBrokerCleanupCnt", "totalServiceUnitTombstoneCleanupCnt",
+             "totalOrphanServiceUnitCleanupCnt", "totalInactiveBrokerCleanupScheduledCnt",
+             "totalInactiveBrokerCleanupIgnoredCnt", "totalInactiveBrokerCleanupCancelledCnt")) {
+         FieldUtils.writeDeclaredField(channel, field, 0, true);
+     }
+     FieldUtils.writeDeclaredField(channel, "totalCleanupErrorCnt", new AtomicLong(0), true);
+ }
+
private static long getCleanupMetric(ServiceUnitStateChannel channel, String metric)
throws IllegalAccessException {
Object var = FieldUtils.readDeclaredField(channel, metric, true);
@@ -1009,7 +1378,9 @@ private static void validateHandlerCounters(ServiceUnitStateChannel channel,
long ownedT, long ownedF,
long releasedT, long releasedF,
long splittingT, long splittingF,
- long freeT, long freeF)
+ long freeT, long freeF,
+ long initT, long initF,
+ long deletedT, long deletedF)
throws IllegalAccessException {
var handlerCounters =
(Map)
@@ -1019,16 +1390,20 @@ private static void validateHandlerCounters(ServiceUnitStateChannel channel,
.pollInterval(200, TimeUnit.MILLISECONDS)
.atMost(10, TimeUnit.SECONDS)
.untilAsserted(() -> { // wait until true
- assertEquals(assignedT, handlerCounters.get(Assigned).getTotal().get());
- assertEquals(assignedF, handlerCounters.get(Assigned).getFailure().get());
+ assertEquals(assignedT, handlerCounters.get(Assigning).getTotal().get());
+ assertEquals(assignedF, handlerCounters.get(Assigning).getFailure().get());
assertEquals(ownedT, handlerCounters.get(Owned).getTotal().get());
assertEquals(ownedF, handlerCounters.get(Owned).getFailure().get());
- assertEquals(releasedT, handlerCounters.get(Released).getTotal().get());
- assertEquals(releasedF, handlerCounters.get(Released).getFailure().get());
+ assertEquals(releasedT, handlerCounters.get(Releasing).getTotal().get());
+ assertEquals(releasedF, handlerCounters.get(Releasing).getFailure().get());
assertEquals(splittingT, handlerCounters.get(Splitting).getTotal().get());
assertEquals(splittingF, handlerCounters.get(Splitting).getFailure().get());
assertEquals(freeT, handlerCounters.get(Free).getTotal().get());
assertEquals(freeF, handlerCounters.get(Free).getFailure().get());
+ assertEquals(initT, handlerCounters.get(Init).getTotal().get());
+ assertEquals(initF, handlerCounters.get(Init).getFailure().get());
+ assertEquals(deletedT, handlerCounters.get(Deleted).getTotal().get());
+ assertEquals(deletedF, handlerCounters.get(Deleted).getFailure().get());
});
}
@@ -1059,7 +1434,10 @@ private static void validateOwnerLookUpCounters(ServiceUnitStateChannel channel,
long owned,
long released,
long splitting,
- long free)
+ long free,
+ long deleted,
+ long init
+ )
throws IllegalAccessException {
var ownerLookUpCounters =
(Map)
@@ -1069,11 +1447,48 @@ private static void validateOwnerLookUpCounters(ServiceUnitStateChannel channel,
.pollInterval(200, TimeUnit.MILLISECONDS)
.atMost(10, TimeUnit.SECONDS)
.untilAsserted(() -> { // wait until true
- assertEquals(assigned, ownerLookUpCounters.get(Assigned).get());
+ assertEquals(assigned, ownerLookUpCounters.get(Assigning).get());
assertEquals(owned, ownerLookUpCounters.get(Owned).get());
- assertEquals(released, ownerLookUpCounters.get(Released).get());
+ assertEquals(released, ownerLookUpCounters.get(Releasing).get());
assertEquals(splitting, ownerLookUpCounters.get(Splitting).get());
assertEquals(free, ownerLookUpCounters.get(Free).get());
+ assertEquals(deleted, ownerLookUpCounters.get(Deleted).get());
+ assertEquals(init, ownerLookUpCounters.get(Init).get());
});
}
+
+ // Asserts the channel's cleanup/monitor counters, read via getCleanupMetric, against the expected values.
+ private static void validateMonitorCounters(ServiceUnitStateChannel channel,
+         long totalInactiveBrokerCleanupCnt,
+         long totalServiceUnitTombstoneCleanupCnt,
+         long totalOrphanServiceUnitCleanupCnt,
+         long totalCleanupErrorCnt,
+         long totalInactiveBrokerCleanupScheduledCnt,
+         long totalInactiveBrokerCleanupIgnoredCnt,
+         long totalInactiveBrokerCleanupCancelledCnt)
+         throws IllegalAccessException {
+     // Field names and expected values are listed in the same order as the parameters.
+     String[] metricNames = {"totalInactiveBrokerCleanupCnt", "totalServiceUnitTombstoneCleanupCnt",
+             "totalOrphanServiceUnitCleanupCnt", "totalCleanupErrorCnt", "totalInactiveBrokerCleanupScheduledCnt",
+             "totalInactiveBrokerCleanupIgnoredCnt", "totalInactiveBrokerCleanupCancelledCnt"};
+     long[] expectedCounts = {totalInactiveBrokerCleanupCnt, totalServiceUnitTombstoneCleanupCnt,
+             totalOrphanServiceUnitCleanupCnt, totalCleanupErrorCnt, totalInactiveBrokerCleanupScheduledCnt,
+             totalInactiveBrokerCleanupIgnoredCnt, totalInactiveBrokerCleanupCancelledCnt};
+     for (int i = 0; i < metricNames.length; i++) {
+         assertEquals(expectedCounts[i], getCleanupMetric(channel, metricNames[i]));
+     }
+ }
+
+ // Creates a spied ServiceUnitStateChannelImpl whose context, registry and selector come from this test.
+ ServiceUnitStateChannelImpl createChannel(PulsarService pulsar)
+         throws IllegalAccessException {
+     var realChannel = new ServiceUnitStateChannelImpl(pulsar);
+     FieldUtils.writeDeclaredField(realChannel, "ownershipMonitorDelayTimeInSecs", 5, true);
+     // The field is written on the real instance before spying; the stubs below are set on the spy.
+     var spiedChannel = spy(realChannel);
+     doReturn(brokerSelector).when(spiedChannel).getBrokerSelector();
+     doReturn(registry).when(spiedChannel).getBrokerRegistry();
+     doReturn(loadManagerContext).when(spiedChannel).getContext();
+     return spiedChannel;
+ }
}
diff --git a/pulsar-broker/src/test/java/org/apache/pulsar/broker/loadbalance/extensions/channel/ServiceUnitStateCompactionStrategyTest.java b/pulsar-broker/src/test/java/org/apache/pulsar/broker/loadbalance/extensions/channel/ServiceUnitStateCompactionStrategyTest.java
index 49b55f7660a81..1a4aba15f9e6f 100644
--- a/pulsar-broker/src/test/java/org/apache/pulsar/broker/loadbalance/extensions/channel/ServiceUnitStateCompactionStrategyTest.java
+++ b/pulsar-broker/src/test/java/org/apache/pulsar/broker/loadbalance/extensions/channel/ServiceUnitStateCompactionStrategyTest.java
@@ -18,10 +18,12 @@
*/
package org.apache.pulsar.broker.loadbalance.extensions.channel;
-import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitState.Assigned;
+import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitState.Assigning;
+import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitState.Deleted;
import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitState.Free;
+import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitState.Init;
import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitState.Owned;
-import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitState.Released;
+import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitState.Releasing;
import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitState.Splitting;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.testng.Assert.assertTrue;
@@ -36,7 +38,7 @@ ServiceUnitStateData data(ServiceUnitState state) {
}
ServiceUnitStateData data(ServiceUnitState state, String dst) {
- return new ServiceUnitStateData(state, dst, "broker");
+ return new ServiceUnitStateData(state, dst, null);
}
ServiceUnitStateData data(ServiceUnitState state, String src, String dst) {
return new ServiceUnitStateData(state, dst, src);
@@ -45,46 +47,95 @@ ServiceUnitStateData data(ServiceUnitState state, String src, String dst) {
@Test
public void test() throws InterruptedException {
String dst = "dst";
- assertTrue(strategy.shouldKeepLeft(data(Free), data(Free)));
- assertFalse(strategy.shouldKeepLeft(data(Free), data(Assigned)));
- assertTrue(strategy.shouldKeepLeft(data(Free), data(Assigned, "")));
- assertFalse(strategy.shouldKeepLeft(data(Free), data(Owned)));
- assertTrue(strategy.shouldKeepLeft(data(Free), data(Owned, "")));
- assertFalse(strategy.shouldKeepLeft(data(Free), data(Released)));
- assertFalse(strategy.shouldKeepLeft(data(Free), data(Splitting)));
+ String src = "src";
+
+ assertFalse(strategy.shouldKeepLeft(
+ new ServiceUnitStateData(Init, dst),
+ new ServiceUnitStateData(Init, dst, true)));
+
+ assertFalse(strategy.shouldKeepLeft(
+ data(Owned), null));
+
+ assertTrue(strategy.shouldKeepLeft(data(Init), data(Init)));
+ assertFalse(strategy.shouldKeepLeft(data(Init), data(Free)));
+ assertFalse(strategy.shouldKeepLeft(data(Init), data(Assigning)));
+ assertFalse(strategy.shouldKeepLeft(data(Init), data(Owned)));
+ assertFalse(strategy.shouldKeepLeft(data(Init), data(Releasing)));
+ assertFalse(strategy.shouldKeepLeft(data(Init), data(Splitting)));
+ assertFalse(strategy.shouldKeepLeft(data(Init), data(Deleted)));
- assertFalse(strategy.shouldKeepLeft(data(Assigned), data(Free)));
- assertTrue(strategy.shouldKeepLeft(data(Assigned), data(Assigned)));
- assertTrue(strategy.shouldKeepLeft(data(Assigned, "dst2"), data(Owned, dst)));
- assertTrue(strategy.shouldKeepLeft(data(Assigned, "src1", dst), data(Owned, "src2", dst)));
- assertFalse(strategy.shouldKeepLeft(data(Assigned), data(Owned)));
- assertTrue(strategy.shouldKeepLeft(data(Assigned, "dst2"), data(Released, dst)));
- assertTrue(strategy.shouldKeepLeft(data(Assigned, "src1", dst), data(Released, "src2", dst)));
- assertFalse(strategy.shouldKeepLeft(data(Assigned, dst), data(Released, dst)));
- assertTrue(strategy.shouldKeepLeft(data(Assigned), data(Splitting)));
+ assertTrue(strategy.shouldKeepLeft(data(Assigning), data(Init)));
+ assertTrue(strategy.shouldKeepLeft(data(Assigning), data(Free)));
+ assertTrue(strategy.shouldKeepLeft(data(Assigning), data(Assigning)));
+ assertTrue(strategy.shouldKeepLeft(data(Assigning, "dst1"), data(Owned, "dst2")));
+ assertTrue(strategy.shouldKeepLeft(data(Assigning, dst), data(Owned, src, dst)));
+ assertFalse(strategy.shouldKeepLeft(data(Assigning, dst), data(Owned, dst)));
+ assertTrue(strategy.shouldKeepLeft(data(Assigning, src, dst), data(Releasing, dst)));
+ assertTrue(strategy.shouldKeepLeft(data(Assigning, src, "dst1"), data(Releasing, src, "dst2")));
+ assertTrue(strategy.shouldKeepLeft(data(Assigning, "src1", dst), data(Releasing, "src2", dst)));
+ assertFalse(strategy.shouldKeepLeft(data(Assigning, src, dst), data(Releasing, src, dst)));
+ assertTrue(strategy.shouldKeepLeft(data(Assigning), data(Splitting, dst)));
+ assertTrue(strategy.shouldKeepLeft(data(Assigning), data(Deleted, dst)));
- assertFalse(strategy.shouldKeepLeft(data(Owned), data(Free)));
- assertTrue(strategy.shouldKeepLeft(data(Owned), data(Assigned)));
- assertTrue(strategy.shouldKeepLeft(data(Owned), data(Assigned, "")));
- assertTrue(strategy.shouldKeepLeft(data(Owned), data(Assigned, "src", dst)));
- assertFalse(strategy.shouldKeepLeft(data(Owned), data(Assigned, dst)));
+ assertTrue(strategy.shouldKeepLeft(data(Owned), data(Init)));
+ assertTrue(strategy.shouldKeepLeft(data(Owned), data(Free)));
+ assertTrue(strategy.shouldKeepLeft(data(Owned, src, "dst1"), data(Assigning, src, "dst2")));
+ assertTrue(strategy.shouldKeepLeft(data(Owned, src, dst), data(Assigning, dst)));
+ assertTrue(strategy.shouldKeepLeft(data(Owned, src, dst), data(Assigning, src, dst)));
+ assertTrue(strategy.shouldKeepLeft(data(Owned, src, dst), data(Assigning, dst, dst)));
+ assertFalse(strategy.shouldKeepLeft(data(Owned, src, dst), data(Assigning, dst, "dst1")));
assertTrue(strategy.shouldKeepLeft(data(Owned), data(Owned)));
- assertTrue(strategy.shouldKeepLeft(data(Owned), data(Released)));
- assertTrue(strategy.shouldKeepLeft(data(Owned,"dst2"), data(Splitting, dst)));
- assertFalse(strategy.shouldKeepLeft(data(Owned), data(Splitting)));
+ assertTrue(strategy.shouldKeepLeft(data(Owned), data(Releasing, dst)));
+ assertTrue(strategy.shouldKeepLeft(data(Owned, src, "dst1"), data(Releasing, src, "dst2")));
+ assertTrue(strategy.shouldKeepLeft(data(Owned, "dst1"), data(Releasing, "dst2")));
+ assertFalse(strategy.shouldKeepLeft(data(Owned, dst), data(Releasing, dst)));
+ assertFalse(strategy.shouldKeepLeft(data(Owned, src, dst), data(Releasing, dst)));
+ assertTrue(strategy.shouldKeepLeft(data(Owned, src, "dst1"), data(Splitting, src, "dst2")));
+ assertTrue(strategy.shouldKeepLeft(data(Owned, "dst1"), data(Splitting, "dst2")));
+ assertFalse(strategy.shouldKeepLeft(data(Owned, dst), data(Splitting, dst)));
+ assertFalse(strategy.shouldKeepLeft(data(Owned, src, dst), data(Splitting, dst)));
+ assertTrue(strategy.shouldKeepLeft(data(Owned), data(Deleted, dst)));
- assertFalse(strategy.shouldKeepLeft(data(Released), data(Free)));
- assertTrue(strategy.shouldKeepLeft(data(Released), data(Assigned)));
- assertTrue(strategy.shouldKeepLeft(data(Released, "dst2"), data(Owned, dst)));
- assertTrue(strategy.shouldKeepLeft(data(Released, "src1", dst), data(Owned, "src2", dst)));
- assertFalse(strategy.shouldKeepLeft(data(Released), data(Owned)));
- assertTrue(strategy.shouldKeepLeft(data(Released), data(Released)));
- assertTrue(strategy.shouldKeepLeft(data(Released), data(Splitting)));
+ assertTrue(strategy.shouldKeepLeft(data(Releasing), data(Init)));
+ assertFalse(strategy.shouldKeepLeft(data(Releasing), data(Free)));
+ assertTrue(strategy.shouldKeepLeft(data(Releasing, "dst1"), data(Free, "dst2")));
+ assertTrue(strategy.shouldKeepLeft(data(Releasing, "src1", dst), data(Free, "src2", dst)));
+ assertTrue(strategy.shouldKeepLeft(data(Releasing), data(Assigning)));
+ assertTrue(strategy.shouldKeepLeft(data(Releasing, "dst1"), data(Owned, "dst2")));
+ assertTrue(strategy.shouldKeepLeft(data(Releasing, src, "dst1"), data(Owned, src, "dst2")));
+ assertTrue(strategy.shouldKeepLeft(data(Releasing, "src1", dst), data(Owned, "src2", dst)));
+ assertFalse(strategy.shouldKeepLeft(data(Releasing, src, dst), data(Owned, src, dst)));
+ assertTrue(strategy.shouldKeepLeft(data(Releasing), data(Releasing)));
+ assertTrue(strategy.shouldKeepLeft(data(Releasing), data(Splitting)));
+ assertTrue(strategy.shouldKeepLeft(data(Releasing), data(Deleted, dst)));
- assertFalse(strategy.shouldKeepLeft(data(Splitting), data(Free)));
- assertTrue(strategy.shouldKeepLeft(data(Splitting), data(Assigned)));
+ assertTrue(strategy.shouldKeepLeft(data(Splitting), data(Init)));
+ assertTrue(strategy.shouldKeepLeft(data(Splitting), data(Free)));
+ assertTrue(strategy.shouldKeepLeft(data(Splitting), data(Assigning)));
assertTrue(strategy.shouldKeepLeft(data(Splitting), data(Owned)));
- assertTrue(strategy.shouldKeepLeft(data(Splitting), data(Released)));
+ assertTrue(strategy.shouldKeepLeft(data(Splitting), data(Releasing)));
assertTrue(strategy.shouldKeepLeft(data(Splitting), data(Splitting)));
+ assertTrue(strategy.shouldKeepLeft(data(Splitting, src, "dst1"), data(Deleted, src, "dst2")));
+ assertTrue(strategy.shouldKeepLeft(data(Splitting, "dst1"), data(Deleted, "dst2")));
+ assertTrue(strategy.shouldKeepLeft(data(Splitting, "src1", dst), data(Deleted, "src2", dst)));
+ assertFalse(strategy.shouldKeepLeft(data(Splitting, dst), data(Deleted, dst)));
+ assertFalse(strategy.shouldKeepLeft(data(Splitting, src, dst), data(Deleted, src, dst)));
+
+ assertFalse(strategy.shouldKeepLeft(data(Deleted), data(Init)));
+ assertTrue(strategy.shouldKeepLeft(data(Deleted), data(Free)));
+ assertTrue(strategy.shouldKeepLeft(data(Deleted), data(Assigning)));
+ assertTrue(strategy.shouldKeepLeft(data(Deleted), data(Owned)));
+ assertTrue(strategy.shouldKeepLeft(data(Deleted), data(Releasing)));
+ assertTrue(strategy.shouldKeepLeft(data(Deleted), data(Splitting)));
+ assertTrue(strategy.shouldKeepLeft(data(Deleted), data(Deleted)));
+
+ assertFalse(strategy.shouldKeepLeft(data(Free), data(Init)));
+ assertTrue(strategy.shouldKeepLeft(data(Free), data(Free)));
+ assertFalse(strategy.shouldKeepLeft(data(Free), data(Assigning)));
+ assertTrue(strategy.shouldKeepLeft(data(Free), data(Assigning, src, dst)));
+ assertTrue(strategy.shouldKeepLeft(data(Free), data(Owned)));
+ assertTrue(strategy.shouldKeepLeft(data(Free), data(Releasing)));
+ assertTrue(strategy.shouldKeepLeft(data(Free), data(Splitting)));
+ assertTrue(strategy.shouldKeepLeft(data(Free), data(Deleted)));
}
}
diff --git a/pulsar-broker/src/test/java/org/apache/pulsar/broker/loadbalance/extensions/channel/ServiceUnitStateDataTest.java b/pulsar-broker/src/test/java/org/apache/pulsar/broker/loadbalance/extensions/channel/ServiceUnitStateDataTest.java
index 7b9afee9ce2d1..9617c8a8c2bd0 100644
--- a/pulsar-broker/src/test/java/org/apache/pulsar/broker/loadbalance/extensions/channel/ServiceUnitStateDataTest.java
+++ b/pulsar-broker/src/test/java/org/apache/pulsar/broker/loadbalance/extensions/channel/ServiceUnitStateDataTest.java
@@ -18,7 +18,7 @@
*/
package org.apache.pulsar.broker.loadbalance.extensions.channel;
-import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitState.Assigned;
+import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitState.Assigning;
import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitState.Owned;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertNull;
@@ -41,8 +41,8 @@ public void testConstructors() throws InterruptedException {
Thread.sleep(10);
- ServiceUnitStateData data2 = new ServiceUnitStateData(Assigned, "A", "B");
- assertEquals(data2.state(), Assigned);
+ ServiceUnitStateData data2 = new ServiceUnitStateData(Assigning, "A", "B");
+ assertEquals(data2.state(), Assigning);
assertEquals(data2.broker(), "A");
assertEquals(data2.sourceBroker(), "B");
assertThat(data2.timestamp()).isGreaterThan(data1.timestamp());
@@ -53,15 +53,20 @@ public void testNullState() {
new ServiceUnitStateData(null, "A");
}
- @Test(expectedExceptions = NullPointerException.class)
+ @Test(expectedExceptions = IllegalArgumentException.class)
public void testNullBroker() {
new ServiceUnitStateData(Owned, null);
}
+ @Test(expectedExceptions = IllegalArgumentException.class)
+ public void testEmptyBroker() {
+ new ServiceUnitStateData(Owned, "");
+ }
+
@Test
public void jsonWriteAndReadTest() throws JsonProcessingException {
ObjectMapper mapper = ObjectMapperFactory.create();
- final ServiceUnitStateData src = new ServiceUnitStateData(Assigned, "A", "B");
+ final ServiceUnitStateData src = new ServiceUnitStateData(Assigning, "A", "B");
String json = mapper.writeValueAsString(src);
ServiceUnitStateData dst = mapper.readValue(json, ServiceUnitStateData.class);
assertEquals(dst, src);
diff --git a/pulsar-broker/src/test/java/org/apache/pulsar/broker/loadbalance/extensions/channel/ServiceUnitStateTest.java b/pulsar-broker/src/test/java/org/apache/pulsar/broker/loadbalance/extensions/channel/ServiceUnitStateTest.java
index 69e6a2d204c0e..f5f1fe7bc575f 100644
--- a/pulsar-broker/src/test/java/org/apache/pulsar/broker/loadbalance/extensions/channel/ServiceUnitStateTest.java
+++ b/pulsar-broker/src/test/java/org/apache/pulsar/broker/loadbalance/extensions/channel/ServiceUnitStateTest.java
@@ -18,10 +18,12 @@
*/
package org.apache.pulsar.broker.loadbalance.extensions.channel;
-import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitState.Assigned;
+import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitState.Assigning;
+import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitState.Deleted;
import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitState.Free;
+import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitState.Init;
import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitState.Owned;
-import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitState.Released;
+import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitState.Releasing;
import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitState.Splitting;
import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertTrue;
@@ -29,39 +31,75 @@
@Test(groups = "broker")
public class ServiceUnitStateTest {
+ @Test
+ public void testInFlights() {
+ assertFalse(ServiceUnitState.isInFlightState(Init));
+ assertFalse(ServiceUnitState.isInFlightState(Free));
+ assertFalse(ServiceUnitState.isInFlightState(Owned));
+ assertTrue(ServiceUnitState.isInFlightState(Assigning));
+ assertTrue(ServiceUnitState.isInFlightState(Releasing));
+ assertTrue(ServiceUnitState.isInFlightState(Splitting));
+ assertFalse(ServiceUnitState.isInFlightState(Deleted));
+ }
@Test
public void testTransitions() {
+ assertFalse(ServiceUnitState.isValidTransition(Init, Init));
+ assertTrue(ServiceUnitState.isValidTransition(Init, Free));
+ assertTrue(ServiceUnitState.isValidTransition(Init, Owned));
+ assertTrue(ServiceUnitState.isValidTransition(Init, Assigning));
+ assertTrue(ServiceUnitState.isValidTransition(Init, Releasing));
+ assertTrue(ServiceUnitState.isValidTransition(Init, Splitting));
+ assertTrue(ServiceUnitState.isValidTransition(Init, Deleted));
+
+ assertTrue(ServiceUnitState.isValidTransition(Free, Init));
assertFalse(ServiceUnitState.isValidTransition(Free, Free));
- assertTrue(ServiceUnitState.isValidTransition(Free, Assigned));
- assertTrue(ServiceUnitState.isValidTransition(Free, Owned));
- assertTrue(ServiceUnitState.isValidTransition(Free, Released));
- assertTrue(ServiceUnitState.isValidTransition(Free, Splitting));
+ assertFalse(ServiceUnitState.isValidTransition(Free, Owned));
+ assertTrue(ServiceUnitState.isValidTransition(Free, Assigning));
+ assertFalse(ServiceUnitState.isValidTransition(Free, Releasing));
+ assertFalse(ServiceUnitState.isValidTransition(Free, Splitting));
+ assertFalse(ServiceUnitState.isValidTransition(Free, Deleted));
- assertTrue(ServiceUnitState.isValidTransition(Assigned, Free));
- assertFalse(ServiceUnitState.isValidTransition(Assigned, Assigned));
- assertTrue(ServiceUnitState.isValidTransition(Assigned, Owned));
- assertTrue(ServiceUnitState.isValidTransition(Assigned, Released));
- assertFalse(ServiceUnitState.isValidTransition(Assigned, Splitting));
+ assertFalse(ServiceUnitState.isValidTransition(Assigning, Init));
+ assertFalse(ServiceUnitState.isValidTransition(Assigning, Free));
+ assertFalse(ServiceUnitState.isValidTransition(Assigning, Assigning));
+ assertTrue(ServiceUnitState.isValidTransition(Assigning, Owned));
+ assertTrue(ServiceUnitState.isValidTransition(Assigning, Releasing));
+ assertFalse(ServiceUnitState.isValidTransition(Assigning, Splitting));
+ assertFalse(ServiceUnitState.isValidTransition(Assigning, Deleted));
- assertTrue(ServiceUnitState.isValidTransition(Owned, Free));
- assertTrue(ServiceUnitState.isValidTransition(Owned, Assigned));
+ assertFalse(ServiceUnitState.isValidTransition(Owned, Init));
+ assertFalse(ServiceUnitState.isValidTransition(Owned, Free));
+ assertTrue(ServiceUnitState.isValidTransition(Owned, Assigning));
assertFalse(ServiceUnitState.isValidTransition(Owned, Owned));
- assertFalse(ServiceUnitState.isValidTransition(Owned, Released));
+ assertTrue(ServiceUnitState.isValidTransition(Owned, Releasing));
assertTrue(ServiceUnitState.isValidTransition(Owned, Splitting));
+ assertFalse(ServiceUnitState.isValidTransition(Owned, Deleted));
- assertTrue(ServiceUnitState.isValidTransition(Released, Free));
- assertFalse(ServiceUnitState.isValidTransition(Released, Assigned));
- assertTrue(ServiceUnitState.isValidTransition(Released, Owned));
- assertFalse(ServiceUnitState.isValidTransition(Released, Released));
- assertFalse(ServiceUnitState.isValidTransition(Released, Splitting));
+ assertFalse(ServiceUnitState.isValidTransition(Releasing, Init));
+ assertTrue(ServiceUnitState.isValidTransition(Releasing, Free));
+ assertFalse(ServiceUnitState.isValidTransition(Releasing, Assigning));
+ assertTrue(ServiceUnitState.isValidTransition(Releasing, Owned));
+ assertFalse(ServiceUnitState.isValidTransition(Releasing, Releasing));
+ assertFalse(ServiceUnitState.isValidTransition(Releasing, Splitting));
+ assertFalse(ServiceUnitState.isValidTransition(Releasing, Deleted));
- assertTrue(ServiceUnitState.isValidTransition(Splitting, Free));
- assertFalse(ServiceUnitState.isValidTransition(Splitting, Assigned));
+ assertFalse(ServiceUnitState.isValidTransition(Splitting, Init));
+ assertFalse(ServiceUnitState.isValidTransition(Splitting, Free));
+ assertFalse(ServiceUnitState.isValidTransition(Splitting, Assigning));
assertFalse(ServiceUnitState.isValidTransition(Splitting, Owned));
- assertFalse(ServiceUnitState.isValidTransition(Splitting, Released));
+ assertFalse(ServiceUnitState.isValidTransition(Splitting, Releasing));
assertFalse(ServiceUnitState.isValidTransition(Splitting, Splitting));
+ assertTrue(ServiceUnitState.isValidTransition(Splitting, Deleted));
+
+ assertTrue(ServiceUnitState.isValidTransition(Deleted, Init));
+ assertFalse(ServiceUnitState.isValidTransition(Deleted, Free));
+ assertFalse(ServiceUnitState.isValidTransition(Deleted, Assigning));
+ assertFalse(ServiceUnitState.isValidTransition(Deleted, Owned));
+ assertFalse(ServiceUnitState.isValidTransition(Deleted, Releasing));
+ assertFalse(ServiceUnitState.isValidTransition(Deleted, Splitting));
+ assertFalse(ServiceUnitState.isValidTransition(Deleted, Deleted));
}
}
\ No newline at end of file
diff --git a/pulsar-broker/src/test/java/org/apache/pulsar/compaction/ServiceUnitStateCompactionTest.java b/pulsar-broker/src/test/java/org/apache/pulsar/compaction/ServiceUnitStateCompactionTest.java
index 41eaa640d28db..4c1d4f7d2a89d 100644
--- a/pulsar-broker/src/test/java/org/apache/pulsar/compaction/ServiceUnitStateCompactionTest.java
+++ b/pulsar-broker/src/test/java/org/apache/pulsar/compaction/ServiceUnitStateCompactionTest.java
@@ -18,11 +18,14 @@
*/
package org.apache.pulsar.compaction;
-import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitState.Free;
+import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitState.Deleted;
+import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitState.Init;
import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitState.Owned;
-import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitState.Assigned;
+import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitState.Assigning;
+import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitState.Releasing;
import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitState.Splitting;
import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitState.isValidTransition;
+import static org.apache.pulsar.broker.loadbalance.extensions.channel.ServiceUnitStateData.state;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertNotNull;
import static org.testng.Assert.assertNull;
@@ -42,7 +45,9 @@
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.Semaphore;
import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
import java.util.stream.Collectors;
import org.apache.bookkeeper.client.BookKeeper;
import org.apache.commons.lang.reflect.FieldUtils;
@@ -81,55 +86,36 @@ public class ServiceUnitStateCompactionTest extends MockedPulsarServiceBaseTest
private Schema schema;
private ServiceUnitStateCompactionStrategy strategy;
- private ServiceUnitState testState0 = Free;
- private ServiceUnitState testState1 = Free;
- private ServiceUnitState testState2 = Free;
- private ServiceUnitState testState3 = Free;
- private ServiceUnitState testState4 = Free;
+ private ServiceUnitState testState = Init;
private static Random RANDOM = new Random();
private ServiceUnitStateData testValue(ServiceUnitState state, String broker) {
- if (state == Free) {
+ if (state == Init) {
return null;
}
return new ServiceUnitStateData(state, broker);
}
- private ServiceUnitStateData testValue0(String broker) {
- ServiceUnitState to = nextValidState(testState0);
- testState0 = to;
+ private ServiceUnitStateData testValue(String broker) {
+ ServiceUnitState to = nextValidStateNonSplit(testState);
+ testState = to;
return testValue(to, broker);
}
- private ServiceUnitStateData testValue1(String broker) {
- ServiceUnitState to = nextValidState(testState1);
- testState1 = to;
- return testValue(to, broker);
- }
-
- private ServiceUnitStateData testValue2(String broker) {
- ServiceUnitState to = nextValidState(testState2);
- testState2 = to;
- return testValue(to, broker);
- }
-
- private ServiceUnitStateData testValue3(String broker) {
- ServiceUnitState to = nextValidState(testState3);
- testState3 = to;
- return testValue(to, broker);
- }
-
- private ServiceUnitStateData testValue4(String broker) {
- ServiceUnitState to = nextValidState(testState4);
- testState4 = to;
- return testValue(to, broker);
+ private ServiceUnitState nextValidState(ServiceUnitState from) {
+ List candidates = Arrays.stream(ServiceUnitState.values())
+ .filter(to -> isValidTransition(from, to))
+ .collect(Collectors.toList());
+ var state= candidates.get(RANDOM.nextInt(candidates.size()));
+ return state;
}
- private ServiceUnitState nextValidState(ServiceUnitState from) {
+ private ServiceUnitState nextValidStateNonSplit(ServiceUnitState from) {
List candidates = Arrays.stream(ServiceUnitState.values())
- .filter(to -> to != Free && to != Splitting && isValidTransition(from, to))
+ .filter(to -> to != Init && to != Splitting && to != Deleted
+ && isValidTransition(from, to))
.collect(Collectors.toList());
var state= candidates.get(RANDOM.nextInt(candidates.size()));
return state;
@@ -140,23 +126,11 @@ private ServiceUnitState nextInvalidState(ServiceUnitState from) {
.filter(to -> !isValidTransition(from, to))
.collect(Collectors.toList());
if (candidates.size() == 0) {
- return null;
+ return Init;
}
return candidates.get(RANDOM.nextInt(candidates.size()));
}
- private List nextStatesToNull(ServiceUnitState from) {
- if (from == null) {
- return List.of();
- }
- return switch (from) {
- case Assigned -> List.of(Owned);
- case Owned -> List.of();
- case Splitting -> List.of();
- default -> List.of();
- };
- }
-
@BeforeMethod
@Override
public void setup() throws Exception {
@@ -174,6 +148,7 @@ public void setup() throws Exception {
strategy = new ServiceUnitStateCompactionStrategy();
strategy.checkBrokers(false);
+ testState = Init;
}
@@ -222,10 +197,21 @@ TestData generateTestData() throws PulsarAdminException, PulsarClientException {
int keyIndex = r.nextInt(maxKeys);
String key = "key" + keyIndex;
ServiceUnitStateData prev = expected.get(key);
- ServiceUnitState prevState = prev == null ? Free : prev.state();
- ServiceUnitState state = r.nextBoolean() ? nextInvalidState(prevState) :
+ ServiceUnitState prevState = state(prev);
+ boolean invalid = r.nextBoolean();
+ ServiceUnitState state = invalid ? nextInvalidState(prevState) :
nextValidState(prevState);
- ServiceUnitStateData value = new ServiceUnitStateData(state, key + ":" + j);
+ ServiceUnitStateData value;
+ if (invalid) {
+ value = new ServiceUnitStateData(state, key + ":" + j, false);
+ } else {
+ if (state == Init) {
+ value = new ServiceUnitStateData(state, key + ":" + j, true);
+ } else {
+ value = new ServiceUnitStateData(state, key + ":" + j, false);
+ }
+ }
+
producer.newMessage().key(key).value(value).send();
if (!strategy.shouldKeepLeft(prev, value)) {
expected.put(key, value);
@@ -387,24 +373,26 @@ public void testReadCompactedBeforeCompaction() throws Exception {
.create();
pulsarClient.newConsumer(schema).topic(topic).subscriptionName("sub1").readCompacted(true).subscribe().close();
-
- producer.newMessage().key("key0").value(testValue0( "content0")).send();
- producer.newMessage().key("key0").value(testValue0("content1")).send();
- producer.newMessage().key("key0").value(testValue0( "content2")).send();
+ String key = "key0";
+ var testValues = Arrays.asList(
+ testValue("content0"), testValue("content1"), testValue("content2"));
+ for (var val : testValues) {
+ producer.newMessage().key(key).value(val).send();
+ }
try (Consumer consumer = pulsarClient.newConsumer(schema).topic(topic).subscriptionName("sub1")
.readCompacted(true).subscribe()) {
Message m = consumer.receive();
- Assert.assertEquals(m.getKey(), "key0");
- Assert.assertEquals(m.getValue().broker(), "content0");
+ Assert.assertEquals(m.getKey(), key);
+ Assert.assertEquals(m.getValue(), testValues.get(0));
m = consumer.receive();
- Assert.assertEquals(m.getKey(), "key0");
- Assert.assertEquals(m.getValue().broker(), "content1");
+ Assert.assertEquals(m.getKey(), key);
+ Assert.assertEquals(m.getValue(), testValues.get(1));
m = consumer.receive();
- Assert.assertEquals(m.getKey(), "key0");
- Assert.assertEquals(m.getValue().broker(), "content2");
+ Assert.assertEquals(m.getKey(), key);
+ Assert.assertEquals(m.getValue(), testValues.get(2));
}
StrategicTwoPhaseCompactor compactor
@@ -414,8 +402,8 @@ public void testReadCompactedBeforeCompaction() throws Exception {
try (Consumer consumer = pulsarClient.newConsumer(schema).topic(topic).subscriptionName("sub1")
.readCompacted(true).subscribe()) {
Message m = consumer.receive();
- Assert.assertEquals(m.getKey(), "key0");
- Assert.assertEquals(m.getValue().broker(), "content2");
+ Assert.assertEquals(m.getKey(), key);
+ Assert.assertEquals(m.getValue(), testValues.get(2));
}
}
@@ -430,30 +418,37 @@ public void testReadEntriesAfterCompaction() throws Exception {
pulsarClient.newConsumer(schema).topic(topic).subscriptionName("sub1").readCompacted(true).subscribe().close();
- producer.newMessage().key("key0").value(testValue0( "content0")).send();
- producer.newMessage().key("key0").value(testValue0("content1")).send();
- producer.newMessage().key("key0").value(testValue0( "content2")).send();
+ String key = "key0";
+ var testValues = Arrays.asList(
+ testValue( "content0"),
+ testValue("content1"),
+ testValue( "content2"),
+ testValue("content3"));
+ producer.newMessage().key(key).value(testValues.get(0)).send();
+ producer.newMessage().key(key).value(testValues.get(1)).send();
+ producer.newMessage().key(key).value(testValues.get(2)).send();
StrategicTwoPhaseCompactor compactor
= new StrategicTwoPhaseCompactor(conf, pulsarClient, bk, compactionScheduler);
compactor.compact(topic, strategy).get();
- producer.newMessage().key("key0").value(testValue0("content3")).send();
+ producer.newMessage().key(key).value(testValues.get(3)).send();
try (Consumer consumer = pulsarClient.newConsumer(schema).topic(topic).subscriptionName("sub1")
.readCompacted(true).subscribe()) {
Message m = consumer.receive();
- Assert.assertEquals(m.getKey(), "key0");
- Assert.assertEquals(m.getValue().broker(), "content2");
+ Assert.assertEquals(m.getKey(), key);
+ Assert.assertEquals(m.getValue(), testValues.get(2));
m = consumer.receive();
- Assert.assertEquals(m.getKey(), "key0");
- Assert.assertEquals(m.getValue().broker(), "content3");
+ Assert.assertEquals(m.getKey(), key);
+ Assert.assertEquals(m.getValue(), testValues.get(3));
}
}
@Test
public void testSeekEarliestAfterCompaction() throws Exception {
+
String topic = "persistent://my-property/use/my-ns/my-topic1";
Producer producer = pulsarClient.newProducer(schema)
@@ -461,9 +456,14 @@ public void testSeekEarliestAfterCompaction() throws Exception {
.enableBatching(true)
.create();
- producer.newMessage().key("key0").value(testValue0( "content0")).send();
- producer.newMessage().key("key0").value(testValue0("content1")).send();
- producer.newMessage().key("key0").value(testValue0( "content2")).send();
+ String key = "key0";
+ var testValues = Arrays.asList(
+ testValue("content0"),
+ testValue("content1"),
+ testValue("content2"));
+ for (var val : testValues) {
+ producer.newMessage().key(key).value(val).send();
+ }
StrategicTwoPhaseCompactor compactor
= new StrategicTwoPhaseCompactor(conf, pulsarClient, bk, compactionScheduler);
@@ -473,8 +473,8 @@ public void testSeekEarliestAfterCompaction() throws Exception {
.readCompacted(true).subscribe()) {
consumer.seek(MessageId.earliest);
Message m = consumer.receive();
- Assert.assertEquals(m.getKey(), "key0");
- Assert.assertEquals(m.getValue().broker(), "content2");
+ Assert.assertEquals(m.getKey(), key);
+ Assert.assertEquals(m.getValue(), testValues.get(2));
}
try (Consumer consumer = pulsarClient.newConsumer(schema).topic(topic).subscriptionName("sub1")
@@ -482,34 +482,153 @@ public void testSeekEarliestAfterCompaction() throws Exception {
consumer.seek(MessageId.earliest);
Message m = consumer.receive();
- Assert.assertEquals(m.getKey(), "key0");
- Assert.assertEquals(m.getValue().broker(), "content0");
+ Assert.assertEquals(m.getKey(), key);
+ Assert.assertEquals(m.getValue(), testValues.get(0));
m = consumer.receive();
- Assert.assertEquals(m.getKey(), "key0");
- Assert.assertEquals(m.getValue().broker(), "content1");
+ Assert.assertEquals(m.getKey(), key);
+ Assert.assertEquals(m.getValue(), testValues.get(1));
m = consumer.receive();
- Assert.assertEquals(m.getKey(), "key0");
- Assert.assertEquals(m.getValue().broker(), "content2");
+ Assert.assertEquals(m.getKey(), key);
+ Assert.assertEquals(m.getValue(), testValues.get(2));
}
}
@Test
- public void testBrokerRestartAfterCompaction() throws Exception {
+ public void testSlowTableviewAfterCompaction() throws Exception {
String topic = "persistent://my-property/use/my-ns/my-topic1";
+ String strategyClassName = "topicCompactionStrategyClassName";
+ strategy.checkBrokers(true);
+
+ pulsarClient.newConsumer(schema)
+ .topic(topic)
+ .subscriptionName("sub1")
+ .readCompacted(true)
+ .subscribe().close();
+
+ var fastTV = pulsar.getClient().newTableViewBuilder(schema)
+ .topic(topic)
+ .subscriptionName("fastTV")
+ .loadConf(Map.of(
+ strategyClassName,
+ ServiceUnitStateCompactionStrategy.class.getName()))
+ .create();
+
+ var defaultConf = getDefaultConf();
+ var additionalPulsarTestContext = createAdditionalPulsarTestContext(defaultConf);
+ var pulsar2 = additionalPulsarTestContext.getPulsarService();
+
+ var slowTV = pulsar2.getClient().newTableViewBuilder(schema)
+ .topic(topic)
+ .subscriptionName("slowTV")
+ .loadConf(Map.of(
+ strategyClassName,
+ ServiceUnitStateCompactionStrategy.class.getName()))
+ .create();
+
+ var semaphore = new Semaphore(0);
+ AtomicBoolean handledReleased = new AtomicBoolean(false);
+
+ slowTV.listen((k, v) -> {
+ if (v.state() == Assigning) {
+ try {
+ // Block here while handling the Assigning state
+ handledReleased.set(false);
+ semaphore.acquire();
+ } catch (InterruptedException e) {
+ throw new RuntimeException(e);
+ }
+ } else if (v.state() == Releasing) {
+ handledReleased.set(true);
+ }
+ });
+
+ // Configure retention to ensure data is retained for reader
+ admin.namespaces().setRetention("my-property/use/my-ns",
+ new RetentionPolicies(-1, -1));
Producer producer = pulsarClient.newProducer(schema)
.topic(topic)
.enableBatching(true)
+ .messageRoutingMode(MessageRoutingMode.SinglePartition)
.create();
- pulsarClient.newConsumer(schema).topic(topic).subscriptionName("sub1").readCompacted(true).subscribe().close();
+ StrategicTwoPhaseCompactor compactor
+ = new StrategicTwoPhaseCompactor(conf, pulsarClient, bk, compactionScheduler);
+
+ String bundle = "bundle1";
+ String src = "broker0";
+ String dst = "broker1";
+ producer.newMessage().key(bundle).value(new ServiceUnitStateData(Owned, src)).send();
+ for (int i = 0; i < 3; i++) {
+ var assignedStateData = new ServiceUnitStateData(Assigning, dst, src);
+ producer.newMessage().key(bundle).value(assignedStateData).send();
+ producer.newMessage().key(bundle).value(assignedStateData).send();
+ var releasedStateData = new ServiceUnitStateData(Releasing, dst, src);
+ producer.newMessage().key(bundle).value(releasedStateData).send();
+ producer.newMessage().key(bundle).value(releasedStateData).send();
+ var ownedStateData = new ServiceUnitStateData(Owned, dst, src);
+ producer.newMessage().key(bundle).value(ownedStateData).send();
+ producer.newMessage().key(bundle).value(ownedStateData).send();
+ compactor.compact(topic, strategy).get();
+
+ Awaitility.await()
+ .pollInterval(200, TimeUnit.MILLISECONDS)
+ .atMost(10, TimeUnit.SECONDS)
+ .untilAsserted(() -> assertEquals(fastTV.get(bundle), ownedStateData));
+
+ Awaitility.await()
+ .pollInterval(200, TimeUnit.MILLISECONDS)
+ .atMost(10, TimeUnit.SECONDS)
+ .untilAsserted(() -> assertEquals(slowTV.get(bundle), assignedStateData));
+ assertTrue(!handledReleased.get());
+ semaphore.release();
+
+ Awaitility.await()
+ .pollInterval(200, TimeUnit.MILLISECONDS)
+ .atMost(10, TimeUnit.SECONDS)
+ .untilAsserted(() -> assertEquals(slowTV.get(bundle), ownedStateData));
+
+ var newTv = pulsar.getClient().newTableView(schema)
+ .topic(topic)
+ .loadConf(Map.of(
+ strategyClassName,
+ ServiceUnitStateCompactionStrategy.class.getName()))
+ .create();
+ Awaitility.await()
+ .pollInterval(200, TimeUnit.MILLISECONDS)
+ .atMost(10, TimeUnit.SECONDS)
+ .untilAsserted(() -> assertEquals(newTv.get(bundle), ownedStateData));
+
+ src = dst;
+ dst = "broker" + (i + 2);
+ newTv.close();
+ }
+
+ producer.close();
+ slowTV.close();
+ fastTV.close();
+ pulsar2.close();
+
+ }
- producer.newMessage().key("key0").value(testValue0( "content0")).send();
- producer.newMessage().key("key0").value(testValue0("content1")).send();
- producer.newMessage().key("key0").value(testValue0( "content2")).send();
+ @Test
+ public void testBrokerRestartAfterCompaction() throws Exception {
+ String topic = "persistent://my-property/use/my-ns/my-topic1";
+
+ Producer producer = pulsarClient.newProducer(schema)
+ .topic(topic)
+ .enableBatching(true)
+ .create();
+ String key = "key0";
+ pulsarClient.newConsumer(schema).topic(topic).subscriptionName("sub1").readCompacted(true).subscribe().close();
+ var testValues = Arrays.asList(
+ testValue("content0"), testValue("content1"), testValue("content2"));
+ for (var val : testValues) {
+ producer.newMessage().key(key).value(val).send();
+ }
StrategicTwoPhaseCompactor compactor
= new StrategicTwoPhaseCompactor(conf, pulsarClient, bk, compactionScheduler);
compactor.compact(topic, strategy).get();
@@ -517,8 +636,8 @@ public void testBrokerRestartAfterCompaction() throws Exception {
try (Consumer consumer = pulsarClient.newConsumer(schema).topic(topic).subscriptionName("sub1")
.readCompacted(true).subscribe()) {
Message m = consumer.receive();
- Assert.assertEquals(m.getKey(), "key0");
- Assert.assertEquals(m.getValue().broker(), "content2");
+ Assert.assertEquals(m.getKey(), key);
+ Assert.assertEquals(m.getValue(), testValues.get(testValues.size() - 1));
}
stopBroker();
@@ -534,8 +653,8 @@ public void testBrokerRestartAfterCompaction() throws Exception {
try (Consumer consumer = pulsarClient.newConsumer(schema).topic(topic).subscriptionName("sub1")
.readCompacted(true).subscribe()) {
Message m = consumer.receive();
- Assert.assertEquals(m.getKey(), "key0");
- Assert.assertEquals(m.getValue().broker(), "content2");
+ Assert.assertEquals(m.getKey(), key);
+ Assert.assertEquals(m.getValue(), testValues.get(testValues.size() - 1));
}
}
@@ -554,13 +673,14 @@ public void testCompactEmptyTopic() throws Exception {
= new StrategicTwoPhaseCompactor(conf, pulsarClient, bk, compactionScheduler);
compactor.compact(topic, strategy).get();
- producer.newMessage().key("key0").value(testValue0( "content0")).send();
+ var testValue = testValue( "content0");
+ producer.newMessage().key("key0").value(testValue).send();
try (Consumer consumer = pulsarClient.newConsumer(schema).topic(topic).subscriptionName("sub1")
.readCompacted(true).subscribe()) {
Message m = consumer.receive();
Assert.assertEquals(m.getKey(), "key0");
- Assert.assertEquals(m.getValue().broker(), "content0");
+ Assert.assertEquals(m.getValue(), testValue);
}
}
@@ -583,10 +703,10 @@ public void testWholeBatchCompactedOut() throws Exception {
.batchingMaxPublishDelay(1, TimeUnit.HOURS)
.messageRoutingMode(MessageRoutingMode.SinglePartition)
.create()) {
- producerBatch.newMessage().key("key1").value(testValue1("my-message-1")).sendAsync();
- producerBatch.newMessage().key("key1").value(testValue1( "my-message-2")).sendAsync();
- producerBatch.newMessage().key("key1").value(testValue1("my-message-3")).sendAsync();
- producerNormal.newMessage().key("key1").value(testValue1( "my-message-4")).send();
+ producerBatch.newMessage().key("key1").value(testValue("my-message-1")).sendAsync();
+ producerBatch.newMessage().key("key1").value(testValue( "my-message-2")).sendAsync();
+ producerBatch.newMessage().key("key1").value(testValue("my-message-3")).sendAsync();
+ producerNormal.newMessage().key("key1").value(testValue( "my-message-4")).send();
}
// compact the topic
@@ -610,9 +730,9 @@ public void testCompactionWithLastDeletedKey() throws Exception {
pulsarClient.newConsumer(schema).topic(topic).subscriptionName("sub1").readCompacted(true).subscribe().close();
- producer.newMessage().key("1").value(testValue(Owned, "1")).send();
- producer.newMessage().key("2").value(testValue(Owned, "3")).send();
- producer.newMessage().key("3").value(testValue(Owned, "5")).send();
+ producer.newMessage().key("1").value(testValue("1")).send();
+ producer.newMessage().key("2").value(testValue("3")).send();
+ producer.newMessage().key("3").value(testValue( "5")).send();
producer.newMessage().key("1").value(null).send();
producer.newMessage().key("2").value(null).send();
@@ -707,7 +827,7 @@ public void testCompactMultipleTimesWithoutEmptyMessage()
List> futures = new ArrayList<>(messages);
for (int i = 0; i < messages; i++) {
- futures.add(producer.newMessage().key(key).value(testValue0((i + ""))).sendAsync());
+ futures.add(producer.newMessage().key(key).value(testValue((i + ""))).sendAsync());
}
FutureUtil.waitForAll(futures).get();
@@ -720,7 +840,7 @@ public void testCompactMultipleTimesWithoutEmptyMessage()
// 3. Send more ten messages
futures.clear();
for (int i = 0; i < messages; i++) {
- futures.add(producer.newMessage().key(key).value(testValue0((i + 10 + ""))).sendAsync());
+ futures.add(producer.newMessage().key(key).value(testValue((i + 10 + ""))).sendAsync());
}
FutureUtil.waitForAll(futures).get();
@@ -754,7 +874,7 @@ public void testReadUnCompacted()
List> futures = new ArrayList<>(messages);
for (int i = 0; i < messages; i++) {
- futures.add(producer.newMessage().key(key).value(testValue0((i + ""))).sendAsync());
+ futures.add(producer.newMessage().key(key).value(testValue((i + ""))).sendAsync());
}
FutureUtil.waitForAll(futures).get();
@@ -767,7 +887,7 @@ public void testReadUnCompacted()
// 3. Send more ten messages
futures.clear();
for (int i = 0; i < messages; i++) {
- futures.add(producer.newMessage().key(key).value(testValue0((i + 10 + ""))).sendAsync());
+ futures.add(producer.newMessage().key(key).value(testValue((i + 10 + ""))).sendAsync());
}
FutureUtil.waitForAll(futures).get();
try (Consumer consumer = pulsarClient.newConsumer(schema)
@@ -788,9 +908,6 @@ public void testReadUnCompacted()
}
// 4.Send empty message to delete the key-value in the compacted topic.
- for (ServiceUnitState state : nextStatesToNull(testState0)) {
- producer.newMessage().key(key).value(new ServiceUnitStateData(state, "xx")).send();
- }
producer.newMessage().key(key).value(null).send();
// 5.compact the topic.
@@ -807,7 +924,7 @@ public void testReadUnCompacted()
}
for (int i = 0; i < messages; i++) {
- futures.add(producer.newMessage().key(key).value(testValue0((i + 20 + ""))).sendAsync());
+ futures.add(producer.newMessage().key(key).value(testValue((i + 20 + ""))).sendAsync());
}
FutureUtil.waitForAll(futures).get();
From 870334029119dce41f2a6e986db5ca46d023220f Mon Sep 17 00:00:00 2001
From: Asaf Mesika
Date: Fri, 24 Feb 2023 02:14:42 +0200
Subject: [PATCH 011/404] [improve][doc] Changing subject prefix for PIPs in
the mailing list (#19617)
---
wiki/proposals/PIP.md | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/wiki/proposals/PIP.md b/wiki/proposals/PIP.md
index afd0eac5fdfd8..d972956df0276 100644
--- a/wiki/proposals/PIP.md
+++ b/wiki/proposals/PIP.md
@@ -80,7 +80,7 @@ The process works in the following way:
1. The author(s) of the proposal will create a GitHub issue ticket choosing the
template for PIP proposals.
2. The author(s) will send a note to the dev@pulsar.apache.org mailing list
- to start the discussion, using subject prefix `[PIP] xxx`. The discussion
+ to start the discussion, using subject prefix `[DISCUSS] PIP-xxx: `. The discussion
need to happen in the mailing list. Please avoid discussing it using
GitHub comments in the PIP GitHub issue, as it creates two tracks
of feedback.
@@ -147,4 +147,4 @@ If there are alternatives that were already considered by the authors or,
after the discussion, by the community, and were rejected, please list them
here along with the reason why they were rejected.
-```
\ No newline at end of file
+```
From 3b075a60f04938fa1a90acc2fd856168ca0cadef Mon Sep 17 00:00:00 2001
From: Asaf Mesika
Date: Fri, 24 Feb 2023 02:15:15 +0200
Subject: [PATCH 012/404] [improve][doc] Clarify where to grab the number for
the PIP (#19610)
---
wiki/proposals/PIP.md | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/wiki/proposals/PIP.md b/wiki/proposals/PIP.md
index d972956df0276..f76c9f0f7a235 100644
--- a/wiki/proposals/PIP.md
+++ b/wiki/proposals/PIP.md
@@ -78,7 +78,9 @@ A PIP proposal can be in these states:
The process works in the following way:
1. The author(s) of the proposal will create a GitHub issue ticket choosing the
- template for PIP proposals.
+ template for PIP proposals. The issue title should be "PIP-xxx: title", where
+ the "xxx" number should be chosen to be the next number from the existing PIP
+ issues, listed [here](https://github.com/apache/pulsar/labels/PIP).
2. The author(s) will send a note to the dev@pulsar.apache.org mailing list
to start the discussion, using subject prefix `[DISCUSS] PIP-xxx: `. The discussion
need to happen in the mailing list. Please avoid discussing it using
@@ -89,12 +91,12 @@ The process works in the following way:
4. Once some consensus is reached, there will be a vote to formally approve
the proposal.
The vote will be held on the dev@pulsar.apache.org mailing list. Everyone
- is welcome to vote on the proposal, though it will considered to be binding
+ is welcome to vote on the proposal, though it will be considered to be binding
only the vote of PMC members.
I would be required to have a lazy majority of at least 3 binding +1s votes.
The vote should stay open for at least 48 hours.
5. When the vote is closed, if the outcome is positive, the state of the
- proposal is updated and the Pull Requests associated with this proposal can
+ proposal is updated, and the Pull Requests associated with this proposal can
start to get merged into the master branch.
All the Pull Requests that are created, should always reference the
From bf982f4995e624659021191982f7fedc13fc3ba0 Mon Sep 17 00:00:00 2001
From: Neng Lu
Date: Thu, 23 Feb 2023 18:44:09 -0800
Subject: [PATCH 013/404] [improve] configure whether function consumer should
skip to latest (#17214)
---
.../org/apache/pulsar/common/functions/FunctionConfig.java | 2 ++
.../java/org/apache/pulsar/admin/cli/CmdFunctions.java | 7 +++++++
.../pulsar/functions/instance/JavaInstanceRunnable.java | 5 +++++
.../pulsar/functions/source/MultiConsumerPulsarSource.java | 6 ++++++
.../apache/pulsar/functions/source/PulsarSourceConfig.java | 3 ++-
.../functions/source/SingleConsumerPulsarSource.java | 6 ++++++
pulsar-functions/proto/src/main/proto/Function.proto | 1 +
.../apache/pulsar/functions/utils/FunctionConfigUtils.java | 6 ++++++
8 files changed, 35 insertions(+), 1 deletion(-)
diff --git a/pulsar-client-admin-api/src/main/java/org/apache/pulsar/common/functions/FunctionConfig.java b/pulsar-client-admin-api/src/main/java/org/apache/pulsar/common/functions/FunctionConfig.java
index 0b26e7e93b5f0..e304f25d5d373 100644
--- a/pulsar-client-admin-api/src/main/java/org/apache/pulsar/common/functions/FunctionConfig.java
+++ b/pulsar-client-admin-api/src/main/java/org/apache/pulsar/common/functions/FunctionConfig.java
@@ -131,6 +131,8 @@ public enum Runtime {
private Integer maxPendingAsyncRequests;
// Whether the pulsar admin client exposed to function context, default is disabled.
private Boolean exposePulsarAdminClientEnabled;
+ // Whether the consumer should skip to latest position in case of failure recovery
+ private Boolean skipToLatest;
@Builder.Default
private SubscriptionInitialPosition subscriptionPosition = SubscriptionInitialPosition.Latest;
diff --git a/pulsar-client-tools/src/main/java/org/apache/pulsar/admin/cli/CmdFunctions.java b/pulsar-client-tools/src/main/java/org/apache/pulsar/admin/cli/CmdFunctions.java
index bc2585bc67bc9..05bab9c6f198b 100644
--- a/pulsar-client-tools/src/main/java/org/apache/pulsar/admin/cli/CmdFunctions.java
+++ b/pulsar-client-tools/src/main/java/org/apache/pulsar/admin/cli/CmdFunctions.java
@@ -325,6 +325,9 @@ abstract class FunctionDetailsCommand extends BaseCommand {
@Parameter(names = "--subs-position", description = "Pulsar source subscription position if user wants to "
+ "consume messages from the specified location #Java")
protected SubscriptionInitialPosition subsPosition;
+ @Parameter(names = "--skip-to-latest", description = "Whether or not the consumer skip to latest message "
+ + "upon function instance restart", arity = 1)
+ protected Boolean skipToLatest;
@Parameter(names = "--parallelism", description = "The parallelism factor of a Pulsar Function "
+ "(i.e. the number of function instances to run) #Java")
protected Integer parallelism;
@@ -548,6 +551,10 @@ void processArguments() throws Exception {
functionConfig.setSubscriptionPosition(subsPosition);
}
+ if (null != skipToLatest) {
+ functionConfig.setSkipToLatest(skipToLatest);
+ }
+
if (null != userConfigString) {
Type type = new TypeToken