From a9eb28d0a4bef334d7e78a00ea88760f97e83680 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20Villalba=C2=A0=20=C2=A0=20=C2=A0=20=C2=A0?= Date: Tue, 17 Mar 2020 15:00:03 -0300 Subject: [PATCH 01/20] Add consumer tier storage capability --- build.gradle | 2 + .../internal/util/HeaderInternalField.java | 8 ++ .../databus/consumer/DatabusConsumer.java | 19 +-- .../databus/consumer/DatabusPushConsumer.java | 68 +++++++++- .../databus/entities/S3TierStorage.java | 124 ++++++++++++++++++ .../opendxl/databus/entities/TierStorage.java | 8 ++ .../databus/serialization/SerdeDatabus.java | 14 +- .../internal/AvroMessageDeserializer.java | 51 ++++++- .../internal/InternalDeserializer.java | 5 + .../internal/LegacyMessageDeserializer.java | 6 + .../internal/MessageDeserializer.java | 15 ++- .../internal/RawMessageDeserializer.java | 6 + .../internal/MessageSerializationTest.java | 2 +- 13 files changed, 309 insertions(+), 19 deletions(-) create mode 100644 src/main/java/com/opendxl/databus/entities/S3TierStorage.java create mode 100644 src/main/java/com/opendxl/databus/entities/TierStorage.java diff --git a/build.gradle b/build.gradle index eca9986..3967732 100644 --- a/build.gradle +++ b/build.gradle @@ -65,6 +65,8 @@ dependencies { implementation 'commons-lang:commons-lang:2.6' implementation 'org.slf4j:slf4j-api:1.7.30' implementation 'net.sf.jopt-simple:jopt-simple:5.0.4' + implementation 'software.amazon.awssdk:bom:2.10.73' + implementation 'com.amazonaws:aws-java-sdk-s3:1.11.731' testImplementation 'org.apache.kafka:kafka_2.12:2.3.1' testImplementation 'org.apache.zookeeper:zookeeper:3.5.5' testImplementation('io.netty:netty-all:4.1.43.Final') { diff --git a/src/main/java/com/opendxl/databus/common/internal/util/HeaderInternalField.java b/src/main/java/com/opendxl/databus/common/internal/util/HeaderInternalField.java index 84bb9fd..4169b12 100644 --- a/src/main/java/com/opendxl/databus/common/internal/util/HeaderInternalField.java +++ 
b/src/main/java/com/opendxl/databus/common/internal/util/HeaderInternalField.java @@ -10,6 +10,7 @@ */ public final class HeaderInternalField { + private HeaderInternalField() { } @@ -27,4 +28,11 @@ private HeaderInternalField() { * The topic name key name. */ public static final String TOPIC_NAME_KEY = INTERNAL_HEADER_IDENTIFIER + "TN" + INTERNAL_HEADER_IDENTIFIER; + + public static final String TIER_STORAGE_BUCKET_NAME_KEY = INTERNAL_HEADER_IDENTIFIER + "BN" + + INTERNAL_HEADER_IDENTIFIER;; + + public static final String TIER_STORAGE_OBJECT_NAME_KEY = INTERNAL_HEADER_IDENTIFIER + "OB" + + INTERNAL_HEADER_IDENTIFIER;; + } diff --git a/src/main/java/com/opendxl/databus/consumer/DatabusConsumer.java b/src/main/java/com/opendxl/databus/consumer/DatabusConsumer.java index b2eb98a..2c3b7c2 100644 --- a/src/main/java/com/opendxl/databus/consumer/DatabusConsumer.java +++ b/src/main/java/com/opendxl/databus/consumer/DatabusConsumer.java @@ -5,6 +5,7 @@ package com.opendxl.databus.consumer; import com.opendxl.databus.credential.Credential; +import com.opendxl.databus.entities.TierStorage; import com.opendxl.databus.exception.DatabusClientRuntimeException; import com.opendxl.databus.producer.DatabusProducer; import com.opendxl.databus.serialization.Deserializer; @@ -71,7 +72,7 @@ public class DatabusConsumer

extends Consumer

{ * @throws DatabusClientRuntimeException if a DatabusConsumer getInstance was not able to be created */ public DatabusConsumer(final Map configs, final Deserializer

messageDeserializer) { - this(configs, messageDeserializer, null); + this(configs, messageDeserializer, null, null); } /** @@ -89,11 +90,11 @@ public DatabusConsumer(final Map configs, final Deserializer

* @throws DatabusClientRuntimeException if a DatabusConsumer getInstance was not able to be created */ public DatabusConsumer(final Map configs, final Deserializer

messageDeserializer, - final Credential credential) { + final Credential credential, final TierStorage tierStorage) { try { Map configuration = configureCredential(configs, credential); configuration = configureClientId(configuration); - setFieldMembers(messageDeserializer, configuration); + setFieldMembers(messageDeserializer, configuration, tierStorage); setConsumer(new KafkaConsumer(configuration, getKeyDeserializer(), getValueDeserializer())); } catch (DatabusClientRuntimeException e) { throw e; @@ -117,7 +118,7 @@ public DatabusConsumer(final Map configs, final Deserializer

* @throws DatabusClientRuntimeException if a DatabusConsumer getInstance was not able to be created */ public DatabusConsumer(final Properties properties, final Deserializer

messageDeserializer) { - this(properties, messageDeserializer, null); + this(properties, messageDeserializer, null, null); } /** @@ -135,11 +136,11 @@ public DatabusConsumer(final Properties properties, final Deserializer

messag * @throws DatabusClientRuntimeException if a DatabusConsumer getInstance was not able to be created */ public DatabusConsumer(final Properties properties, final Deserializer

messageDeserializer, - final Credential credential) { + final Credential credential, final TierStorage tierStorage) { try { Map configuration = configureCredential((Map) properties, credential); configuration = configureClientId(configuration); - setFieldMembers(messageDeserializer, configuration); + setFieldMembers(messageDeserializer, configuration, tierStorage); setConsumer(new KafkaConsumer(configuration, getKeyDeserializer(), getValueDeserializer())); } catch (DatabusClientRuntimeException e) { throw e; @@ -156,14 +157,16 @@ public DatabusConsumer(final Properties properties, final Deserializer

messag * @param configuration The consumer configuration map. * @param messageDeserializer a {@link Deserializer} getInstance implemented by SDK's user. */ - private void setFieldMembers(final Deserializer

messageDeserializer, final Map configuration) { + private void setFieldMembers(final Deserializer

messageDeserializer, + final Map configuration, + final TierStorage tierStorage) { if (messageDeserializer == null) { throw new DatabusClientRuntimeException(DATABUS_CONSUMER_INSTANCE_CANNOT_BE_CREATED_MESSAGE + "Message Deserializer cannot be null" , DatabusConsumer.class); } setKeyDeserializer(new DatabusKeyDeserializer()); - setValueDeserializer(new MessageDeserializer()); + setValueDeserializer(new MessageDeserializer(tierStorage)); setConsumerRecordsAdapter(new ConsumerRecordsAdapter

(messageDeserializer)); setClientId((String) configuration.get(ConsumerConfiguration.CLIENT_ID_CONFIG)); } diff --git a/src/main/java/com/opendxl/databus/consumer/DatabusPushConsumer.java b/src/main/java/com/opendxl/databus/consumer/DatabusPushConsumer.java index 1dd2af1..64183c1 100644 --- a/src/main/java/com/opendxl/databus/consumer/DatabusPushConsumer.java +++ b/src/main/java/com/opendxl/databus/consumer/DatabusPushConsumer.java @@ -6,6 +6,7 @@ import com.opendxl.databus.common.TopicPartition; import com.opendxl.databus.credential.Credential; +import com.opendxl.databus.entities.TierStorage; import com.opendxl.databus.exception.DatabusClientRuntimeException; import com.opendxl.databus.serialization.Deserializer; import org.apache.kafka.common.errors.WakeupException; @@ -100,6 +101,22 @@ public DatabusPushConsumer(final Map configs, this.consumerListener = consumerListener; } + /** + * Constructor + * + * @param configs consumer configuration + * @param messageDeserializer consumer message deserializer + * @param consumerListener consumer listener + * @param tierStorage Tier storage + */ + public DatabusPushConsumer(final Map configs, + final Deserializer

messageDeserializer, + final DatabusPushConsumerListener consumerListener, + final TierStorage tierStorage) { + super(configs, messageDeserializer, null, tierStorage); + this.consumerListener = consumerListener; + } + /** * @param configs consumer configuration * @param messageDeserializer consumer message deserializer @@ -110,7 +127,23 @@ public DatabusPushConsumer(final Map configs, final Deserializer

messageDeserializer, final DatabusPushConsumerListener consumerListener, final Credential credential) { - super(configs, messageDeserializer, credential); + super(configs, messageDeserializer, credential, null); + this.consumerListener = consumerListener; + } + + /** + * @param configs consumer configuration + * @param messageDeserializer consumer message deserializer + * @param consumerListener consumer listener + * @param credential credential to get access to Databus in case security is enabled + * @param tierStorage Tier storage + */ + public DatabusPushConsumer(final Map configs, + final Deserializer

messageDeserializer, + final DatabusPushConsumerListener consumerListener, + final Credential credential, + final TierStorage tierStorage) { + super(configs, messageDeserializer, credential, tierStorage); this.consumerListener = consumerListener; } @@ -127,6 +160,21 @@ public DatabusPushConsumer(final Properties properties, } + /** + * @param properties consumer configuration + * @param messageDeserializer consumer message deserializer + * @param consumerListener consumer listener + * @param tierStorage Tier storage + */ + public DatabusPushConsumer(final Properties properties, + final Deserializer

messageDeserializer, + final DatabusPushConsumerListener consumerListener, + final TierStorage tierStorage) { + super(properties, messageDeserializer, null, tierStorage); + this.consumerListener = consumerListener; + + } + /** * @param properties consumer configuration * @param messageDeserializer consumer message deserializer @@ -137,11 +185,25 @@ public DatabusPushConsumer(final Properties properties, final Deserializer

messageDeserializer, final DatabusPushConsumerListener consumerListener, final Credential credential) { - super(properties, messageDeserializer, credential); + super(properties, messageDeserializer, credential, null); this.consumerListener = consumerListener; } - + /** + * @param properties consumer configuration + * @param messageDeserializer consumer message deserializer + * @param consumerListener consumer listener + * @param credential credential to get access to Databus in case security is enabled + * @param tierStorage Tier storage + */ + public DatabusPushConsumer(final Properties properties, + final Deserializer

messageDeserializer, + final DatabusPushConsumerListener consumerListener, + final Credential credential, + final TierStorage tierStorage) { + super(properties, messageDeserializer, credential, tierStorage); + this.consumerListener = consumerListener; + } /** * {@inheritDoc} */ diff --git a/src/main/java/com/opendxl/databus/entities/S3TierStorage.java b/src/main/java/com/opendxl/databus/entities/S3TierStorage.java new file mode 100644 index 0000000..a99bdf6 --- /dev/null +++ b/src/main/java/com/opendxl/databus/entities/S3TierStorage.java @@ -0,0 +1,124 @@ +package com.opendxl.databus.entities; + +import com.amazonaws.ClientConfiguration; +import com.amazonaws.auth.AWSStaticCredentialsProvider; +import com.amazonaws.auth.BasicAWSCredentials; +import com.amazonaws.auth.InstanceProfileCredentialsProvider; +import com.amazonaws.services.s3.AmazonS3; +import com.amazonaws.services.s3.AmazonS3ClientBuilder; +import com.amazonaws.services.s3.internal.Mimetypes; +import com.amazonaws.services.s3.model.GetObjectRequest; +import com.amazonaws.services.s3.model.ObjectMetadata; +import com.amazonaws.services.s3.model.PutObjectResult; +import com.amazonaws.services.s3.model.S3Object; +import com.opendxl.databus.exception.DatabusClientRuntimeException; +import org.apache.commons.io.IOUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.ByteArrayInputStream; +import java.io.InputStream; + +public class S3TierStorage implements TierStorage { + + /** + * The logger object. 
+ */ + private static final Logger LOG = LoggerFactory.getLogger(S3TierStorage.class); + + /** + * S3 client + */ + private AmazonS3 s3Client; + + public S3TierStorage(final String awsRegion, + final ClientConfiguration config) { + + AmazonS3ClientBuilder s3Builder = AmazonS3ClientBuilder.standard(); + s3Builder.withCredentials(new InstanceProfileCredentialsProvider(false)); + s3Builder.withRegion(awsRegion); + if (config != null) { + s3Builder.withClientConfiguration(config); + } + try { + this.s3Client = s3Builder.build(); + } catch (Exception e) { + e.printStackTrace(); + + } + } + + public S3TierStorage(final String awsAccessKey, + final String awsSecretKey, + final String awsRegion, + final ClientConfiguration config) { + + + AmazonS3ClientBuilder s3Builder = AmazonS3ClientBuilder.standard(); + s3Builder.withCredentials( + new AWSStaticCredentialsProvider( + new BasicAWSCredentials(awsAccessKey, awsSecretKey))); + s3Builder.withRegion(awsRegion); + if (config != null) { + s3Builder.withClientConfiguration(config); + } + + try { + this.s3Client = s3Builder.build(); + } catch (Exception e) { + e.printStackTrace(); + } + + + } + + @Override + public void put(final String s3BucketName, + final String s3KeyName, + final byte[] payload) { + + try { + if (!s3Client.doesBucketExistV2(s3BucketName)) { + s3Client.createBucket(s3BucketName); + } + + ObjectMetadata metadata = new ObjectMetadata(); + metadata.setContentLength(payload.length); + metadata.setContentType(Mimetypes.MIMETYPE_HTML); + InputStream s3Object = new ByteArrayInputStream(payload); + PutObjectResult putObjectResult = s3Client.putObject(s3BucketName, s3KeyName, s3Object, metadata); + + } catch (Exception e) { + final String errMsg = "Error uploading S3 object: Bucket: " + " Object: " + + s3KeyName + " " + e.getMessage(); + LOG.error(errMsg, e); + throw new DatabusClientRuntimeException(errMsg, e, this.getClass()); + } + + } + + + public boolean doesObjectExist(String s3BucketName, String s3KeyName) { + 
try { + return s3Client.doesObjectExist(s3BucketName, s3KeyName); + } catch (Exception e) { + final String errMsg = "Error trying to reach S3 object: Bucket: " + " Object: " + s3KeyName + " " + + e.getMessage(); + LOG.error(errMsg, e); + throw new DatabusClientRuntimeException(errMsg, e, this.getClass()); + } + } + + @Override + public byte[] get(String s3BucketName, String s3KeyName) { + try { + S3Object s3Object = s3Client.getObject(new GetObjectRequest(s3BucketName, s3KeyName)); + return IOUtils.toByteArray(s3Object.getObjectContent()); + } catch (Exception e) { + final String errMsg = "Error reading S3 object: Bucket: " + " Object: " + s3KeyName + " " + e.getMessage(); + LOG.error(errMsg, e); + throw new DatabusClientRuntimeException(errMsg, e, this.getClass()); + } + } + +} diff --git a/src/main/java/com/opendxl/databus/entities/TierStorage.java b/src/main/java/com/opendxl/databus/entities/TierStorage.java new file mode 100644 index 0000000..620ffb0 --- /dev/null +++ b/src/main/java/com/opendxl/databus/entities/TierStorage.java @@ -0,0 +1,8 @@ +package com.opendxl.databus.entities; + +public interface TierStorage { + void put(String bucketName, String objectName, byte[] payload); + byte[] get(String bucketName, String objectName); + boolean doesObjectExist(String bucketName, String objectName); +} + diff --git a/src/main/java/com/opendxl/databus/serialization/SerdeDatabus.java b/src/main/java/com/opendxl/databus/serialization/SerdeDatabus.java index a9393ff..6629124 100644 --- a/src/main/java/com/opendxl/databus/serialization/SerdeDatabus.java +++ b/src/main/java/com/opendxl/databus/serialization/SerdeDatabus.java @@ -4,6 +4,7 @@ package com.opendxl.databus.serialization; +import com.opendxl.databus.entities.TierStorage; import com.opendxl.databus.entities.internal.DatabusMessage; import com.opendxl.databus.serialization.internal.MessageDeserializer; import com.opendxl.databus.serialization.internal.MessageSerializer; @@ -17,6 +18,17 @@ */ public class 
SerdeDatabus implements Serde { + private final TierStorage tierStorage; + + public SerdeDatabus(final TierStorage tierStorage) { + + this.tierStorage = tierStorage; + } + + public SerdeDatabus() { + this(null); + } + /** * Not implemented. */ @@ -50,6 +62,6 @@ public Serializer serializer() { */ @Override public Deserializer deserializer() { - return new MessageDeserializer(); + return new MessageDeserializer(tierStorage); } } diff --git a/src/main/java/com/opendxl/databus/serialization/internal/AvroMessageDeserializer.java b/src/main/java/com/opendxl/databus/serialization/internal/AvroMessageDeserializer.java index 3707b51..9de0383 100644 --- a/src/main/java/com/opendxl/databus/serialization/internal/AvroMessageDeserializer.java +++ b/src/main/java/com/opendxl/databus/serialization/internal/AvroMessageDeserializer.java @@ -5,9 +5,11 @@ package com.opendxl.databus.serialization.internal; +import com.opendxl.databus.common.internal.util.HeaderInternalField; import com.opendxl.databus.consumer.ConsumerRecord; import com.opendxl.databus.consumer.DatabusConsumer; import com.opendxl.databus.entities.Headers; +import com.opendxl.databus.entities.TierStorage; import com.opendxl.databus.exception.DatabusClientRuntimeException; import com.opendxl.databus.common.internal.adapter.HeadersAvroDeserializedAdapter; import com.opendxl.databus.common.internal.adapter.PayloadHeadersAvroDeserializedAdapter; @@ -17,6 +19,8 @@ import org.apache.avro.generic.GenericRecord; import org.apache.avro.io.DatumReader; import org.apache.avro.io.DecoderFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Avro Message Deserializer @@ -25,6 +29,11 @@ */ public final class AvroMessageDeserializer implements InternalDeserializer { + /** + * The logger object. + */ + private static final Logger LOG = LoggerFactory.getLogger(AvroMessageDeserializer.class); + /** * The schema to define the message. 
*/ @@ -52,24 +61,56 @@ public AvroMessageDeserializer(final Schema schema) { */ @Override public DatabusMessage deserialize(final String topic, final byte[] data) { + return this.deserialize(topic, data, null); + } + + @Override + public DatabusMessage deserialize(String topic, byte[] data, TierStorage tierStorage) { try { - final GenericRecord avroRecord = reader.read(null, DecoderFactory.get().binaryDecoder(data, null)); + GenericRecord avroRecord = reader.read(null, DecoderFactory.get().binaryDecoder(data, null)); - final Headers headers = + Headers headers = new HeadersAvroDeserializedAdapter() .adapt(avroRecord.get("headers")); - final byte[] payload = + byte[] payload = new PayloadHeadersAvroDeserializedAdapter() .adapt(avroRecord.get("payload")); + + // Tier Storage Section + if (tierStorage != null) { + final String bucketName = headers.get(HeaderInternalField.TIER_STORAGE_BUCKET_NAME_KEY); + final String objectName = headers.get(HeaderInternalField.TIER_STORAGE_OBJECT_NAME_KEY); + if (bucketName != null && objectName != null) { + byte[] object = null; + try { + object = tierStorage.get(bucketName, objectName); + } catch (Exception e) { + LOG.error("Error when reading message from Tier Storage. Bucket Name: " + + bucketName + "Object Name: " + + objectName, e); + } + + if (object != null || object.length > 0) { + avroRecord = reader.read(null, DecoderFactory.get().binaryDecoder(object, null)); + headers = new HeadersAvroDeserializedAdapter().adapt(avroRecord.get("headers")); + payload = new PayloadHeadersAvroDeserializedAdapter().adapt(avroRecord.get("payload")); + } else { + LOG.warn("Object content reading from Tier Storage is null or empty. 
Bucket: " + bucketName + + " Object: " + objectName); + } + } + } + final DatabusMessage message = new DatabusMessage(headers, payload); return message; } catch (Exception e) { - throw new DatabusClientRuntimeException("Error deserializing Avro schema:" + schema.toString(true), - e, AvroMessageDeserializer.class); + final String errMsg = "Error deserializing Avro schema:" + schema.toString(true); + LOG.error(errMsg, e); + throw new DatabusClientRuntimeException(errMsg, e, AvroMessageDeserializer.class); } } } diff --git a/src/main/java/com/opendxl/databus/serialization/internal/InternalDeserializer.java b/src/main/java/com/opendxl/databus/serialization/internal/InternalDeserializer.java index 3215ebb..ea02265 100644 --- a/src/main/java/com/opendxl/databus/serialization/internal/InternalDeserializer.java +++ b/src/main/java/com/opendxl/databus/serialization/internal/InternalDeserializer.java @@ -4,6 +4,8 @@ package com.opendxl.databus.serialization.internal; +import com.opendxl.databus.entities.TierStorage; + /** * Internal Deserializer * Used by SDK to deserialize an object of P type, @@ -22,4 +24,7 @@ public interface InternalDeserializer

{ */ P deserialize(String topic, byte[] data); + P deserialize(String topic, byte[] data, TierStorage tierStorage); + + } diff --git a/src/main/java/com/opendxl/databus/serialization/internal/LegacyMessageDeserializer.java b/src/main/java/com/opendxl/databus/serialization/internal/LegacyMessageDeserializer.java index 8105ab7..db85664 100644 --- a/src/main/java/com/opendxl/databus/serialization/internal/LegacyMessageDeserializer.java +++ b/src/main/java/com/opendxl/databus/serialization/internal/LegacyMessageDeserializer.java @@ -6,6 +6,7 @@ import com.google.gson.Gson; +import com.opendxl.databus.entities.TierStorage; import com.opendxl.databus.exception.DatabusClientRuntimeException; import com.opendxl.databus.common.internal.builder.TopicNameBuilder; import com.opendxl.databus.common.internal.util.HeaderInternalField; @@ -47,6 +48,11 @@ public DatabusMessage deserialize(final String topic, final byte[] data) { } + @Override + public DatabusMessage deserialize(String topic, byte[] data, TierStorage tierStorage) { + return deserialize(topic, data); + } + /** * This class is filled by GSON framework, based on a JSON as byte[], * then, it can create a {@link DatabusMessage} getInstance diff --git a/src/main/java/com/opendxl/databus/serialization/internal/MessageDeserializer.java b/src/main/java/com/opendxl/databus/serialization/internal/MessageDeserializer.java index c532071..dcca9e4 100644 --- a/src/main/java/com/opendxl/databus/serialization/internal/MessageDeserializer.java +++ b/src/main/java/com/opendxl/databus/serialization/internal/MessageDeserializer.java @@ -4,6 +4,7 @@ package com.opendxl.databus.serialization.internal; +import com.opendxl.databus.entities.TierStorage; import com.opendxl.databus.entities.internal.DatabusMessage; import java.util.Map; @@ -13,6 +14,18 @@ */ public final class MessageDeserializer implements org.apache.kafka.common.serialization.Deserializer { + + private TierStorage tierStorage; + + public MessageDeserializer(final TierStorage 
tierStorage) { + this.tierStorage = tierStorage; + } + + public MessageDeserializer() { + this(null); + } + + /** * Not implemented. */ @@ -33,7 +46,7 @@ public DatabusMessage deserialize(final String topic, final byte[] serializedMes final MessageStructure messageStructure = MessageStructureFactory.getStructure(serializedMessage); final Integer version = messageStructure.getVersion(); final InternalDeserializer deserializer = DeserializerRegistry.getDeserializer(version); - return deserializer.deserialize(topic, messageStructure.getPayload()); + return deserializer.deserialize(topic, messageStructure.getPayload(), tierStorage); } diff --git a/src/main/java/com/opendxl/databus/serialization/internal/RawMessageDeserializer.java b/src/main/java/com/opendxl/databus/serialization/internal/RawMessageDeserializer.java index a2801d2..964d5fa 100644 --- a/src/main/java/com/opendxl/databus/serialization/internal/RawMessageDeserializer.java +++ b/src/main/java/com/opendxl/databus/serialization/internal/RawMessageDeserializer.java @@ -4,6 +4,7 @@ package com.opendxl.databus.serialization.internal; +import com.opendxl.databus.entities.TierStorage; import com.opendxl.databus.entities.internal.DatabusMessage; /** @@ -23,4 +24,9 @@ public final class RawMessageDeserializer implements InternalDeserializer Date: Thu, 19 Mar 2020 14:53:07 -0300 Subject: [PATCH 02/20] Add producer and consumer with tier storage capability. Add S3 Mock for UT purpose. 
Addd UT --- build.gradle | 6 + response.txt | 137 ++++++++++ ...cS3TierStorageConsumerProducerExample.java | 250 ++++++++++++++++++ .../cli/operation/ProduceOperation.java | 3 +- .../adapter/DatabusProducerRecordAdapter.java | 17 +- .../adapter/MessagePayloadAdapter.java | 24 +- .../databus/consumer/DatabusConsumer.java | 25 ++ .../opendxl/databus/entities/RoutingData.java | 29 +- .../databus/entities/TierStorageMetadata.java | 22 ++ .../databus/producer/DatabusProducer.java | 8 +- .../producer/DatabusTierStorageProducer.java | 229 ++++++++++++++++ .../opendxl/databus/producer/Producer.java | 70 +++-- .../internal/AvroMessageDeserializer.java | 14 +- .../databus/entities/S3TierStorageTest.java | 102 +++++++ 14 files changed, 877 insertions(+), 59 deletions(-) create mode 100644 response.txt create mode 100644 sample/src/sample/BasicS3TierStorageConsumerProducerExample.java create mode 100644 src/main/java/com/opendxl/databus/entities/TierStorageMetadata.java create mode 100644 src/main/java/com/opendxl/databus/producer/DatabusTierStorageProducer.java create mode 100644 src/test/java/com/opendxl/databus/entities/S3TierStorageTest.java diff --git a/build.gradle b/build.gradle index 3967732..41e12a9 100644 --- a/build.gradle +++ b/build.gradle @@ -75,6 +75,12 @@ dependencies { testImplementation 'commons-io:commons-io:2.6' testImplementation 'junit:junit:4.12' testImplementation 'com.github.stefanbirkner:system-rules:1.19.0' + //testImplementation 'com.adobe.testing:s3mock:2.1.19' + // https://mvnrepository.com/artifact/io.findify/s3mock + testImplementation 'io.findify:s3mock_2.12:0.2.5' + // https://mvnrepository.com/artifact/com.e-movimento.tinytools/privilegedaccessor + testImplementation 'com.e-movimento.tinytools:privilegedaccessor:1.2.2' + kafkaInMemory 'org.apache.kafka:kafka_2.12:2.3.1' kafkaInMemory 'commons-io:commons-io:2.6' diff --git a/response.txt b/response.txt new file mode 100644 index 0000000..f3a67ce --- /dev/null +++ b/response.txt @@ -0,0 +1,137 
@@ +HTTP/1.0 200 Connection established + +HTTP/1.1 403 cnmismatch +Connection: Keep-Alive +Content-Type: text/html +Cache-Control: no-cache +Content-Length: 3646 +X-Frame-Options: deny + + + + + + + + + McAfee Web Gateway - Notification + + + + + + + + + + +
+ + + + + +
+ + +
+ + + + + + + + +
+ Common Name Mismatch. +
+ + + + + + + +
+ The certificate verification failed due to a common name mismatch. +
+ + + + + + + +
+ Host: e1581825-govprodsupportgat-28a7-27882486.us-gov-west-1.elb.amazonaws.com
+ Common name: ui.soc.mcafee-gov.com
+ Alternative subject names: regex(ui\.soc\.mcafee-gov\.com), regex(api\.soc\.mcafee-gov\.com)
+
+ + + + + + + + + + + + + + +
+ For assistance, please contact the InfoSec team by submitting this form. +
+ + + + +
+ + CA Certificate installed. +
+ + + + + generated 2020-03-18 17:14:28 by MWG: us-dal-mwg6, IP 10.52.64.203, ProxyPort: 9090 +Client IP: 10.52.241.166, RuleName: Block Incident, User: ,AuthProto: +
+ User-Agent curl/7.54.0 +
+ +
+ + + diff --git a/sample/src/sample/BasicS3TierStorageConsumerProducerExample.java b/sample/src/sample/BasicS3TierStorageConsumerProducerExample.java new file mode 100644 index 0000000..f68eae9 --- /dev/null +++ b/sample/src/sample/BasicS3TierStorageConsumerProducerExample.java @@ -0,0 +1,250 @@ +/*---------------------------------------------------------------------------* + * Copyright (c) 2019 McAfee, LLC - All Rights Reserved. * + *---------------------------------------------------------------------------*/ + +package sample; + +import broker.ClusterHelper; +import com.amazonaws.ClientConfiguration; +import com.opendxl.databus.common.RecordMetadata; +import com.opendxl.databus.common.internal.builder.TopicNameBuilder; +import com.opendxl.databus.consumer.*; +import com.opendxl.databus.entities.*; +import com.opendxl.databus.producer.*; +import com.opendxl.databus.serialization.ByteArrayDeserializer; +import com.opendxl.databus.serialization.ByteArraySerializer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.charset.Charset; +import java.time.LocalDateTime; +import java.util.*; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; + + +public class BasicS3TierStorageConsumerProducerExample { + + private final Producer producer; + private final ExecutorService executor; + private final TierStorage tierStorage; + private Consumer consumer; + private String producerTopic = "topic1"; + private String consumerTopic = "topic1"; + + private static final long PRODUCER_TIME_CADENCE_MS = 1000L; + private static final long CONSUMER_TIME_CADENCE_MS = 1000L; + private final AtomicBoolean closed = new AtomicBoolean(false); + + private static Logger LOG = LoggerFactory.getLogger(BasicS3TierStorageConsumerProducerExample.class); + + public BasicS3TierStorageConsumerProducerExample() { + + // Start 
Kafka cluster + ClusterHelper + .getInstance() + .addBroker(9092) + .zookeeperPort(2181) + .start(); + + + ClientConfiguration awsClientConfiguration = new ClientConfiguration(); + + this.tierStorage = new S3TierStorage("", + "", + "", awsClientConfiguration); + + // Prepare a Producer + this.producer = getProducer(); + + // Prepare a Consumer + this.consumer = getConsumer(); + + // Subscribe to topic + this.consumer.subscribe(Collections.singletonList(consumerTopic)); + + this.executor = Executors.newFixedThreadPool(2); + + } + + public Producer getProducer() { + final Map config = new HashMap(); + config.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); + config.put(ProducerConfig.CLIENT_ID_CONFIG, "producer-id-sample"); + config.put(ProducerConfig.LINGER_MS_CONFIG, "100"); + config.put(ProducerConfig.BATCH_SIZE_CONFIG, "150000"); + config.put(ProducerConfig.TRANSACTIONAL_ID_CONFIG, UUID.randomUUID().toString()); + config.put(ProducerConfig.ENABLE_IDEMPOTENCE_CONFIG, true); + return new DatabusTierStorageProducer(config, new ByteArraySerializer(), null); + } + + public Consumer getConsumer() { + final Properties consumerProps = new Properties(); + consumerProps.put(ConsumerConfiguration.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); + consumerProps.put(ConsumerConfiguration.GROUP_ID_CONFIG, "cg1"); + consumerProps.put(ConsumerConfiguration.ENABLE_AUTO_COMMIT_CONFIG, "true"); + consumerProps.put(ConsumerConfiguration.SESSION_TIMEOUT_MS_CONFIG, "30000"); + consumerProps.put(ConsumerConfiguration.CLIENT_ID_CONFIG, "consumer-id-sample"); + return new DatabusConsumer(consumerProps, new ByteArrayDeserializer(), tierStorage); + } + + private Runnable getProducerTask() { + return () -> { + LOG.info("Producer started"); + while (!closed.get()) { + + // Prepare a record + final String message = "Hello World at "+ LocalDateTime.now(); + + // user should provide the encoding + final byte[] payload = message.getBytes(Charset.defaultCharset()); + final 
ProducerRecord producerRecord = getProducerRecord(producerTopic, payload); + + // Send the record + // Get headers as String + final StringBuilder headers = new StringBuilder().append("["); + producerRecord.getHeaders().getAll().forEach((k, v) -> headers.append("[" + k + ":" + v + "]")); + headers.append("]"); + + producer.send(producerRecord, new MyCallback(producerRecord.getRoutingData().getShardingKey())); + LOG.info("[PRODUCER -> KAFKA][SENDING MSG] ID " + producerRecord.getRoutingData().getShardingKey() + + " TOPIC:" + TopicNameBuilder.getTopicName(producerTopic, null) + + " HEADERS:" + headers + + " PAYLOAD:" + message); + + justWait(PRODUCER_TIME_CADENCE_MS); + } + producer.flush(); + producer.close(); + LOG.info("Producer closed"); + + }; + } + + private Runnable getConsumerTask() { + return () -> { + try { + LOG.info("Consumer started"); + while (!closed.get()) { + + // Polling the databus + final ConsumerRecords records = consumer.poll(CONSUMER_TIME_CADENCE_MS); + + // Iterate records + for (ConsumerRecord record : records) { + + // Get headers as String + final StringBuilder headers = new StringBuilder().append("["); + record.getHeaders().getAll().forEach((k, v) -> headers.append("[" + k + ":" + v + "]")); + headers.append("]"); + + LOG.info("[CONSUMER <- KAFKA][MSG RCEIVED] ID " + record.getKey() + + " TOPIC:" + record.getComposedTopic() + + " KEY:" + record.getKey() + + " PARTITION:" + record.getPartition() + + " OFFSET:" + record.getOffset() + + " TIMESTAMP:" + record.getTimestamp() + + " HEADERS:" + headers + + " PAYLOAD:" + new String(record.getMessagePayload().getPayload())); + } + consumer.commitAsync(); + } + } catch (Exception e) { + LOG.error(e.getMessage()); + } finally { + consumer.unsubscribe(); + try { + consumer.close(); + } catch (IOException e) { + LOG.error(e.getMessage()); + } + LOG.info("Consumer closed"); + + } + + }; + } + + public ProducerRecord getProducerRecord(final String topic, final byte[] payload) { + String key = 
String.valueOf(System.currentTimeMillis()); + TierStorageMetadata tStorageMetadata = new TierStorageMetadata("databus-poc-test", topic + key); + RoutingData routingData = new RoutingData(topic, key, null, tStorageMetadata); + + Headers headers = new Headers(); + headers.put("k","v"); + + MessagePayload messagePayload = new MessagePayload<>(payload); + return new ProducerRecord<>(routingData, headers, messagePayload); + } + + private void justWait(long time) { + try { + Thread.sleep(time); + } catch (InterruptedException e) { + e.printStackTrace(); + } + } + + private static class MyCallback implements Callback { + + private String shardingKey; + + public MyCallback(String shardingKey) { + + this.shardingKey = shardingKey; + } + + public void onCompletion(RecordMetadata metadata, Exception exception) { + if (exception != null) { + LOG.warn("Error sending a record " + exception.getMessage()); + return; + } + LOG.info("[PRODUCER <- KAFKA][OK MSG SENT] ID " + shardingKey + + " TOPIC:" + metadata.topic() + + " PARTITION:" + metadata.partition() + + " OFFSET:" + metadata.offset()); + } + } + + synchronized private void stopExample(final ExecutorService executor) { + try { + closed.set(true); + consumer.wakeup(); + ClusterHelper.getInstance().stop(); + executor.shutdown(); + executor.awaitTermination(5, TimeUnit.SECONDS); + } catch (InterruptedException e) { + } finally { + executor.shutdownNow(); + } + } + + public void startExample() throws InterruptedException { + + Runnable consumerTask = getConsumerTask(); + Runnable producerTask = getProducerTask(); + + executor.submit(consumerTask); + executor.submit(producerTask); + + Runtime.getRuntime().addShutdownHook( + new Thread( + new Runnable() { + public void run() { + stopExample(executor); + LOG.info("Example finished"); + } + })); + + } + + + public static void main(String[] args) throws InterruptedException { + LOG.info("Ctrl-C to finish"); + new BasicS3TierStorageConsumerProducerExample().startExample(); + } + +} 
diff --git a/src/main/java/com/opendxl/databus/cli/operation/ProduceOperation.java b/src/main/java/com/opendxl/databus/cli/operation/ProduceOperation.java index cf08bb3..6b90a05 100644 --- a/src/main/java/com/opendxl/databus/cli/operation/ProduceOperation.java +++ b/src/main/java/com/opendxl/databus/cli/operation/ProduceOperation.java @@ -196,7 +196,8 @@ private RoutingData getRoutingData(final String topic, final String shardingKey, final String partition) { RoutingData routingData; if (!partition.isEmpty()) { - routingData = new RoutingData(topic, shardingKey, tenantGroup, Integer.parseInt(partition)); + routingData = new RoutingData(topic, shardingKey, tenantGroup, Integer.parseInt(partition), + null); } else { routingData = new RoutingData(topic, shardingKey, tenantGroup); } diff --git a/src/main/java/com/opendxl/databus/common/internal/adapter/DatabusProducerRecordAdapter.java b/src/main/java/com/opendxl/databus/common/internal/adapter/DatabusProducerRecordAdapter.java index bc9067e..9b99273 100644 --- a/src/main/java/com/opendxl/databus/common/internal/adapter/DatabusProducerRecordAdapter.java +++ b/src/main/java/com/opendxl/databus/common/internal/adapter/DatabusProducerRecordAdapter.java @@ -7,6 +7,7 @@ import com.opendxl.databus.common.internal.builder.TopicNameBuilder; import com.opendxl.databus.common.internal.util.HeaderInternalField; import com.opendxl.databus.entities.Headers; +import com.opendxl.databus.entities.TierStorageMetadata; import com.opendxl.databus.entities.internal.DatabusMessage; import com.opendxl.databus.producer.ProducerRecord; import com.opendxl.databus.serialization.Serializer; @@ -19,7 +20,7 @@ * @param

payload's type */ public final class DatabusProducerRecordAdapter

- implements Adapter, org.apache.kafka.clients.producer.ProducerRecord> { /** @@ -58,9 +59,19 @@ public DatabusProducerRecordAdapter(final Serializer

messageSerializer) { sourceProducerRecord.getRoutingData().getTopic()); } + // Add internal headers to let consumer knows the payload is tiered storage + TierStorageMetadata tierStorageMetadata = sourceProducerRecord.getRoutingData().getTierStorageMetadata(); + if (tierStorageMetadata != null + && tierStorageMetadata.getBucketName() != null && !tierStorageMetadata.getBucketName().isEmpty() + && tierStorageMetadata.getObjectName() != null && !tierStorageMetadata.getObjectName().isEmpty() + ) { + clonedHeaders.put(HeaderInternalField.TIER_STORAGE_BUCKET_NAME_KEY, tierStorageMetadata.getBucketName()); + clonedHeaders.put(HeaderInternalField.TIER_STORAGE_OBJECT_NAME_KEY, tierStorageMetadata.getObjectName()); + } + final DatabusMessage databusMessage = - new MessagePayloadAdapter(messageSerializer, clonedHeaders) - .adapt(sourceProducerRecord.payload()); + new MessagePayloadAdapter

(messageSerializer) + .adapt(sourceProducerRecord.payload(), clonedHeaders); final String targetTopic = TopicNameBuilder.getTopicName(sourceProducerRecord.getRoutingData().getTopic(), diff --git a/src/main/java/com/opendxl/databus/common/internal/adapter/MessagePayloadAdapter.java b/src/main/java/com/opendxl/databus/common/internal/adapter/MessagePayloadAdapter.java index 234b850..85239c8 100644 --- a/src/main/java/com/opendxl/databus/common/internal/adapter/MessagePayloadAdapter.java +++ b/src/main/java/com/opendxl/databus/common/internal/adapter/MessagePayloadAdapter.java @@ -16,30 +16,22 @@ * * @param

payload's type */ -public final class MessagePayloadAdapter

implements Adapter, DatabusMessage> { +public final class MessagePayloadAdapter

{ /** * The message deserializer. */ - private final Serializer

messageSerializer; - - /** - * The headers map. - */ - private final Headers headers; + private final Serializer

userSerializer; /** * Constructor * - * @param messageSerializer A {@link Serializer} instance + * @param userSerializer A {@link Serializer} instance * or creating a {@link DatabusMessage}. - * @param headers Headers map. */ - public MessagePayloadAdapter(final Serializer

messageSerializer, - final Headers headers) { + public MessagePayloadAdapter(final Serializer

userSerializer) { - this.messageSerializer = messageSerializer; - this.headers = headers; + this.userSerializer = userSerializer; } /** @@ -48,10 +40,10 @@ public MessagePayloadAdapter(final Serializer

messageSerializer, * * @param messagePayload a {@link MessagePayload} instance to be adapted. * @return a {@link DatabusMessage} instance. + * @param headers headers */ - @Override - public DatabusMessage adapt(final MessagePayload

messagePayload) { - final byte[] payload = messageSerializer.serialize(messagePayload.getPayload()); + public DatabusMessage adapt(final MessagePayload

messagePayload, final Headers headers) { + final byte[] payload = userSerializer.serialize(messagePayload.getPayload()); return new DatabusMessage(headers, payload); } diff --git a/src/main/java/com/opendxl/databus/consumer/DatabusConsumer.java b/src/main/java/com/opendxl/databus/consumer/DatabusConsumer.java index 2c3b7c2..b959753 100644 --- a/src/main/java/com/opendxl/databus/consumer/DatabusConsumer.java +++ b/src/main/java/com/opendxl/databus/consumer/DatabusConsumer.java @@ -75,6 +75,23 @@ public DatabusConsumer(final Map configs, final Deserializer

this(configs, messageDeserializer, null, null); } + /** + * A consumer is instantiated by providing a set of key-value pairs as configuration. Valid configuration strings + * are documented here. Values can be + * either strings or objects of the appropriate type (for example a numeric configuration would accept either the + * string "42" or the integer 42). + *

+ * Valid configuration strings are documented at {@link org.apache.kafka.clients.consumer.ConsumerConfig} + * + * @param configs The consumer configs + * @param messageDeserializer a {@link Deserializer} getInstance implementd by SDK's user + * @throws DatabusClientRuntimeException if a DatabusConsumer getInstance was not able to be created + * @param tierStorage Tier Storage + */ + public DatabusConsumer(final Map configs, final Deserializer

messageDeserializer, + final TierStorage tierStorage) { + this(configs, messageDeserializer, null, tierStorage); + } /** * A consumer is instantiated by providing a set of key-value pairs as configuration. Valid configuration strings * are documented here. Values can be @@ -86,6 +103,7 @@ public DatabusConsumer(final Map configs, final Deserializer

* @param configs The consumer configs * @param messageDeserializer a {@link Deserializer} getInstance implementd by SDK's user * @param credential identity to authenticate/authorization + * @param tierStorage Tier Storage * * @throws DatabusClientRuntimeException if a DatabusConsumer getInstance was not able to be created */ @@ -121,6 +139,11 @@ public DatabusConsumer(final Properties properties, final Deserializer

messag this(properties, messageDeserializer, null, null); } + public DatabusConsumer(final Properties properties, final Deserializer

messageDeserializer, + final TierStorage tierStorage) { + this(properties, messageDeserializer, null, tierStorage); + } + /** * A consumer is instantiated by providing a {@link Properties} object as configuration. Valid * configuration strings are documented at {@link org.apache.kafka.clients.consumer.ConsumerConfig} @@ -132,6 +155,7 @@ public DatabusConsumer(final Properties properties, final Deserializer

messag * @param properties The consumer configuration properties * @param messageDeserializer a {@link Deserializer} getInstance implementd by SDK's user * @param credential identity to authenticate/authorization + * @param tierStorage Tier Storage * * @throws DatabusClientRuntimeException if a DatabusConsumer getInstance was not able to be created */ @@ -140,6 +164,7 @@ public DatabusConsumer(final Properties properties, final Deserializer

messag try { Map configuration = configureCredential((Map) properties, credential); configuration = configureClientId(configuration); + configuration.put(ConsumerConfiguration.ISOLATION_LEVEL_CONFIG, "read_committed"); setFieldMembers(messageDeserializer, configuration, tierStorage); setConsumer(new KafkaConsumer(configuration, getKeyDeserializer(), getValueDeserializer())); } catch (DatabusClientRuntimeException e) { diff --git a/src/main/java/com/opendxl/databus/entities/RoutingData.java b/src/main/java/com/opendxl/databus/entities/RoutingData.java index b08d720..76ed3b1 100644 --- a/src/main/java/com/opendxl/databus/entities/RoutingData.java +++ b/src/main/java/com/opendxl/databus/entities/RoutingData.java @@ -49,15 +49,23 @@ public class RoutingData { */ private String tenantGroup = DEFAULT_TENANT_GROUP; + /** + * Tier Storage Metadata + */ + private TierStorageMetadata tierStorageMetadata; + /** * RoutingData constructor with only topic name parameter * * @param topic The topic name where the message must be sent */ public RoutingData(final String topic) { - this(topic, null, null, null); + this(topic, null, null, null, null); } + public RoutingData(final String topic, final TierStorageMetadata tierStorageMetadata) { + this(topic, null, null, null, tierStorageMetadata); + } /** * RoutingData constructor with topic name sharding key and tenant group parameters * @@ -66,9 +74,15 @@ public RoutingData(final String topic) { * @param tenantGroup The name that groups topics */ public RoutingData(final String topic, final String shardingKey, final String tenantGroup) { - this(topic, shardingKey, tenantGroup, null); + this(topic, shardingKey, tenantGroup, null, null); } + public RoutingData(final String topic, + final String shardingKey, + final String tenantGroup, + final TierStorageMetadata tierStorageMetadata) { + this(topic, shardingKey, tenantGroup, null, tierStorageMetadata); + } /** * RoutingData constructor with all parameters * @@ -76,10 +90,11 @@ public 
RoutingData(final String topic, final String shardingKey, final String te * @param shardingKey The Databus sharding key * @param tenantGroup The name that groups topics * @param partition The partition number + * @param tierStorageMetadata Tier Storage Metadata + * */ public RoutingData(final String topic, final String shardingKey, final String tenantGroup, - final Integer partition) { - + final Integer partition, final TierStorageMetadata tierStorageMetadata) { if (StringUtils.isBlank(topic)) { throw new DatabusClientRuntimeException("topic cannot be empty or null", RoutingData.class); } @@ -87,6 +102,7 @@ public RoutingData(final String topic, final String shardingKey, final String te this.tenantGroup = Optional.ofNullable(tenantGroup).orElse("").trim(); this.shardingKey = shardingKey; this.partition = partition; + this.tierStorageMetadata = tierStorageMetadata; } /** @@ -124,4 +140,9 @@ public String getTenantGroup() { public Integer getPartition() { return partition; } + + public TierStorageMetadata getTierStorageMetadata() { + return tierStorageMetadata; + } + } diff --git a/src/main/java/com/opendxl/databus/entities/TierStorageMetadata.java b/src/main/java/com/opendxl/databus/entities/TierStorageMetadata.java new file mode 100644 index 0000000..2b76455 --- /dev/null +++ b/src/main/java/com/opendxl/databus/entities/TierStorageMetadata.java @@ -0,0 +1,22 @@ +package com.opendxl.databus.entities; + +public class TierStorageMetadata { + + private final String bucketName; + private final String objectName; + + public TierStorageMetadata(final String bucketName, final String objectName) { + this.bucketName = bucketName.trim(); + this.objectName = objectName.trim(); + } + + public String getBucketName() { + return bucketName; + } + + public String getObjectName() { + return objectName; + } + + +} diff --git a/src/main/java/com/opendxl/databus/producer/DatabusProducer.java b/src/main/java/com/opendxl/databus/producer/DatabusProducer.java index 95cff21..7c0549b 100644 
--- a/src/main/java/com/opendxl/databus/producer/DatabusProducer.java +++ b/src/main/java/com/opendxl/databus/producer/DatabusProducer.java @@ -128,7 +128,7 @@ public DatabusProducer(final Map configs, final Serializer

me setFieldMembers(messageSerializer); this.setConfiguration(overrideConfig(configs)); this.configureCredential(getConfiguration(), credential); - setProducer(new KafkaProducer(this.getConfiguration(), getKeySerializer(), getValueSerializer())); + setProducer(new KafkaProducer(this.getConfiguration(), getKeySerializer(), getKafkaValueSerializer())); setClientId((String) configs.get(ProducerConfig.CLIENT_ID_CONFIG)); } catch (DatabusClientRuntimeException e) { throw e; @@ -176,7 +176,7 @@ public DatabusProducer(final Properties properties, final Serializer

messageS Properties fixedProperties = overrideConfig(properties); this.setConfiguration((Map) fixedProperties); this.configureCredential(getConfiguration(), credential); - setProducer(new KafkaProducer(this.getConfiguration(), getKeySerializer(), getValueSerializer())); + setProducer(new KafkaProducer(this.getConfiguration(), getKeySerializer(), getKafkaValueSerializer())); setClientId((String) fixedProperties.get(ProducerConfig.CLIENT_ID_CONFIG)); } catch (DatabusClientRuntimeException e) { throw e; @@ -197,8 +197,8 @@ private void setFieldMembers(final Serializer

messageSerializer) { + "Message Serializer cannot be null" , DatabusProducer.class); } - setKeySerializer(new DatabusKeySerializer()); - setValueSerializer(new MessageSerializer()); + setKafkaKeySerializer(new DatabusKeySerializer()); + setKafkaValueSerializer(new MessageSerializer()); setDatabusProducerRecordAdapter(new DatabusProducerRecordAdapter

(messageSerializer)); } diff --git a/src/main/java/com/opendxl/databus/producer/DatabusTierStorageProducer.java b/src/main/java/com/opendxl/databus/producer/DatabusTierStorageProducer.java new file mode 100644 index 0000000..7a3ee8a --- /dev/null +++ b/src/main/java/com/opendxl/databus/producer/DatabusTierStorageProducer.java @@ -0,0 +1,229 @@ +/*---------------------------------------------------------------------------* + * Copyright (c) 2019 McAfee, LLC - All Rights Reserved. * + *---------------------------------------------------------------------------*/ + +package com.opendxl.databus.producer; + +import com.opendxl.databus.common.RecordMetadata; +import com.opendxl.databus.common.internal.adapter.DatabusProducerRecordAdapter; +import com.opendxl.databus.common.internal.adapter.MessagePayloadAdapter; +import com.opendxl.databus.credential.Credential; +import com.opendxl.databus.entities.MessagePayload; +import com.opendxl.databus.entities.TierStorage; +import com.opendxl.databus.entities.TierStorageMetadata; +import com.opendxl.databus.entities.internal.DatabusMessage; +import com.opendxl.databus.exception.DatabusClientRuntimeException; +import com.opendxl.databus.serialization.Serializer; +import com.opendxl.databus.serialization.internal.MessageSerializer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Map; +import java.util.Optional; +import java.util.Properties; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; + + +public class DatabusTierStorageProducer

extends DatabusProducer

{ + + private static final Logger LOG = LoggerFactory.getLogger(DatabusTierStorageProducer.class); + + private TierStorage tierStorage; + private MessagePayloadAdapter

messagePayloadAdapter; + + public DatabusTierStorageProducer(final Map configs, final Serializer

userSerializer, + final TierStorage tierStorage) { + this(configs, userSerializer, null, tierStorage); + } + + + public DatabusTierStorageProducer(final Map configs, final Serializer

userSerializer, + final Credential credential, final TierStorage tierStorage) { + super(configs, userSerializer, credential); + if (tierStorage == null) { + throw new IllegalArgumentException("Tier Storage cannot be null"); + } + this.tierStorage = tierStorage; + setFieldMembers(userSerializer); + initTransactions(); + } + + + public DatabusTierStorageProducer(final Properties properties, final Serializer

userSerializer, + final TierStorage tierStorage) { + this(properties, userSerializer, null, tierStorage); + } + + + public DatabusTierStorageProducer(final Properties properties, final Serializer

userSerializer, + final Credential credential, final TierStorage tierStorage) { + + super(properties, userSerializer, credential); + if (tierStorage == null) { + throw new IllegalArgumentException("Tier Storage cannot be null"); + } + setFieldMembers(userSerializer); + initTransactions(); + } + + private void setFieldMembers(Serializer

userSerializer) { + setKafkaValueSerializer(new MessageSerializer()); // The serializer used bu Kafka + this.messagePayloadAdapter = new MessagePayloadAdapter

(userSerializer); + setDatabusProducerRecordAdapter(new DatabusProducerRecordAdapter

(userSerializer)); + } + + + @Override + public void send(final ProducerRecord record) { + send(record, null); + } + + @Override + public void send(ProducerRecord

producerRecord, final Callback callback) { + + if (producerRecord.getRoutingData().getTierStorageMetadata() == null + || producerRecord.getRoutingData().getTierStorageMetadata().getBucketName() == null + || producerRecord.getRoutingData().getTierStorageMetadata().getBucketName().isEmpty() + || producerRecord.getRoutingData().getTierStorageMetadata().getObjectName() == null + || producerRecord.getRoutingData().getTierStorageMetadata().getObjectName().isEmpty() + ) { + final String errMsg = "Bucket metadatada is invalid"; + LOG.error(errMsg); + throw new IllegalArgumentException(errMsg); + } + + final TierStorageMetadata tierStorageMetadata = + producerRecord.getRoutingData().getTierStorageMetadata(); + + // Serialize the producerRecord payload to be stored with TieredStorage when callback being invoked by Kafka + final DatabusMessage databusMessage = + messagePayloadAdapter.adapt(producerRecord.payload(), producerRecord.getHeaders()); + final byte[] databusMessageSerialized = getKafkaValueSerializer().serialize("", databusMessage); + + // Remove the producerRecord headers and payload + final ProducerRecord

adaptedProducerRecord = new ProducerRecord(producerRecord.getRoutingData(), + producerRecord.getHeaders(), + new MessagePayload(null)); + + + // Get a Kafka Producer Record made up by a DatabusMessage: + // version = AVRO_1_S3_TIER_STORAGE_VERSION_NUMBER + // headers = empty + // payload = empty + org.apache.kafka.clients.producer.ProducerRecord targetProducerRecord = + getDatabusProducerRecordAdapter().adapt(adaptedProducerRecord); + + // Create the callback + CountDownLatch latch = new CountDownLatch(1); + final CallbackAdapterTierStorage callbackAdapterTierStorage; + if (callback != null) { + callbackAdapterTierStorage = new CallbackAdapterTierStorage(callback, + databusMessageSerialized, + latch, + tierStorageMetadata); + } else { + callbackAdapterTierStorage = null; + } + + beginTransaction(); + super.sendKafkaRecord(targetProducerRecord, callbackAdapterTierStorage); + try { + // wait for callback ends + final boolean callbackFinished = latch.await(10000, TimeUnit.MILLISECONDS); + if (callbackFinished) { + if (callbackAdapterTierStorage.isOk()) { + commitTransaction(); + } else { + abortTransaction(); + } + } else { // means that the callback has not finished in time + LOG.error("Record not produced. Too long time taken by tier storage."); + abortTransaction(); + } + + } catch (InterruptedException e) { + abortTransaction(); + } + } + + + /** + * Callback Adapter + *

+ * It forwards a kafka callback to databus callback + */ + private class CallbackAdapterTierStorage implements org.apache.kafka.clients.producer.Callback { + private final Callback callback; + private final byte[] databusMessageSerialized; + private CountDownLatch latch; + private TierStorageMetadata tierStorageMetadata; + private AtomicBoolean isOk = new AtomicBoolean(false); + + /** + * @param callback Databus callback + * @param databusMessageSerialized + * @param latch + */ + CallbackAdapterTierStorage(final Callback callback, + final byte[] databusMessageSerialized, + final CountDownLatch latch, + final TierStorageMetadata tierStorageMetadata) { + this.callback = callback; + this.databusMessageSerialized = databusMessageSerialized; + this.latch = latch; + this.tierStorageMetadata = tierStorageMetadata; + } + + /** + * It is called as a send result. Then it is forwarded and adapted to databus callback + * + * @param recordMetadata Kafka RecordMetadata + * @param exception An exception thrown by Databus broker + */ + @Override + public void onCompletion(final org.apache.kafka.clients.producer.RecordMetadata recordMetadata, + final Exception exception) { + + if (exception != null) { + LOG.error("The record was not produced. " + exception.getMessage(), exception); + response(recordMetadata, exception); + return; + } + + try { + + tierStorage.put(tierStorageMetadata.getBucketName(), + tierStorageMetadata.getObjectName(), + databusMessageSerialized); + response(recordMetadata, exception); + } catch (DatabusClientRuntimeException databusException) { + LOG.error("The record was not produced. 
" + databusException.getMessage(), databusException); + response(recordMetadata, databusException); + } + } + + /** + * Send callback response + * + * @param recordMetadata recordMetadata + * @param exception exception + */ + private void response(final org.apache.kafka.clients.producer.RecordMetadata kafkaRecordMetadata, + final Exception exception) { + isOk.set(exception == null); + latch.countDown(); + final RecordMetadata databusRecordMetadata = + Optional.ofNullable(new RecordMetadata(kafkaRecordMetadata)) + .orElse(null); + callback.onCompletion(databusRecordMetadata, exception); + } + + protected boolean isOk() { + return isOk.get(); + } + } + + +} diff --git a/src/main/java/com/opendxl/databus/producer/Producer.java b/src/main/java/com/opendxl/databus/producer/Producer.java index 8e7288e..bcc6b25 100644 --- a/src/main/java/com/opendxl/databus/producer/Producer.java +++ b/src/main/java/com/opendxl/databus/producer/Producer.java @@ -30,7 +30,8 @@ import java.util.concurrent.TimeUnit; /** - * A abstract producer, responsible for handling Databus outgoing messages. + * A abstract producer, responsible for handling Databus outgoing messages. + * * @param

payload's type */ public abstract class Producer

{ @@ -43,7 +44,7 @@ public abstract class Producer

{ /** * A Kafka Serializer of {@link DatabusMessage}. */ - private org.apache.kafka.common.serialization.Serializer valueSerializer; + private org.apache.kafka.common.serialization.Serializer kafkaValueSerializer; /** * A configuration map for the producer. @@ -133,9 +134,9 @@ public void send(final ProducerRecord record) { * expensive callbacks it is recommended to use your own {@link java.util.concurrent.Executor} in the callback body * to parallelize processing. * - * @param producerRecord The non-null record to send - * @param callback A user-supplied callback to execute when the record has been acknowledged by the server (null - * indicates no callback) + * @param producerRecord The non-null record to send + * @param callback A user-supplied callback to execute when the record has been acknowledged by the server + * (null indicates no callback) * @throws IllegalArgumentException If record argumet is null * @throws DatabusClientRuntimeException If send method fails. The original cause could be any of these exceptions: *

SerializationException If the key or value are not valid objects @@ -161,13 +162,19 @@ public void send(final ProducerRecord

producerRecord, final Callback callback callbackAdapter = null; } - producer.send(targetProducerRecord, callbackAdapter); + sendKafkaRecord(targetProducerRecord, callbackAdapter); } catch (Exception e) { throw new DatabusClientRuntimeException("send cannot be performed: " + e.getMessage(), e, Producer.class); } } + protected void + sendKafkaRecord(final org.apache.kafka.clients.producer.ProducerRecord record, + final org.apache.kafka.clients.producer.Callback callback) { + producer.send(record, callback); + } + /** * Invoking this method makes all buffered records immediately available to send (even if linger.ms is * greater than 0) and blocks on the completion of the requests associated with these records. The post-condition @@ -216,7 +223,7 @@ public void flush() { * @param topic to get info * @return List of {@link PartitionInfo} * @throws DatabusClientRuntimeException If partitionsFor method fails. - * The original cause could be the following exception: + * The original cause could be the following exception: *

InterruptException If the thread is interrupted while blocked */ public List partitionsFor(final String topic) { @@ -301,7 +308,7 @@ public void close(final long timeout, final TimeUnit timeUnit) { * * @param keySerializer A DatabusKeySerializer Instance */ - protected void setKeySerializer(final DatabusKeySerializer keySerializer) { + protected void setKafkaKeySerializer(final DatabusKeySerializer keySerializer) { this.keySerializer = keySerializer; } @@ -311,14 +318,15 @@ protected void setKeySerializer(final DatabusKeySerializer keySerializer) { * @param valueSerializer A Serializer object instance for the value serializer */ protected void - setValueSerializer(final org.apache.kafka.common.serialization.Serializer valueSerializer) { - this.valueSerializer = valueSerializer; + setKafkaValueSerializer(final org.apache.kafka.common.serialization.Serializer + kafkaValueSerializer) { + this.kafkaValueSerializer = kafkaValueSerializer; } /** * Get the key serializer from producer * - * @return A {@link DatabusKeySerializer} object instance + * @return A {@link DatabusKeySerializer} object instance */ protected DatabusKeySerializer getKeySerializer() { return keySerializer; @@ -327,16 +335,16 @@ protected DatabusKeySerializer getKeySerializer() { /** * Get the value serializer from producer * - * @return A {@link org.apache.kafka.common.serialization.Serializer} object instance + * @return A {@link org.apache.kafka.common.serialization.Serializer} object instance */ - protected org.apache.kafka.common.serialization.Serializer getValueSerializer() { - return valueSerializer; + protected org.apache.kafka.common.serialization.Serializer getKafkaValueSerializer() { + return kafkaValueSerializer; } /** * Set a Kafka producer instance to the producer. 
* - * @return A {@link org.apache.kafka.clients.producer.Producer} object instance to set in the producer + * @return A {@link org.apache.kafka.clients.producer.Producer} object instance to set in the producer */ protected void setProducer(final org.apache.kafka.clients.producer.Producer producer) { this.producer = producer; @@ -351,6 +359,16 @@ protected void setDatabusProducerRecordAdapter(final DatabusProducerRecordAdapte this.databusProducerRecordAdapter = databusProducerRecordAdapter; } + + /** + * Set a {@link DatabusProducerRecordAdapter} associated to the producer. + * + * @param databusProducerRecordAdapter The {@link DatabusProducerRecordAdapter} to set to the producer + */ + protected DatabusProducerRecordAdapter

getDatabusProducerRecordAdapter() { + return this.databusProducerRecordAdapter; + } + /** * Set the clientId to the producer * @@ -406,7 +424,7 @@ public void setConfiguration(final Map configuration) { /** * Needs to be called before any other methods when the transactional.id is set in the configuration. - * + *

* This method does the following: * 1. Ensures any transactions initiated by previous instances of the producer with the same * transactional.id are completed. If the previous instance had failed with a transaction in @@ -471,7 +489,7 @@ public void beginTransaction() { * (via {@link Consumer#commitSync(Map) sync} or * {@link Consumer#commitAsync(OffsetCommitCallback)} commits). * - * @param offsets offsets + * @param offsets offsets * @param consumerGroupId consumer group id * @throws DatabusClientRuntimeException If method fails. The original cause could be any of these exceptions: *

IllegalStateException if no transactional.id has been configured or no transaction has been started @@ -486,7 +504,7 @@ public void beginTransaction() { * other unexpected error */ public void sendOffsetsToTransaction(final Map offsets, - final String consumerGroupId) { + final String consumerGroupId) { try { Map adaptedOffsets = new HashMap(); @@ -509,20 +527,20 @@ public void sendOffsetsToTransaction(final Map * Further, if any of the {@link #send(ProducerRecord)} calls which were part of the transaction hit irrecoverable * errors, this method will throw the last received exception immediately and the transaction will not be committed. * So all {@link #send(ProducerRecord)} calls in a transaction must succeed in order for this method to succeed. - * + *

* DatabusClientRuntimeException If method fails. The original cause could be any of these exceptions: *

IllegalStateException if no transactional.id has been configured or no transaction has been started *

ProducerFencedException fatal error indicating another producer with the same transactional.id is active *

org.apache.kafka.common.errors.UnsupportedVersionException fatal error indicating the broker - * does not support transactions (i.e. if its version is lower than 0.11.0.0) + * does not support transactions (i.e. if its version is lower than 0.11.0.0) *

org.apache.kafka.common.errors.AuthorizationException fatal error indicating that the configured - * transactional.id is not authorized. See the exception for more details + * transactional.id is not authorized. See the exception for more details *

KafkaException if the producer has encountered a previous fatal or abortable error, or for any - * other unexpected error + * other unexpected error */ public void commitTransaction() { try { @@ -544,9 +562,9 @@ public void commitTransaction() { *

IllegalStateException if no transactional.id has been configured or no transaction has been started *

ProducerFencedException fatal error indicating another producer with the same transactional.id is active *

org.apache.kafka.common.errors.UnsupportedVersionException fatal error indicating the broker - * does not support transactions (i.e. if its version is lower than 0.11.0.0) + * does not support transactions (i.e. if its version is lower than 0.11.0.0) *

org.apache.kafka.common.errors.AuthorizationException fatal error indicating that the configured - * transactional.id is not authorized. See the exception for more details + * transactional.id is not authorized. See the exception for more details *

KafkaException if the producer has encountered a previous fatal error or for any other unexpected error */ public void abortTransaction() { @@ -715,7 +733,7 @@ private ProducerMetric getMetricPerClientId(final ProducerMetricEnum producerMet /** * Gets a {@link ProducerMetric} given a Topic name and a {@link ProducerMetricEnum}. * - * @param topic The topic name. + * @param topic The topic name. * @param producerMetricEnum The {@link ProducerMetricEnum} to get the metric. * @return a {@link ProducerMetric} instance. */ diff --git a/src/main/java/com/opendxl/databus/serialization/internal/AvroMessageDeserializer.java b/src/main/java/com/opendxl/databus/serialization/internal/AvroMessageDeserializer.java index 9de0383..0c9a0e0 100644 --- a/src/main/java/com/opendxl/databus/serialization/internal/AvroMessageDeserializer.java +++ b/src/main/java/com/opendxl/databus/serialization/internal/AvroMessageDeserializer.java @@ -83,18 +83,22 @@ public DatabusMessage deserialize(String topic, byte[] data, TierStorage tierSto if (tierStorage != null) { final String bucketName = headers.get(HeaderInternalField.TIER_STORAGE_BUCKET_NAME_KEY); final String objectName = headers.get(HeaderInternalField.TIER_STORAGE_OBJECT_NAME_KEY); - if (bucketName != null && objectName != null) { - byte[] object = null; + + if (bucketName != null && objectName != null && !bucketName.isEmpty() && !objectName.isEmpty()) { + byte[] tierStorageObjectContent = null; try { - object = tierStorage.get(bucketName, objectName); + tierStorageObjectContent = tierStorage.get(bucketName, objectName); } catch (Exception e) { LOG.error("Error when reading message from Tier Storage. 
Bucket Name: " + bucketName + "Object Name: " + objectName, e); } - if (object != null || object.length > 0) { - avroRecord = reader.read(null, DecoderFactory.get().binaryDecoder(object, null)); + if (tierStorageObjectContent != null && tierStorageObjectContent.length > 0) { + MessageStructure messageStructure = + MessageStructureFactory.getStructure(tierStorageObjectContent); + avroRecord = reader + .read(null, DecoderFactory.get().binaryDecoder(messageStructure.getPayload(), null)); headers = new HeadersAvroDeserializedAdapter().adapt(avroRecord.get("headers")); payload = new PayloadHeadersAvroDeserializedAdapter().adapt(avroRecord.get("payload")); } else { diff --git a/src/test/java/com/opendxl/databus/entities/S3TierStorageTest.java b/src/test/java/com/opendxl/databus/entities/S3TierStorageTest.java new file mode 100644 index 0000000..ef0e886 --- /dev/null +++ b/src/test/java/com/opendxl/databus/entities/S3TierStorageTest.java @@ -0,0 +1,102 @@ +package com.opendxl.databus.entities; + +import com.amazonaws.ClientConfiguration; +import com.amazonaws.auth.AWSStaticCredentialsProvider; +import com.amazonaws.auth.AnonymousAWSCredentials; +import com.amazonaws.client.builder.AwsClientBuilder; +import com.amazonaws.services.s3.AmazonS3Client; +import com.amazonaws.services.s3.AmazonS3ClientBuilder; +import io.findify.s3mock.S3Mock; +import junit.extensions.PA; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.util.Arrays; + +public class S3TierStorageTest { + + private static final String AWS_SECRET_KEY = "secretKey"; + private static final String AWS_ACCESS_KEY = "accessKey"; + private static final String AWS_REGION = "us-east-1"; + private static S3Mock api; + private static AmazonS3Client client; + + + @BeforeClass + public static void beforeClass() { + api = new S3Mock.Builder().withPort(8001).withInMemoryBackend().build(); + api.start(); + AwsClientBuilder.EndpointConfiguration endpoint 
= + new AwsClientBuilder + .EndpointConfiguration("http://localhost:8001", "us-east-1"); + + client = (AmazonS3Client) AmazonS3ClientBuilder + .standard() + .withPathStyleAccessEnabled(true) + .withEndpointConfiguration(endpoint) + .withCredentials(new AWSStaticCredentialsProvider(new AnonymousAWSCredentials())) + .build(); + } + + @AfterClass + public static void afterClass() { + api.shutdown(); // kills the underlying actor system. Use api.stop() to just unbind the port. + } + + @Test + public void shouldPutAngGetAnS3ObjectWithCredentials() { + + final String bucketName = "bucket-name"; + final String objectName = "object-name"; + final String objectRaw = "Hello!"; + final byte[] objectContent = objectRaw.getBytes(); + + try { + TierStorage tierStorage = new S3TierStorage(AWS_ACCESS_KEY, AWS_SECRET_KEY, AWS_REGION, + new ClientConfiguration()); + + PA.setValue(tierStorage, "s3Client", client); + tierStorage.put(bucketName, objectName, objectContent); + + Assert.assertTrue(tierStorage.doesObjectExist(bucketName, objectName)); + + + byte[] actualObjectContent = tierStorage.get(bucketName, objectName); + Assert.assertTrue(Arrays.equals(actualObjectContent, objectContent)); + String actualObjectRaw = new String(actualObjectContent); + Assert.assertTrue(actualObjectRaw.equals(objectRaw)); + + } catch (Exception e) { + Assert.fail(); + } + } + + @Test + public void shouldPutAngGetAnS3ObjectWithoutCredentials() { + + final String bucketName = "bucket-name"; + final String objectName = "object-name"; + final String objectRaw = "Hello!"; + final byte[] objectContent = objectRaw.getBytes(); + + try { + TierStorage tierStorage = new S3TierStorage(AWS_REGION, new ClientConfiguration()); + + PA.setValue(tierStorage, "s3Client", client); + tierStorage.put(bucketName, objectName, objectContent); + + Assert.assertTrue(tierStorage.doesObjectExist(bucketName, objectName)); + + byte[] actualObjectContent = tierStorage.get(bucketName, objectName); + 
Assert.assertTrue(Arrays.equals(actualObjectContent, objectContent)); + String actualObjectRaw = new String(actualObjectContent); + Assert.assertTrue(actualObjectRaw.equals(objectRaw)); + + } catch (Exception e) { + Assert.fail(); + } + } + +} \ No newline at end of file From 1650cc57b2492467c5b0a1420080e3f3636629f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20Villalba=C2=A0=20=C2=A0=20=C2=A0=20=C2=A0?= Date: Thu, 26 Mar 2020 14:41:02 -0300 Subject: [PATCH 03/20] DXL Tiered Stora: Add UT, code grooming, javadoc --- broker/src/broker/ClusterHelper.java | 1 + build.gradle | 8 +- .../sample/BasicConsumerProducerExample.java | 13 +- ...cS3TierStorageConsumerProducerExample.java | 46 +- .../adapter/DatabusProducerRecordAdapter.java | 14 +- .../opendxl/databus/entities/RoutingData.java | 7 +- .../databus/entities/S3TierStorage.java | 82 +++- .../producer/DatabusTierStorageProducer.java | 340 +++++++++++---- .../opendxl/databus/producer/Producer.java | 110 +++-- .../internal/AvroMessageDeserializer.java | 25 +- .../internal/InternalDeserializer.java | 7 + .../internal/LegacyMessageDeserializer.java | 17 +- .../internal/MessageDeserializer.java | 12 + .../internal/RawMessageDeserializer.java | 9 +- .../databus/entities/S3TierStorageTest.java | 4 +- ...3TierStorageTestForUnreachableService.java | 98 +++++ .../DatabusTierStorageProducerTest.java | 397 ++++++++++++++++++ ...tabusTierStorageProducerWithoutS3Test.java | 187 +++++++++ 18 files changed, 1212 insertions(+), 165 deletions(-) create mode 100644 src/test/java/com/opendxl/databus/entities/S3TierStorageTestForUnreachableService.java create mode 100644 src/test/java/com/opendxl/databus/producer/DatabusTierStorageProducerTest.java create mode 100644 src/test/java/com/opendxl/databus/producer/DatabusTierStorageProducerWithoutS3Test.java diff --git a/broker/src/broker/ClusterHelper.java b/broker/src/broker/ClusterHelper.java index 8080cbf..1e73d1d 100644 --- a/broker/src/broker/ClusterHelper.java +++ 
b/broker/src/broker/ClusterHelper.java @@ -136,6 +136,7 @@ private Properties getConfig(final int port) { config.setProperty("num.partitions", String.valueOf(6)); config.setProperty("transaction.state.log.replication.factor", String.valueOf(1)); config.setProperty("transaction.state.log.min.isr", String.valueOf(1)); + config.setProperty("log.message.format.version", "2.3.0"); } catch (Exception e) { e.printStackTrace(); } diff --git a/build.gradle b/build.gradle index 41e12a9..214419c 100644 --- a/build.gradle +++ b/build.gradle @@ -59,14 +59,14 @@ configurations { dependencies { implementation 'org.apache.kafka:kafka-clients:2.3.1' implementation 'org.apache.kafka:kafka-streams:2.3.1' - implementation 'org.apache.avro:avro:1.9.0' - implementation 'com.google.code.gson:gson:2.8.5' + implementation 'org.apache.avro:avro:1.9.2' + implementation 'com.google.code.gson:gson:2.8.6' implementation 'org.apache.commons:commons-configuration2:2.7' implementation 'commons-lang:commons-lang:2.6' implementation 'org.slf4j:slf4j-api:1.7.30' implementation 'net.sf.jopt-simple:jopt-simple:5.0.4' - implementation 'software.amazon.awssdk:bom:2.10.73' - implementation 'com.amazonaws:aws-java-sdk-s3:1.11.731' + implementation 'software.amazon.awssdk:bom:2.11.1' + implementation 'com.amazonaws:aws-java-sdk-s3:1.11.751' testImplementation 'org.apache.kafka:kafka_2.12:2.3.1' testImplementation 'org.apache.zookeeper:zookeeper:3.5.5' testImplementation('io.netty:netty-all:4.1.43.Final') { diff --git a/sample/src/sample/BasicConsumerProducerExample.java b/sample/src/sample/BasicConsumerProducerExample.java index 3ab16a9..d6bb204 100644 --- a/sample/src/sample/BasicConsumerProducerExample.java +++ b/sample/src/sample/BasicConsumerProducerExample.java @@ -4,7 +4,6 @@ package sample; -import broker.ClusterHelper; import com.opendxl.databus.common.RecordMetadata; import com.opendxl.databus.common.internal.builder.TopicNameBuilder; import com.opendxl.databus.consumer.*; @@ -47,11 +46,11 @@ 
public class BasicConsumerProducerExample { public BasicConsumerProducerExample() { // Start Kafka cluster - ClusterHelper - .getInstance() - .addBroker(9092) - .zookeeperPort(2181) - .start(); +// ClusterHelper +// .getInstance() +// .addBroker(9092) +// .zookeeperPort(2181) +// .start(); // Prepare a Producer this.producer = getProducer(); @@ -197,7 +196,7 @@ synchronized private void stopExample(final ExecutorService executor) { try { closed.set(true); consumer.wakeup(); - ClusterHelper.getInstance().stop(); +// ClusterHelper.getInstance().stop(); executor.shutdown(); executor.awaitTermination(5, TimeUnit.SECONDS); } catch (InterruptedException e) { diff --git a/sample/src/sample/BasicS3TierStorageConsumerProducerExample.java b/sample/src/sample/BasicS3TierStorageConsumerProducerExample.java index f68eae9..d89dfb5 100644 --- a/sample/src/sample/BasicS3TierStorageConsumerProducerExample.java +++ b/sample/src/sample/BasicS3TierStorageConsumerProducerExample.java @@ -8,18 +8,34 @@ import com.amazonaws.ClientConfiguration; import com.opendxl.databus.common.RecordMetadata; import com.opendxl.databus.common.internal.builder.TopicNameBuilder; -import com.opendxl.databus.consumer.*; -import com.opendxl.databus.entities.*; -import com.opendxl.databus.producer.*; +import com.opendxl.databus.consumer.Consumer; +import com.opendxl.databus.consumer.ConsumerConfiguration; +import com.opendxl.databus.consumer.ConsumerRecord; +import com.opendxl.databus.consumer.ConsumerRecords; +import com.opendxl.databus.consumer.DatabusConsumer; +import com.opendxl.databus.entities.Headers; +import com.opendxl.databus.entities.MessagePayload; +import com.opendxl.databus.entities.RoutingData; +import com.opendxl.databus.entities.S3TierStorage; +import com.opendxl.databus.entities.TierStorage; +import com.opendxl.databus.entities.TierStorageMetadata; +import com.opendxl.databus.producer.Callback; +import com.opendxl.databus.producer.DatabusTierStorageProducer; +import 
com.opendxl.databus.producer.Producer; +import com.opendxl.databus.producer.ProducerConfig; +import com.opendxl.databus.producer.ProducerRecord; import com.opendxl.databus.serialization.ByteArrayDeserializer; import com.opendxl.databus.serialization.ByteArraySerializer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.IOException; import java.nio.charset.Charset; import java.time.LocalDateTime; -import java.util.*; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.Properties; +import java.util.UUID; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; @@ -28,6 +44,9 @@ public class BasicS3TierStorageConsumerProducerExample { + private static final String AWS_REGION = "add-aws-region-name-here"; + private static final String S3_ACCESS_KEY = "add-your-access-key-here"; + private static final String S3_SECRET_KEY = "add-your-secret-key-here"; private final Producer producer; private final ExecutorService executor; private final TierStorage tierStorage; @@ -50,12 +69,12 @@ public BasicS3TierStorageConsumerProducerExample() { .zookeeperPort(2181) .start(); - + // Prepare a S3 Tiered Storage ClientConfiguration awsClientConfiguration = new ClientConfiguration(); - - this.tierStorage = new S3TierStorage("", - "", - "", awsClientConfiguration); + this.tierStorage = new S3TierStorage(AWS_REGION, + awsClientConfiguration, + S3_ACCESS_KEY, + S3_SECRET_KEY); // Prepare a Producer this.producer = getProducer(); @@ -77,8 +96,7 @@ public Producer getProducer() { config.put(ProducerConfig.LINGER_MS_CONFIG, "100"); config.put(ProducerConfig.BATCH_SIZE_CONFIG, "150000"); config.put(ProducerConfig.TRANSACTIONAL_ID_CONFIG, UUID.randomUUID().toString()); - config.put(ProducerConfig.ENABLE_IDEMPOTENCE_CONFIG, true); - return new DatabusTierStorageProducer(config, new ByteArraySerializer(), null); + return new DatabusTierStorageProducer(config, new 
ByteArraySerializer(), tierStorage); } public Consumer getConsumer() { @@ -158,7 +176,7 @@ private Runnable getConsumerTask() { consumer.unsubscribe(); try { consumer.close(); - } catch (IOException e) { + } catch (Exception e) { LOG.error(e.getMessage()); } LOG.info("Consumer closed"); @@ -172,10 +190,8 @@ public ProducerRecord getProducerRecord(final String topic, final byte[] String key = String.valueOf(System.currentTimeMillis()); TierStorageMetadata tStorageMetadata = new TierStorageMetadata("databus-poc-test", topic + key); RoutingData routingData = new RoutingData(topic, key, null, tStorageMetadata); - Headers headers = new Headers(); headers.put("k","v"); - MessagePayload messagePayload = new MessagePayload<>(payload); return new ProducerRecord<>(routingData, headers, messagePayload); } diff --git a/src/main/java/com/opendxl/databus/common/internal/adapter/DatabusProducerRecordAdapter.java b/src/main/java/com/opendxl/databus/common/internal/adapter/DatabusProducerRecordAdapter.java index 9b99273..101f8b2 100644 --- a/src/main/java/com/opendxl/databus/common/internal/adapter/DatabusProducerRecordAdapter.java +++ b/src/main/java/com/opendxl/databus/common/internal/adapter/DatabusProducerRecordAdapter.java @@ -26,15 +26,15 @@ public final class DatabusProducerRecordAdapter

/** * The message deserializer. */ - private final Serializer

messageSerializer; + private final Serializer

userSerializer; /** * Constructor * - * @param messageSerializer a {@link Serializer} instance used for Serializing the payload. + * @param userSerializer a {@link Serializer} instance used for Serializing the payload. */ - public DatabusProducerRecordAdapter(final Serializer

messageSerializer) { - this.messageSerializer = messageSerializer; + public DatabusProducerRecordAdapter(final Serializer

userSerializer) { + this.userSerializer = userSerializer; } /** @@ -46,7 +46,7 @@ public DatabusProducerRecordAdapter(final Serializer

messageSerializer) { */ @Override public org.apache.kafka.clients.producer.ProducerRecord - adapt(final ProducerRecord sourceProducerRecord) { + adapt(final ProducerRecord

sourceProducerRecord) { final Headers clonedHeaders = sourceProducerRecord.getHeaders().clone(); @@ -60,7 +60,7 @@ public DatabusProducerRecordAdapter(final Serializer

messageSerializer) { } // Add internal headers to let consumer knows the payload is tiered storage - TierStorageMetadata tierStorageMetadata = sourceProducerRecord.getRoutingData().getTierStorageMetadata(); + final TierStorageMetadata tierStorageMetadata = sourceProducerRecord.getRoutingData().getTierStorageMetadata(); if (tierStorageMetadata != null && tierStorageMetadata.getBucketName() != null && !tierStorageMetadata.getBucketName().isEmpty() && tierStorageMetadata.getObjectName() != null && !tierStorageMetadata.getObjectName().isEmpty() @@ -70,7 +70,7 @@ public DatabusProducerRecordAdapter(final Serializer

messageSerializer) { } final DatabusMessage databusMessage = - new MessagePayloadAdapter

(messageSerializer) + new MessagePayloadAdapter<>(userSerializer) .adapt(sourceProducerRecord.payload(), clonedHeaders); final String targetTopic = diff --git a/src/main/java/com/opendxl/databus/entities/RoutingData.java b/src/main/java/com/opendxl/databus/entities/RoutingData.java index 76ed3b1..7854cf1 100644 --- a/src/main/java/com/opendxl/databus/entities/RoutingData.java +++ b/src/main/java/com/opendxl/databus/entities/RoutingData.java @@ -16,7 +16,8 @@ * Represent a address where a message must be sent. * It is used by {@link ProducerRecord} * to know what the destination is. - * It contains a mandatory topic name as well as optionals sharding key and tenant group and partitions. + * It contains a mandatory topic name as well as optionals sharding key, tenant group, + * partitions and tier storage metadata. *

*

* See how to use in {@link DatabusProducer} example @@ -37,12 +38,12 @@ public class RoutingData { /** * The topic name */ - private String topic = null; + private String topic; /** * The sharding key value */ - private String shardingKey = null; + private String shardingKey; /** * The tenant group diff --git a/src/main/java/com/opendxl/databus/entities/S3TierStorage.java b/src/main/java/com/opendxl/databus/entities/S3TierStorage.java index a99bdf6..a04f4ac 100644 --- a/src/main/java/com/opendxl/databus/entities/S3TierStorage.java +++ b/src/main/java/com/opendxl/databus/entities/S3TierStorage.java @@ -9,7 +9,6 @@ import com.amazonaws.services.s3.internal.Mimetypes; import com.amazonaws.services.s3.model.GetObjectRequest; import com.amazonaws.services.s3.model.ObjectMetadata; -import com.amazonaws.services.s3.model.PutObjectResult; import com.amazonaws.services.s3.model.S3Object; import com.opendxl.databus.exception.DatabusClientRuntimeException; import org.apache.commons.io.IOUtils; @@ -19,6 +18,11 @@ import java.io.ByteArrayInputStream; import java.io.InputStream; +/** + * It is a built-in AWS S3 Tier Storage. + * + * It implements mechanisms to upload and download AWS S3 objects + */ public class S3TierStorage implements TierStorage { /** @@ -31,6 +35,13 @@ public class S3TierStorage implements TierStorage { */ private AmazonS3 s3Client; + /** + * Constructor used to create a role-based authenticated tier storage instance. + * + * @param awsRegion AWS region + * @param config AWS client configuration + * @throws DatabusClientRuntimeException exception if the underlying AWS S3 client cannot be created + */ public S3TierStorage(final String awsRegion, final ClientConfiguration config) { @@ -43,15 +54,26 @@ public S3TierStorage(final String awsRegion, try { this.s3Client = s3Builder.build(); } catch (Exception e) { - e.printStackTrace(); - + final String errMsg = "Error creating a S3 Tier Storage. 
Region: " + awsRegion + " " + e.getMessage(); + LOG.error(errMsg, e); + throw new DatabusClientRuntimeException(errMsg, e, this.getClass()); } } - public S3TierStorage(final String awsAccessKey, - final String awsSecretKey, - final String awsRegion, - final ClientConfiguration config) { + + /** + * Constructor used to create a tier storage instance with AWS access and secret key + * + * @param awsRegion AWS region + * @param config AWS client configuration + * @param awsAccessKey AWS access key + * @param awsSecretKey AWS secret key + * @throws DatabusClientRuntimeException exception if the underlying AWS S3 client cannot be created + */ + public S3TierStorage(final String awsRegion, + final ClientConfiguration config, + final String awsAccessKey, + final String awsSecretKey) { AmazonS3ClientBuilder s3Builder = AmazonS3ClientBuilder.standard(); @@ -66,16 +88,24 @@ public S3TierStorage(final String awsAccessKey, try { this.s3Client = s3Builder.build(); } catch (Exception e) { - e.printStackTrace(); + final String errMsg = "Error creating a S3 Tier Storage. Region: " + awsRegion + " " + e.getMessage(); + LOG.error(errMsg, e); + throw new DatabusClientRuntimeException(errMsg, e, this.getClass()); } - - } + + /** + * Upload a object to AWS S3 bucket + * + * @param s3BucketName AWS S3 bucket + * @param s3KeyName AWS S3 object name + * @param payload AWS object content + * @throws DatabusClientRuntimeException exception if the underlying AWS S3 fails. 
+ * + */ @Override - public void put(final String s3BucketName, - final String s3KeyName, - final byte[] payload) { + public void put(final String s3BucketName, final String s3KeyName, final byte[] payload) { try { if (!s3Client.doesBucketExistV2(s3BucketName)) { @@ -86,7 +116,7 @@ public void put(final String s3BucketName, metadata.setContentLength(payload.length); metadata.setContentType(Mimetypes.MIMETYPE_HTML); InputStream s3Object = new ByteArrayInputStream(payload); - PutObjectResult putObjectResult = s3Client.putObject(s3BucketName, s3KeyName, s3Object, metadata); + s3Client.putObject(s3BucketName, s3KeyName, s3Object, metadata); } catch (Exception e) { final String errMsg = "Error uploading S3 object: Bucket: " + " Object: " @@ -98,19 +128,37 @@ public void put(final String s3BucketName, } - public boolean doesObjectExist(String s3BucketName, String s3KeyName) { + /** + * Check if an AWS S3 object exists + * + * @param s3BucketName AWS S3 bucket + * @param s3KeyName AWS object name + * @return a boolean + * @throws DatabusClientRuntimeException exception if the underlying AWS S3 fails. + */ + @Override + public boolean doesObjectExist(final String s3BucketName, final String s3KeyName) { try { return s3Client.doesObjectExist(s3BucketName, s3KeyName); } catch (Exception e) { - final String errMsg = "Error trying to reach S3 object: Bucket: " + " Object: " + s3KeyName + " " + final String errMsg = "Error trying to find a S3 object: Bucket: " + " Object: " + s3KeyName + " " + e.getMessage(); LOG.error(errMsg, e); throw new DatabusClientRuntimeException(errMsg, e, this.getClass()); } } + + /** + * Download a AWS S3 object content + * + * @param s3BucketName AWS S3 bucket name + * @param s3KeyName AWS S3 object name + * @return the object content + * @throws DatabusClientRuntimeException exception if the underlying AWS S3 fails. 
+ */ @Override - public byte[] get(String s3BucketName, String s3KeyName) { + public byte[] get(final String s3BucketName, final String s3KeyName) { try { S3Object s3Object = s3Client.getObject(new GetObjectRequest(s3BucketName, s3KeyName)); return IOUtils.toByteArray(s3Object.getObjectContent()); diff --git a/src/main/java/com/opendxl/databus/producer/DatabusTierStorageProducer.java b/src/main/java/com/opendxl/databus/producer/DatabusTierStorageProducer.java index 7a3ee8a..adccea8 100644 --- a/src/main/java/com/opendxl/databus/producer/DatabusTierStorageProducer.java +++ b/src/main/java/com/opendxl/databus/producer/DatabusTierStorageProducer.java @@ -15,48 +15,96 @@ import com.opendxl.databus.exception.DatabusClientRuntimeException; import com.opendxl.databus.serialization.Serializer; import com.opendxl.databus.serialization.internal.MessageSerializer; +import org.apache.kafka.common.errors.AuthorizationException; +import org.apache.kafka.common.errors.OutOfOrderSequenceException; +import org.apache.kafka.common.errors.ProducerFencedException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.util.Map; -import java.util.Optional; import java.util.Properties; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; - +/** + * It writes a Message to kafka and stores Payload and Header in a Tier Storage. The kafka message is used like + * offsets control and to point to payload which is stored in the Tier Storage. + * + * @param

Payload's type, tipically a byte[] + */ public class DatabusTierStorageProducer

extends DatabusProducer

{ + /** + * The logger + */ private static final Logger LOG = LoggerFactory.getLogger(DatabusTierStorageProducer.class); + /** + * Used to save the message in a separated store + */ private TierStorage tierStorage; + + /** + * Transform a user payload in a {@link DatabusMessage} + */ private MessagePayloadAdapter

messagePayloadAdapter; + /** + * Constructor + * + * @param configs Producer configuration + * @param userSerializer user serializer + * @param tierStorage tier storage + */ public DatabusTierStorageProducer(final Map configs, final Serializer

userSerializer, final TierStorage tierStorage) { this(configs, userSerializer, null, tierStorage); } + /** + * Constructor + * + * @param configs producer configuration + * @param userSerializer user serializer + * @param credential credentials + * @param tierStorage tier storage + */ public DatabusTierStorageProducer(final Map configs, final Serializer

userSerializer, final Credential credential, final TierStorage tierStorage) { super(configs, userSerializer, credential); if (tierStorage == null) { throw new IllegalArgumentException("Tier Storage cannot be null"); } + validateConfiguration(configs); this.tierStorage = tierStorage; setFieldMembers(userSerializer); initTransactions(); } + /** + * Constructor + * + * @param properties producer configuration + * @param userSerializer user serializer + * @param tierStorage tier storage + */ public DatabusTierStorageProducer(final Properties properties, final Serializer

userSerializer, final TierStorage tierStorage) { this(properties, userSerializer, null, tierStorage); } + /** + * Constructor + * + * @param properties producer configuration + * @param userSerializer user serializer + * @param credential credential + * @param tierStorage tier storage + */ public DatabusTierStorageProducer(final Properties properties, final Serializer

userSerializer, final Credential credential, final TierStorage tierStorage) { @@ -64,88 +112,215 @@ public DatabusTierStorageProducer(final Properties properties, final Serializer< if (tierStorage == null) { throw new IllegalArgumentException("Tier Storage cannot be null"); } + validateConfiguration(properties); + this.tierStorage = tierStorage; setFieldMembers(userSerializer); initTransactions(); } private void setFieldMembers(Serializer

userSerializer) { setKafkaValueSerializer(new MessageSerializer()); // The serializer used bu Kafka - this.messagePayloadAdapter = new MessagePayloadAdapter

(userSerializer); - setDatabusProducerRecordAdapter(new DatabusProducerRecordAdapter

(userSerializer)); + this.messagePayloadAdapter = new MessagePayloadAdapter<>(userSerializer); + setDatabusProducerRecordAdapter(new DatabusProducerRecordAdapter<>(userSerializer)); } + private void validateConfiguration(final Map config) { + Properties properties = new Properties(); + try { + properties.putAll(config); + } catch (Exception e) { + throw new IllegalArgumentException("Producer configuration is invalid ERROR:" + e.getMessage()); + } + validateConfiguration(properties); + } - @Override - public void send(final ProducerRecord record) { - send(record, null); + private void validateConfiguration(final Properties config) { + if (config.get(ProducerConfig.TRANSACTIONAL_ID_CONFIG) == null) { + throw new IllegalArgumentException("Transaction Id cannot be null or empty"); + } + final String transactionId = config.get(ProducerConfig.TRANSACTIONAL_ID_CONFIG).toString(); + if (transactionId == null || transactionId.trim().isEmpty()) { + throw new IllegalArgumentException("Transaction Id cannot be null or empty"); + } } + /** + * It writes a Message to kafka and stores Payload and Header in Tier Storage. + * The kafka message has headers information pointing to Tier Storage payload. + * Both operation are in the same tansaction. If something goes wrong, they will be consistently aborted + * + * @param producerRecord producer record + */ @Override - public void send(ProducerRecord

producerRecord, final Callback callback) { + public void send(final ProducerRecord

producerRecord) { + try { + validateTierStorageMetadata(producerRecord); + + // Get the Tier Storage from RoutindData which was already created by the user + final TierStorageMetadata tierStorageMetadata = + producerRecord.getRoutingData().getTierStorageMetadata(); + + // Serialize the producerRecord payload to be stored with TieredStorage + // when callback being invoked by Kafka + final DatabusMessage databusMessage = + messagePayloadAdapter.adapt(producerRecord.payload(), producerRecord.getHeaders()); + final byte[] databusMessageSerialized = getKafkaValueSerializer().serialize("", databusMessage); + + // Remove the producerRecord payload to be written in kafka. + final ProducerRecord

adaptedProducerRecord = new ProducerRecord<>(producerRecord.getRoutingData(), + producerRecord.getHeaders(), + new MessagePayload<>(null)); + + // Transform a Databus ProducerRecord in a Kafka Producer Record + org.apache.kafka.clients.producer.ProducerRecord targetProducerRecord = + getDatabusProducerRecordAdapter().adapt(adaptedProducerRecord); + + try { + beginTransaction(); + super.sendKafkaRecord(targetProducerRecord); + tierStorage.put(tierStorageMetadata.getBucketName(), + tierStorageMetadata.getObjectName(), + databusMessageSerialized); + commitTransaction(); + LOG.info("Send Ok. Message was sent and payload was stored in Tier Storage"); + } catch (ProducerFencedException | OutOfOrderSequenceException | AuthorizationException e) { + super.flush(); + super.close(); + final String errMsg = "Send cannot be performed. Producer throws an irrecoverable exception " + + "during a transaction. Producer is closed effective immediately. ERROR:" + e.getMessage(); + LOG.error(errMsg, e); + throw new DatabusClientRuntimeException(errMsg, e, this.getClass()); + + } catch (Exception e) { + abortTransaction(); + final String errMsg = "Send cannot be performed. Producer throws an exception during a transaction. " + + "Producer continues active. Message should be sent again to retry. ERROR:" + e.getMessage(); + LOG.error(errMsg, e); + throw new DatabusClientRuntimeException(errMsg, e, Producer.class); + } + } catch (Exception e) { + final String errMsg = "send cannot be performed: ERROR:" + e.getMessage(); + LOG.error(errMsg, e); + throw new DatabusClientRuntimeException(errMsg, e, Producer.class); + } + + } + protected void validateTierStorageMetadata(ProducerRecord

producerRecord) { if (producerRecord.getRoutingData().getTierStorageMetadata() == null - || producerRecord.getRoutingData().getTierStorageMetadata().getBucketName() == null - || producerRecord.getRoutingData().getTierStorageMetadata().getBucketName().isEmpty() - || producerRecord.getRoutingData().getTierStorageMetadata().getObjectName() == null - || producerRecord.getRoutingData().getTierStorageMetadata().getObjectName().isEmpty() + || producerRecord.getRoutingData().getTierStorageMetadata().getBucketName() == null + || producerRecord.getRoutingData().getTierStorageMetadata().getBucketName().isEmpty() + || producerRecord.getRoutingData().getTierStorageMetadata().getObjectName() == null + || producerRecord.getRoutingData().getTierStorageMetadata().getObjectName().isEmpty() ) { - final String errMsg = "Bucket metadatada is invalid"; + final String errMsg = "Send cannot be performed. Bucket metadatada is invalid"; LOG.error(errMsg); throw new IllegalArgumentException(errMsg); } + } - final TierStorageMetadata tierStorageMetadata = - producerRecord.getRoutingData().getTierStorageMetadata(); + /** + * It writes a Message to kafka and stores Payload in Tier Storage. + * The kafka message has headers information pointing to Tier Storage payload. So that a Consumer can recover + * Both operation are in the same tansaction. If something goes wrong, they will be consistently aborted + * + * @param producerRecord The non-null record to send + * @param callback A user-supplied callback to execute when the record has been acknowledged by the server + * (null indicates no callback) + */ + @Override + public void send(ProducerRecord

producerRecord, final Callback callback) { - // Serialize the producerRecord payload to be stored with TieredStorage when callback being invoked by Kafka - final DatabusMessage databusMessage = - messagePayloadAdapter.adapt(producerRecord.payload(), producerRecord.getHeaders()); - final byte[] databusMessageSerialized = getKafkaValueSerializer().serialize("", databusMessage); + validateTierStorageMetadata(producerRecord); - // Remove the producerRecord headers and payload - final ProducerRecord

adaptedProducerRecord = new ProducerRecord(producerRecord.getRoutingData(), - producerRecord.getHeaders(), - new MessagePayload(null)); + if (callback == null) { + final String errMsg = "Send cannot be performed. Producer Callback is invalid"; + LOG.error(errMsg); + throw new IllegalArgumentException(errMsg); + } + + try { + + // Get the Tier Storage from RoutindData which was already created by the user + final TierStorageMetadata tierStorageMetadata = + producerRecord.getRoutingData().getTierStorageMetadata(); + + // Serialize the producerRecord payload to be stored with TieredStorage when callback being invoked by Kafka + final DatabusMessage databusMessage = + messagePayloadAdapter.adapt(producerRecord.payload(), producerRecord.getHeaders()); + final byte[] kafkaValueSerializer = getKafkaValueSerializer().serialize("", databusMessage); + // Remove the producerRecord payload to be written in kafka. + final ProducerRecord

adaptedProducerRecord = new ProducerRecord<>(producerRecord.getRoutingData(), + producerRecord.getHeaders(), + new MessagePayload<>(null)); - // Get a Kafka Producer Record made up by a DatabusMessage: - // version = AVRO_1_S3_TIER_STORAGE_VERSION_NUMBER - // headers = empty - // payload = empty - org.apache.kafka.clients.producer.ProducerRecord targetProducerRecord = - getDatabusProducerRecordAdapter().adapt(adaptedProducerRecord); + // Transform a Databus ProducerRecord in a Kafka Producer Record + org.apache.kafka.clients.producer.ProducerRecord targetProducerRecord = + getDatabusProducerRecordAdapter().adapt(adaptedProducerRecord); - // Create the callback - CountDownLatch latch = new CountDownLatch(1); - final CallbackAdapterTierStorage callbackAdapterTierStorage; - if (callback != null) { + // Create the callback + CountDownLatch latch = new CountDownLatch(1); + final CallbackAdapterTierStorage callbackAdapterTierStorage; callbackAdapterTierStorage = new CallbackAdapterTierStorage(callback, - databusMessageSerialized, + kafkaValueSerializer, latch, tierStorageMetadata); - } else { - callbackAdapterTierStorage = null; - } - beginTransaction(); - super.sendKafkaRecord(targetProducerRecord, callbackAdapterTierStorage); - try { - // wait for callback ends - final boolean callbackFinished = latch.await(10000, TimeUnit.MILLISECONDS); - if (callbackFinished) { - if (callbackAdapterTierStorage.isOk()) { - commitTransaction(); - } else { + try { + beginTransaction(); + super.sendKafkaRecord(targetProducerRecord, callbackAdapterTierStorage); + // wait for callback ends + final boolean callbackFinished = latch.await(10000, TimeUnit.MILLISECONDS); + if (callbackFinished) { // means the callback finished before timeout + if (callbackAdapterTierStorage.isMessageAndPayloadStored()) { + commitTransaction(); + LOG.info("Send OK. 
Message was sent and payload was stored in Tier Storage"); + } else { // means something was wrong in kafka or tier storage + abortTransaction(); // Logging is already performed in the Callback + throw new DatabusClientRuntimeException("Send cannot be performed. Record not produced. " + + "Something was wrong producing the message in Kafka or " + + " storing the payload in Tier Storage", this.getClass()); + } + } else { // means that the callback has not finished in time abortTransaction(); + final String errMsg = "Send cannot be performed. Record not produced. " + + "Timeout: Too long time taken by Kafka or Tier Storage."; + LOG.error(errMsg); + throw new DatabusClientRuntimeException(errMsg, this.getClass()); } - } else { // means that the callback has not finished in time - LOG.error("Record not produced. Too long time taken by tier storage."); + } catch (InterruptedException e) { + abortTransaction(); + final String errMsg = "Send cannot be performed. Producer was interrupted while " + + "waiting for a Callback response. " + + "Producer continues active. Message should be sent again to retry. ERROR:" + e.getMessage(); + LOG.error(errMsg, e); + throw new DatabusClientRuntimeException(errMsg, e, Producer.class); + } catch (ProducerFencedException | OutOfOrderSequenceException | AuthorizationException e) { + super.flush(); + super.close(); + final String errMsg = "Send cannot be performed. Producer throws an irrecoverable exception " + + "during a transaction. Producer is closed effective immediately. ERROR:" + e.getMessage(); + LOG.error(errMsg, e); + throw new DatabusClientRuntimeException(errMsg, e, this.getClass()); + + } catch (Exception e) { abortTransaction(); + final String errMsg = "Producer throws an exception during a transaction. " + + "Producer continues active. Message should be sent again to retry. 
ERROR:" + e.getMessage(); + LOG.error(errMsg); + throw new DatabusClientRuntimeException(errMsg, e, Producer.class); } - } catch (InterruptedException e) { - abortTransaction(); + } catch (Exception e) { + if (e instanceof DatabusClientRuntimeException) { + throw e; + } + final String errMsg = "Send cannot be performed: " + e.getMessage(); + LOG.error(errMsg, e); + throw new DatabusClientRuntimeException(errMsg, e, Producer.class); } + } @@ -155,23 +330,42 @@ public void send(ProducerRecord

producerRecord, final Callback callback) { * It forwards a kafka callback to databus callback */ private class CallbackAdapterTierStorage implements org.apache.kafka.clients.producer.Callback { - private final Callback callback; - private final byte[] databusMessageSerialized; + /** + * Callback defined by the user when invoking send method + */ + private final Callback userCallback; + + /** + * the kafka value serializer + */ + private final byte[] kafkaValueSerializer; + + /** + * An object to signal when callback has finished + */ private CountDownLatch latch; + + /** + * The Tier Storage in charged to store payload + */ private TierStorageMetadata tierStorageMetadata; - private AtomicBoolean isOk = new AtomicBoolean(false); /** - * @param callback Databus callback - * @param databusMessageSerialized - * @param latch + * storage operation result */ - CallbackAdapterTierStorage(final Callback callback, - final byte[] databusMessageSerialized, + private AtomicBoolean isMessageAndPayloadStored = new AtomicBoolean(false); + + /** + * @param userCallback user callback + * @param kafkaValueSerializer kafka serializer + * @param latch a object to signal when callback + */ + CallbackAdapterTierStorage(final Callback userCallback, + final byte[] kafkaValueSerializer, final CountDownLatch latch, final TierStorageMetadata tierStorageMetadata) { - this.callback = callback; - this.databusMessageSerialized = databusMessageSerialized; + this.userCallback = userCallback; + this.kafkaValueSerializer = kafkaValueSerializer; this.latch = latch; this.tierStorageMetadata = tierStorageMetadata; } @@ -187,7 +381,8 @@ public void onCompletion(final org.apache.kafka.clients.producer.RecordMetadata final Exception exception) { if (exception != null) { - LOG.error("The record was not produced. " + exception.getMessage(), exception); + LOG.error("Send cannot be performed. The record was not produced. 
ERROR:" + + exception.getMessage(), exception); response(recordMetadata, exception); return; } @@ -196,10 +391,11 @@ public void onCompletion(final org.apache.kafka.clients.producer.RecordMetadata tierStorage.put(tierStorageMetadata.getBucketName(), tierStorageMetadata.getObjectName(), - databusMessageSerialized); + kafkaValueSerializer); response(recordMetadata, exception); } catch (DatabusClientRuntimeException databusException) { - LOG.error("The record was not produced. " + databusException.getMessage(), databusException); + LOG.error("Send cannot be performed. The record was not produced. ERROR:" + + databusException.getMessage(), databusException); response(recordMetadata, databusException); } } @@ -207,21 +403,23 @@ public void onCompletion(final org.apache.kafka.clients.producer.RecordMetadata /** * Send callback response * - * @param recordMetadata recordMetadata + * @param kafkaRecordMetadata recordMetadata * @param exception exception */ private void response(final org.apache.kafka.clients.producer.RecordMetadata kafkaRecordMetadata, final Exception exception) { - isOk.set(exception == null); + isMessageAndPayloadStored.set(exception == null); latch.countDown(); - final RecordMetadata databusRecordMetadata = - Optional.ofNullable(new RecordMetadata(kafkaRecordMetadata)) - .orElse(null); - callback.onCompletion(databusRecordMetadata, exception); + + RecordMetadata databusRecordMetadata = null; + if (kafkaRecordMetadata != null) { + databusRecordMetadata = new RecordMetadata(kafkaRecordMetadata); + } + userCallback.onCompletion(databusRecordMetadata, exception); } - protected boolean isOk() { - return isOk.get(); + protected boolean isMessageAndPayloadStored() { + return isMessageAndPayloadStored.get(); } } diff --git a/src/main/java/com/opendxl/databus/producer/Producer.java b/src/main/java/com/opendxl/databus/producer/Producer.java index bcc6b25..81b1207 100644 --- a/src/main/java/com/opendxl/databus/producer/Producer.java +++ 
b/src/main/java/com/opendxl/databus/producer/Producer.java @@ -4,8 +4,6 @@ package com.opendxl.databus.producer; -import com.opendxl.databus.consumer.Consumer; -import com.opendxl.databus.exception.DatabusClientRuntimeException; import com.opendxl.databus.common.MetricName; import com.opendxl.databus.common.PartitionInfo; import com.opendxl.databus.common.RecordMetadata; @@ -13,20 +11,24 @@ import com.opendxl.databus.common.internal.adapter.DatabusProducerRecordAdapter; import com.opendxl.databus.common.internal.adapter.MetricNameMapAdapter; import com.opendxl.databus.common.internal.adapter.PartitionInfoListAdapter; +import com.opendxl.databus.consumer.Consumer; import com.opendxl.databus.consumer.OffsetAndMetadata; import com.opendxl.databus.consumer.OffsetCommitCallback; import com.opendxl.databus.entities.internal.DatabusMessage; +import com.opendxl.databus.exception.DatabusClientRuntimeException; import com.opendxl.databus.producer.metric.ProducerMetric; import com.opendxl.databus.producer.metric.ProducerMetricBuilder; import com.opendxl.databus.producer.metric.ProducerMetricEnum; import com.opendxl.databus.serialization.internal.DatabusKeySerializer; import org.apache.commons.lang.StringUtils; -import org.slf4j.LoggerFactory; import org.slf4j.Logger; +import org.slf4j.LoggerFactory; -import java.util.Map; -import java.util.List; +import java.time.Duration; +import java.time.temporal.TemporalUnit; import java.util.HashMap; +import java.util.List; +import java.util.Map; import java.util.concurrent.TimeUnit; /** @@ -91,7 +93,7 @@ public Map getConfiguration() { * and the buffer is full. *

InterruptException If the thread is interrupted while blocked */ - public void send(final ProducerRecord record) { + public void send(final ProducerRecord

record) { send(record, null); } @@ -165,7 +167,9 @@ public void send(final ProducerRecord

producerRecord, final Callback callback sendKafkaRecord(targetProducerRecord, callbackAdapter); } catch (Exception e) { - throw new DatabusClientRuntimeException("send cannot be performed: " + e.getMessage(), e, Producer.class); + final String errMsg = "send cannot be performed: " + e.getMessage(); + LOG.error(errMsg, e); + throw new DatabusClientRuntimeException(errMsg, e, Producer.class); } } @@ -175,6 +179,11 @@ public void send(final ProducerRecord

producerRecord, final Callback callback producer.send(record, callback); } + protected void + sendKafkaRecord(final org.apache.kafka.clients.producer.ProducerRecord record) { + producer.send(record); + } + /** * Invoking this method makes all buffered records immediately available to send (even if linger.ms is * greater than 0) and blocks on the completion of the requests associated with these records. The post-condition @@ -212,7 +221,9 @@ public void flush() { try { producer.flush(); } catch (Exception e) { - throw new DatabusClientRuntimeException("flush cannot be performed :" + e.getMessage(), e, Producer.class); + final String errMsg = "flush cannot be performed :" + e.getMessage(); + LOG.error(errMsg, e); + throw new DatabusClientRuntimeException(errMsg, e, Producer.class); } } @@ -231,8 +242,9 @@ public List partitionsFor(final String topic) { List partitions = producer.partitionsFor(topic); return new PartitionInfoListAdapter().adapt(partitions); } catch (Exception e) { - throw new DatabusClientRuntimeException("partitionsFor cannot be performed :" - + e.getMessage(), e, Producer.class); + final String errMsg = "partitionsFor cannot be performed: " + e.getMessage(); + LOG.error(errMsg, e); + throw new DatabusClientRuntimeException(errMsg, e, Producer.class); } } @@ -249,8 +261,9 @@ public List partitionsFor(final String topic) { return new MetricNameMapAdapter().adapt(metrics); } catch (Exception e) { - throw new DatabusClientRuntimeException("metrics cannot be performed :" - + e.getMessage(), e, Producer.class); + final String errMsg = "metrics cannot be performed: " + e.getMessage(); + LOG.error(errMsg, e); + throw new DatabusClientRuntimeException(errMsg, e, Producer.class); } } @@ -272,7 +285,9 @@ public void close() { try { producer.close(); } catch (Exception e) { - throw new DatabusClientRuntimeException("close cannot be performed :" + e.getMessage(), e, Producer.class); + final String errMsg = "close cannot be performed :" + e.getMessage(); + 
LOG.error(errMsg, e); + throw new DatabusClientRuntimeException(errMsg, e, Producer.class); } } @@ -294,15 +309,48 @@ public void close() { *

InterruptException If the thread is interrupted while blocked *

IllegalArgumentException If the timeout is negative. */ + @Deprecated public void close(final long timeout, final TimeUnit timeUnit) { try { producer.close(timeout, timeUnit); } catch (Exception e) { - throw new DatabusClientRuntimeException("close cannot be performed :" + e.getMessage(), e, Producer.class); + final String errMsg = "close cannot be performed :" + e.getMessage(); + LOG.error(errMsg, e); + throw new DatabusClientRuntimeException(errMsg, e, Producer.class); } } + /** + * This method waits up to timeout for the producer to complete the sending of all incomplete requests. + *

+ * If the producer is unable to complete all requests before the timeout expires, this method will fail + * any unsent and unacknowledged records immediately. + *

+ * If invoked from within a {@link Callback} this method will not block and will be equivalent to + close(0, TimeUnit.MILLISECONDS). This is done since no further sending will happen while + blocking the I/O thread of the producer. + * + * @param duration The maximum time to wait for producer to complete any pending requests. The value should be + * non-negative. Specifying a timeout of zero means do not wait for pending send + * requests to complete. + * @param timeUnit The time unit for the timeout + @throws DatabusClientRuntimeException If close method fails. The original cause could be any of these exceptions: + *

InterruptException If the thread is interrupted while blocked + *

IllegalArgumentException If the timeout is negative. + */ + public void close(long duration, TemporalUnit timeUnit) { + try { + producer.close(Duration.of(duration, timeUnit)); + } catch (Exception e) { + final String errMsg = "close cannot be performed :" + e.getMessage(); + LOG.error(errMsg, e); + throw new DatabusClientRuntimeException(errMsg, e, Producer.class); + } + + } + + /** * Set the DatabusKeySerializer in producer * @@ -315,7 +363,7 @@ protected void setKafkaKeySerializer(final DatabusKeySerializer keySerializer) { /** * Set the value serializer in producer * - * @param valueSerializer A Serializer object instance for the value serializer + * @param kafkaValueSerializer A Serializer object instance for the value serializer */ protected void setKafkaValueSerializer(final org.apache.kafka.common.serialization.Serializer @@ -343,8 +391,7 @@ protected org.apache.kafka.common.serialization.Serializer getKa /** * Set a Kafka producer instance to the producer. - * - * @return A {@link org.apache.kafka.clients.producer.Producer} object instance to set in the producer + * @param producer Producer */ protected void setProducer(final org.apache.kafka.clients.producer.Producer producer) { this.producer = producer; @@ -360,10 +407,10 @@ protected void setDatabusProducerRecordAdapter(final DatabusProducerRecordAdapte } + /** - * Set a {@link DatabusProducerRecordAdapter} associated to the producer. * - * @param databusProducerRecordAdapter The {@link DatabusProducerRecordAdapter} to set to the producer + * @return Databus producer adapter */ protected DatabusProducerRecordAdapter

getDatabusProducerRecordAdapter() { return this.databusProducerRecordAdapter; @@ -445,8 +492,9 @@ public void initTransactions() { try { producer.initTransactions(); } catch (Exception e) { - throw new DatabusClientRuntimeException("initTransactions cannot be performed: " - + e.getMessage(), e, Producer.class); + final String errMsg = "initTransactions cannot be performed: " + e.getMessage(); + LOG.error(errMsg, e); + throw new DatabusClientRuntimeException(errMsg, e, Producer.class); } } @@ -468,8 +516,9 @@ public void beginTransaction() { try { producer.beginTransaction(); } catch (Exception e) { - throw new DatabusClientRuntimeException("beginTransaction cannot be performed: " - + e.getMessage(), e, Producer.class); + final String errMsg = "beginTransaction cannot be performed: " + e.getMessage(); + LOG.error(errMsg, e); + throw new DatabusClientRuntimeException(errMsg, e, Producer.class); } } @@ -519,8 +568,9 @@ public void sendOffsetsToTransaction(final Map { */ P deserialize(String topic, byte[] data); + /** + * + * @param topic the topic where the message comes from + * @param data data to be deserialized + * @param tierStorage tier storage where the payload should be read + * @return data of type P + */ P deserialize(String topic, byte[] data, TierStorage tierStorage); diff --git a/src/main/java/com/opendxl/databus/serialization/internal/LegacyMessageDeserializer.java b/src/main/java/com/opendxl/databus/serialization/internal/LegacyMessageDeserializer.java index db85664..d4af046 100644 --- a/src/main/java/com/opendxl/databus/serialization/internal/LegacyMessageDeserializer.java +++ b/src/main/java/com/opendxl/databus/serialization/internal/LegacyMessageDeserializer.java @@ -33,6 +33,13 @@ public final class LegacyMessageDeserializer implements InternalDeserializer { + /** + * Tier Storage + */ private TierStorage tierStorage; + /** + * Constructor + * + * @param tierStorage If null it will be ignored and payload won't be read + */ public 
MessageDeserializer(final TierStorage tierStorage) { this.tierStorage = tierStorage; } + /** + * Constructor + */ public MessageDeserializer() { this(null); } @@ -35,6 +46,7 @@ public void configure(final Map map, final boolean b) { /** * Deserialize a message to a {@link DatabusMessage} + * If tierStorage is not null will be used to read the payload from the underlying Tier Storage. * * @param topic The topic name. * @param serializedMessage A serialized message. diff --git a/src/main/java/com/opendxl/databus/serialization/internal/RawMessageDeserializer.java b/src/main/java/com/opendxl/databus/serialization/internal/RawMessageDeserializer.java index 964d5fa..9659fc5 100644 --- a/src/main/java/com/opendxl/databus/serialization/internal/RawMessageDeserializer.java +++ b/src/main/java/com/opendxl/databus/serialization/internal/RawMessageDeserializer.java @@ -25,8 +25,15 @@ public DatabusMessage deserialize(final String topic, final byte[] data) { return new DatabusMessage(null, data); } + /** + * + * @param topic the topic where the message comes from + * @param data data to be deserialized + * @param tierStorage It is not used + * @return A {@link DatabusMessage} with binary data as is. 
+ */ @Override - public DatabusMessage deserialize(String topic, byte[] data, TierStorage tierStorage) { + public DatabusMessage deserialize(final String topic, final byte[] data, final TierStorage tierStorage) { return deserialize(topic, data); } } diff --git a/src/test/java/com/opendxl/databus/entities/S3TierStorageTest.java b/src/test/java/com/opendxl/databus/entities/S3TierStorageTest.java index ef0e886..d2eccc5 100644 --- a/src/test/java/com/opendxl/databus/entities/S3TierStorageTest.java +++ b/src/test/java/com/opendxl/databus/entities/S3TierStorageTest.java @@ -54,8 +54,8 @@ public void shouldPutAngGetAnS3ObjectWithCredentials() { final byte[] objectContent = objectRaw.getBytes(); try { - TierStorage tierStorage = new S3TierStorage(AWS_ACCESS_KEY, AWS_SECRET_KEY, AWS_REGION, - new ClientConfiguration()); + TierStorage tierStorage = new S3TierStorage(AWS_REGION,new ClientConfiguration(), + AWS_ACCESS_KEY, AWS_SECRET_KEY); PA.setValue(tierStorage, "s3Client", client); tierStorage.put(bucketName, objectName, objectContent); diff --git a/src/test/java/com/opendxl/databus/entities/S3TierStorageTestForUnreachableService.java b/src/test/java/com/opendxl/databus/entities/S3TierStorageTestForUnreachableService.java new file mode 100644 index 0000000..af4a779 --- /dev/null +++ b/src/test/java/com/opendxl/databus/entities/S3TierStorageTestForUnreachableService.java @@ -0,0 +1,98 @@ +package com.opendxl.databus.entities; + +import com.amazonaws.ClientConfiguration; +import com.amazonaws.auth.AWSStaticCredentialsProvider; +import com.amazonaws.auth.AnonymousAWSCredentials; +import com.amazonaws.client.builder.AwsClientBuilder; +import com.amazonaws.services.s3.AmazonS3Client; +import com.amazonaws.services.s3.AmazonS3ClientBuilder; +import com.opendxl.databus.exception.DatabusClientRuntimeException; +import io.findify.s3mock.S3Mock; +import junit.extensions.PA; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +public class 
S3TierStorageTestForUnreachableService { + + private static final String AWS_SECRET_KEY = "secretKey"; + private static final String AWS_ACCESS_KEY = "accessKey"; + private static final String AWS_REGION = "us-east-1"; + private static S3Mock api; + private static AmazonS3Client client; + + + @BeforeClass + public static void beforeClass() { + api = new S3Mock.Builder().withPort(8001).withInMemoryBackend().build(); + AwsClientBuilder.EndpointConfiguration endpoint = + new AwsClientBuilder + .EndpointConfiguration("http://localhost:8001", "us-east-1"); + + client = (AmazonS3Client) AmazonS3ClientBuilder + .standard() + .withPathStyleAccessEnabled(true) + .withEndpointConfiguration(endpoint) + .withCredentials(new AWSStaticCredentialsProvider(new AnonymousAWSCredentials())) + .build(); + } + + + @Test + public void shouldThrowAnExceptionWhenPutAnObject() { + final String bucketName = "bucket-name"; + final String objectName = "object-name"; + final String objectRaw = "Hello!"; + final byte[] objectContent = objectRaw.getBytes(); + + try { + TierStorage tierStorage = new S3TierStorage(AWS_REGION, new ClientConfiguration()); + PA.setValue(tierStorage, "s3Client", client); + tierStorage.put(bucketName, objectName, objectContent); + Assert.fail("An Exception is expected"); + } catch (DatabusClientRuntimeException e) { + Assert.assertTrue(true); + } catch (Exception e) { + Assert.fail("Unexpected Exception"); + } + } + + @Test + public void shouldThrowAnExceptionWhenGetAnObject() { + final String bucketName = "bucket-name"; + final String objectName = "object-name"; + final String objectRaw = "Hello!"; + final byte[] objectContent = objectRaw.getBytes(); + + try { + TierStorage tierStorage = new S3TierStorage(AWS_REGION, new ClientConfiguration()); + PA.setValue(tierStorage, "s3Client", client); + tierStorage.get(bucketName, objectName); + Assert.fail("An Exception is expected"); + } catch (DatabusClientRuntimeException e) { + Assert.assertTrue(true); + } catch (Exception 
e) { + Assert.fail("Unexpected Exception"); + } + } + + @Test + public void shouldThrowAnExceptionSearchAnObject() { + final String bucketName = "bucket-name"; + final String objectName = "object-name"; + final String objectRaw = "Hello!"; + final byte[] objectContent = objectRaw.getBytes(); + + try { + TierStorage tierStorage = new S3TierStorage(AWS_REGION, new ClientConfiguration()); + PA.setValue(tierStorage, "s3Client", client); + tierStorage.doesObjectExist(bucketName, objectName); + Assert.fail("An Exception is expected"); + } catch (DatabusClientRuntimeException e) { + Assert.assertTrue(true); + } catch (Exception e) { + Assert.fail("Unexpected Exception"); + } + } + +} \ No newline at end of file diff --git a/src/test/java/com/opendxl/databus/producer/DatabusTierStorageProducerTest.java b/src/test/java/com/opendxl/databus/producer/DatabusTierStorageProducerTest.java new file mode 100644 index 0000000..e165252 --- /dev/null +++ b/src/test/java/com/opendxl/databus/producer/DatabusTierStorageProducerTest.java @@ -0,0 +1,397 @@ +package com.opendxl.databus.producer; + +import broker.ClusterHelper; +import com.amazonaws.ClientConfiguration; +import com.amazonaws.auth.AWSStaticCredentialsProvider; +import com.amazonaws.auth.AnonymousAWSCredentials; +import com.amazonaws.client.builder.AwsClientBuilder; +import com.amazonaws.services.s3.AmazonS3Client; +import com.amazonaws.services.s3.AmazonS3ClientBuilder; +import com.opendxl.databus.common.internal.util.HeaderInternalField; +import com.opendxl.databus.consumer.*; +import com.opendxl.databus.entities.*; +import com.opendxl.databus.exception.DatabusClientRuntimeException; +import com.opendxl.databus.serialization.ByteArrayDeserializer; +import com.opendxl.databus.serialization.ByteArraySerializer; +import io.findify.s3mock.S3Mock; +import junit.extensions.PA; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.nio.charset.Charset; +import 
java.time.LocalDateTime; +import java.util.*; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; + +public class DatabusTierStorageProducerTest { + + private static final String AWS_SECRET_KEY = "secretKey"; + private static final String AWS_ACCESS_KEY = "accessKey"; + private static final String AWS_REGION = "us-east-1"; + private static final String BUCKET_NAME = "databus-poc-test"; + private static S3Mock api; + private static AmazonS3Client client; + private static S3TierStorage tierStorage; + + @BeforeClass + public static void beforeClass() { + // Start Kafka cluster + ClusterHelper + .getInstance() + .addBroker(9092) + .zookeeperPort(2181) + .start(); + + api = new S3Mock.Builder().withPort(8001).withInMemoryBackend().build(); + api.start(); + AwsClientBuilder.EndpointConfiguration endpoint = + new AwsClientBuilder + .EndpointConfiguration("http://localhost:8001", "us-east-1"); + + client = (AmazonS3Client) AmazonS3ClientBuilder + .standard() + .withPathStyleAccessEnabled(true) + .withEndpointConfiguration(endpoint) + .withCredentials(new AWSStaticCredentialsProvider(new AnonymousAWSCredentials())) + .build(); + + tierStorage = new S3TierStorage(AWS_REGION, new ClientConfiguration(), + AWS_ACCESS_KEY, AWS_SECRET_KEY); + PA.setValue(tierStorage, "s3Client", client); + + } + @AfterClass + public static void afterClass() { + ClusterHelper.getInstance().stop(); + api.shutdown(); // kills the underlying actor system. Use api.stop() to just unbind the port. 
+ } + + @Test(expected = IllegalArgumentException.class) + public void shouldFailWhenTierStorageIsNull() { + final Map config = new HashMap(); + config.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); + config.put(ProducerConfig.CLIENT_ID_CONFIG, "producer-id-sample"); + config.put(ProducerConfig.LINGER_MS_CONFIG, "100"); + config.put(ProducerConfig.BATCH_SIZE_CONFIG, "150000"); + config.put(ProducerConfig.TRANSACTIONAL_ID_CONFIG, UUID.randomUUID().toString()); + config.put(ProducerConfig.ENABLE_IDEMPOTENCE_CONFIG, true); + new DatabusTierStorageProducer(config, new ByteArraySerializer(), null); + } + + @Test(expected = IllegalArgumentException.class) + public void shouldFailWhenTransactionIdIsNotDefined() { + final Map config = new HashMap(); + config.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); + config.put(ProducerConfig.CLIENT_ID_CONFIG, "producer-id-sample"); + config.put(ProducerConfig.LINGER_MS_CONFIG, "100"); + config.put(ProducerConfig.BATCH_SIZE_CONFIG, "150000"); + + // The following line is commented on purpose to show that transaction id is not configured + //config.put(ProducerConfig.TRANSACTIONAL_ID_CONFIG, UUID.randomUUID().toString()); + + new DatabusTierStorageProducer(config, new ByteArraySerializer(), tierStorage); + } + + @Test(expected = IllegalArgumentException.class) + public void shouldFailWhenTransactionIdIsNull() { + final Map config = new HashMap(); + config.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); + config.put(ProducerConfig.CLIENT_ID_CONFIG, "producer-id-sample"); + config.put(ProducerConfig.LINGER_MS_CONFIG, "100"); + config.put(ProducerConfig.BATCH_SIZE_CONFIG, "150000"); + + // The following line set TransactionId null + config.put(ProducerConfig.TRANSACTIONAL_ID_CONFIG, null); + + new DatabusTierStorageProducer(config, new ByteArraySerializer(), tierStorage); + } + + @Test(expected = DatabusClientRuntimeException.class) + public void shouldFailWhenTransactionIdIsEmpty() { + 
final Map config = new HashMap(); + config.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); + config.put(ProducerConfig.CLIENT_ID_CONFIG, "producer-id-sample"); + config.put(ProducerConfig.LINGER_MS_CONFIG, "100"); + config.put(ProducerConfig.BATCH_SIZE_CONFIG, "150000"); + + // The following line set TransactionId empty + config.put(ProducerConfig.TRANSACTIONAL_ID_CONFIG, ""); + + new DatabusTierStorageProducer(config, new ByteArraySerializer(), tierStorage); + } + + + @Test + public void shouldProduceARecordWithCallBackAndTierStorageRecord() { + final String topicName = UUID.randomUUID().toString(); + + Producer producer = null; + Consumer consumer = null; + + try { + producer = getProducer(); + + // Prepare a record + final String message = "Hello World at "+ LocalDateTime.now(); + final byte[] payload = message.getBytes(Charset.defaultCharset()); + final String key = UUID.randomUUID().toString(); + final ProducerRecord producerRecord = getProducerRecord(topicName, payload, key); + + // Send the record and set an anonymous callback for check the result + CountDownLatch latch = new CountDownLatch(1); + producer.send(producerRecord, (metadata, exception) -> { + try { + if(exception != null) { + Assert.fail(exception.getMessage()); + } + } finally { + latch.countDown(); + } + }); + + // Wait for callback being invoked by Kafka + boolean isTimeout = latch.await(10000, TimeUnit.MILLISECONDS); + if(!isTimeout) { + Assert.fail("Producer take a long time to produce a record"); + return; + } + + // Consume the record + consumer = getConsumer(); + consumer.subscribe(Collections.singletonList(topicName)); + boolean closed = false; + while(!closed) { + final ConsumerRecords records = consumer.poll(1000); + for(ConsumerRecord record : records) { + if(record.getKey().equals(key)) { + final String actualMessage = new String(record.getMessagePayload().getPayload()); + Assert.assertTrue(actualMessage.equals(message)); + closed = true; + break; + } + } + } + 
Assert.assertTrue(tierStorage.doesObjectExist(BUCKET_NAME, topicName + key)); + + } catch (Exception e) { + Assert.fail(e.getMessage()); + } finally { + if (producer != null) { + producer.close(); + } + if (consumer != null) { + consumer.close(); + } + + } + } + + @Test + public void shouldProduceARecordWithoutCallBackAndTierStorageRecord() { + final String topicName = UUID.randomUUID().toString(); + + Producer producer = null; + Consumer consumer = null; + + try { + producer = getProducer(); + + // Prepare a record + final String message = "Hello World at "+ LocalDateTime.now(); + final byte[] payload = message.getBytes(Charset.defaultCharset()); + final String key = UUID.randomUUID().toString(); + final ProducerRecord producerRecord = getProducerRecord(topicName, payload, key); + + // Send the record + producer.send(producerRecord); + + // Consume the record + consumer = getConsumer(); + consumer.subscribe(Collections.singletonList(topicName)); + boolean closed = false; + while(!closed) { + final ConsumerRecords records = consumer.poll(1000); + for(ConsumerRecord record : records) { + if(record.getKey().equals(key)) { + final String actualMessage = new String(record.getMessagePayload().getPayload()); + Assert.assertTrue(actualMessage.equals(message)); + closed = true; + break; + } + } + } + Assert.assertTrue(tierStorage.doesObjectExist(BUCKET_NAME, topicName + key)); + + } catch (Exception e) { + Assert.fail(e.getMessage()); + } finally { + if (producer != null) { + producer.close(); + } + if (consumer != null) { + consumer.close(); + } + + } + } + + + @Test + public void shouldConsumeWithoutTierStorage() { + final String topicName = UUID.randomUUID().toString(); + + Producer producer = null; + Consumer consumer = null; + + try { + producer = getProducer(); + + // Prepare a record + final String message = "Hello World at "+ LocalDateTime.now(); + final byte[] payload = message.getBytes(Charset.defaultCharset()); + final String key = UUID.randomUUID().toString(); 
+ final ProducerRecord producerRecord = getProducerRecord(topicName, payload, key); + + // Send the record and set an anonymous callback for check the result + CountDownLatch latch = new CountDownLatch(1); + producer.send(producerRecord, (metadata, exception) -> { + try { + if(exception != null) { + Assert.fail(exception.getMessage()); + } + } finally { + latch.countDown(); + } + }); + + // Wait for callback being invoked by Kafka + boolean isTimeout = latch.await(10000, TimeUnit.MILLISECONDS); + if(!isTimeout) { + Assert.fail("Producer take a long time to produce a record"); + return; + } + + // Consume the record + consumer = getConsumerWOTierStorage(); + consumer.subscribe(Collections.singletonList(topicName)); + boolean closed = false; + while(!closed) { + final ConsumerRecords records = consumer.poll(500); + for (ConsumerRecord record : records) { + final Headers headers = record.getHeaders(); + final String bucketName = headers.get(HeaderInternalField.TIER_STORAGE_BUCKET_NAME_KEY); + final String objectName = headers.get(HeaderInternalField.TIER_STORAGE_OBJECT_NAME_KEY); + Assert.assertTrue(bucketName.equals(BUCKET_NAME)); + Assert.assertTrue(objectName.equals(topicName + key)); + closed = true; + break; + } + } + + } catch (Exception e) { + Assert.fail(e.getMessage()); + } finally { + if (producer != null) { + producer.close(); + } + if (consumer != null) { + consumer.close(); + } + } + } + + + @Test + public void shouldConsumeWithoutCallbackAndTierStorage() { + final String topicName = UUID.randomUUID().toString(); + + Producer producer = null; + Consumer consumer = null; + + try { + producer = getProducer(); + + // Prepare a record + final String message = "Hello World at "+ LocalDateTime.now(); + final byte[] payload = message.getBytes(Charset.defaultCharset()); + final String key = UUID.randomUUID().toString(); + final ProducerRecord producerRecord = getProducerRecord(topicName, payload, key); + + // Send the record + producer.send(producerRecord); + + 
// Consume the record + consumer = getConsumerWOTierStorage(); + consumer.subscribe(Collections.singletonList(topicName)); + boolean closed = false; + while(!closed) { + final ConsumerRecords records = consumer.poll(500); + for (ConsumerRecord record : records) { + final Headers headers = record.getHeaders(); + final String bucketName = headers.get(HeaderInternalField.TIER_STORAGE_BUCKET_NAME_KEY); + final String objectName = headers.get(HeaderInternalField.TIER_STORAGE_OBJECT_NAME_KEY); + Assert.assertTrue(bucketName.equals(BUCKET_NAME)); + Assert.assertTrue(objectName.equals(topicName + key)); + closed = true; + break; + } + } + + } catch (Exception e) { + Assert.fail(e.getMessage()); + } finally { + if (producer != null) { + producer.close(); + } + if (consumer != null) { + consumer.close(); + } + } + } + + public Consumer getConsumer() { + final Properties consumerProps = new Properties(); + consumerProps.put(ConsumerConfiguration.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); + consumerProps.put(ConsumerConfiguration.GROUP_ID_CONFIG, UUID.randomUUID().toString()); + consumerProps.put(ConsumerConfiguration.ENABLE_AUTO_COMMIT_CONFIG, "true"); + consumerProps.put(ConsumerConfiguration.SESSION_TIMEOUT_MS_CONFIG, "30000"); + consumerProps.put(ConsumerConfiguration.CLIENT_ID_CONFIG, "consumer-id-sample"); + consumerProps.put(ConsumerConfiguration.AUTO_OFFSET_RESET_CONFIG, "earliest"); + return new DatabusConsumer(consumerProps, new ByteArrayDeserializer(), tierStorage ); + } + + public Consumer getConsumerWOTierStorage() { + final Properties consumerProps = new Properties(); + consumerProps.put(ConsumerConfiguration.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); + consumerProps.put(ConsumerConfiguration.GROUP_ID_CONFIG, UUID.randomUUID().toString()); + consumerProps.put(ConsumerConfiguration.ENABLE_AUTO_COMMIT_CONFIG, "true"); + consumerProps.put(ConsumerConfiguration.SESSION_TIMEOUT_MS_CONFIG, "30000"); + consumerProps.put(ConsumerConfiguration.CLIENT_ID_CONFIG, 
"consumer-id-sample"); + consumerProps.put(ConsumerConfiguration.AUTO_OFFSET_RESET_CONFIG, "earliest"); + return new DatabusConsumer(consumerProps, new ByteArrayDeserializer()); + } + public Producer getProducer() { + final Map config = new HashMap(); + config.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); + config.put(ProducerConfig.CLIENT_ID_CONFIG, "producer-id-sample"); + config.put(ProducerConfig.LINGER_MS_CONFIG, "100"); + config.put(ProducerConfig.BATCH_SIZE_CONFIG, "150000"); + config.put(ProducerConfig.TRANSACTIONAL_ID_CONFIG, UUID.randomUUID().toString()); + return new DatabusTierStorageProducer(config, new ByteArraySerializer(), tierStorage); + } + + public ProducerRecord getProducerRecord(final String topic, final byte[] payload, String key) { + final TierStorageMetadata tStorageMetadata = + new TierStorageMetadata(BUCKET_NAME, topic + key); + final RoutingData routingData = new RoutingData(topic, key, null, tStorageMetadata); + final Headers headers = new Headers(); + final MessagePayload messagePayload = new MessagePayload<>(payload); + return new ProducerRecord<>(routingData, headers, messagePayload); + } + + + +} \ No newline at end of file diff --git a/src/test/java/com/opendxl/databus/producer/DatabusTierStorageProducerWithoutS3Test.java b/src/test/java/com/opendxl/databus/producer/DatabusTierStorageProducerWithoutS3Test.java new file mode 100644 index 0000000..6c2b99c --- /dev/null +++ b/src/test/java/com/opendxl/databus/producer/DatabusTierStorageProducerWithoutS3Test.java @@ -0,0 +1,187 @@ +package com.opendxl.databus.producer; + +import broker.ClusterHelper; +import com.amazonaws.ClientConfiguration; +import com.amazonaws.auth.AWSStaticCredentialsProvider; +import com.amazonaws.auth.AnonymousAWSCredentials; +import com.amazonaws.client.builder.AwsClientBuilder; +import com.amazonaws.services.s3.AmazonS3Client; +import com.amazonaws.services.s3.AmazonS3ClientBuilder; +import com.opendxl.databus.consumer.Consumer; +import 
com.opendxl.databus.consumer.ConsumerConfiguration; +import com.opendxl.databus.consumer.DatabusConsumer; +import com.opendxl.databus.entities.*; +import com.opendxl.databus.exception.DatabusClientRuntimeException; +import com.opendxl.databus.serialization.ByteArrayDeserializer; +import com.opendxl.databus.serialization.ByteArraySerializer; +import io.findify.s3mock.S3Mock; +import junit.extensions.PA; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.nio.charset.Charset; +import java.time.LocalDateTime; +import java.util.HashMap; +import java.util.Map; +import java.util.Properties; +import java.util.UUID; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; + +public class DatabusTierStorageProducerWithoutS3Test { + + private static final String AWS_SECRET_KEY = "secretKey"; + private static final String AWS_ACCESS_KEY = "accessKey"; + private static final String AWS_REGION = "us-east-1"; + private static final String BUCKET_NAME = "databus-poc-test"; + private static S3Mock api; + private static AmazonS3Client client; + private static S3TierStorage tierStorage; + + @BeforeClass + public static void beforeClass() { + // Start Kafka cluster + ClusterHelper + .getInstance() + .addBroker(9092) + .zookeeperPort(2181) + .start(); + + api = new S3Mock.Builder().withPort(8001).withInMemoryBackend().build(); + // api.start is missing on purpose to keep Tier Storage down + AwsClientBuilder.EndpointConfiguration endpoint = + new AwsClientBuilder + .EndpointConfiguration("http://localhost:8001", "us-east-1"); + + client = (AmazonS3Client) AmazonS3ClientBuilder + .standard() + .withPathStyleAccessEnabled(true) + .withEndpointConfiguration(endpoint) + .withCredentials(new AWSStaticCredentialsProvider(new AnonymousAWSCredentials())) + .build(); + + tierStorage = new S3TierStorage(AWS_REGION, new ClientConfiguration(), + AWS_ACCESS_KEY, AWS_SECRET_KEY); + 
PA.setValue(tierStorage, "s3Client", client); + + } + @AfterClass + public static void afterClass() { + ClusterHelper.getInstance().stop(); + } + + @Test + public void shouldFailBecauseTierStorageIsUnreachable() { + final String topicName = UUID.randomUUID().toString(); + + Producer producer = null; + + try { + producer = getProducer(); + + // Prepare a record + final String message = "Hello World at " + LocalDateTime.now(); + final byte[] payload = message.getBytes(Charset.defaultCharset()); + final String key = UUID.randomUUID().toString(); + final ProducerRecord producerRecord = getProducerRecord(topicName, payload, key); + + // Send the record + CountDownLatch latch = new CountDownLatch(1); + producer.send(producerRecord, (metadata, exception) -> { + try { + if (exception != null) { + Assert.fail(exception.getMessage()); + } + } finally { + latch.countDown(); + } + }); + latch.await(10000, TimeUnit.MILLISECONDS); + Assert.fail(); + } catch (DatabusClientRuntimeException e) { + Assert.assertTrue(true); + } catch (Exception e) { + Assert.fail(); + } finally { + if (producer != null) { + producer.close(); + } + } + } + + + @Test + public void shouldFailBecauseTierStorageIsUnreachable1() { + final String topicName = UUID.randomUUID().toString(); + + Producer producer = null; + + try { + producer = getProducer(); + + // Prepare a record + final String message = "Hello World at " + LocalDateTime.now(); + final byte[] payload = message.getBytes(Charset.defaultCharset()); + final String key = UUID.randomUUID().toString(); + final ProducerRecord producerRecord = getProducerRecord(topicName, payload, key); + + // Send the record + producer.send(producerRecord); + Assert.fail(); + } catch (DatabusClientRuntimeException e) { + Assert.assertTrue(true); + } catch (Exception e) { + Assert.fail(); + } finally { + if (producer != null) { + producer.close(); + } + } + } + + public Consumer getConsumer() { + final Properties consumerProps = new Properties(); + 
consumerProps.put(ConsumerConfiguration.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); + consumerProps.put(ConsumerConfiguration.GROUP_ID_CONFIG, UUID.randomUUID().toString()); + consumerProps.put(ConsumerConfiguration.ENABLE_AUTO_COMMIT_CONFIG, "true"); + consumerProps.put(ConsumerConfiguration.SESSION_TIMEOUT_MS_CONFIG, "30000"); + consumerProps.put(ConsumerConfiguration.CLIENT_ID_CONFIG, "consumer-id-sample"); + consumerProps.put(ConsumerConfiguration.AUTO_OFFSET_RESET_CONFIG, "earliest"); + return new DatabusConsumer(consumerProps, new ByteArrayDeserializer(), tierStorage ); + } + + public Consumer getConsumerWOTierStorage() { + final Properties consumerProps = new Properties(); + consumerProps.put(ConsumerConfiguration.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); + consumerProps.put(ConsumerConfiguration.GROUP_ID_CONFIG, UUID.randomUUID().toString()); + consumerProps.put(ConsumerConfiguration.ENABLE_AUTO_COMMIT_CONFIG, "true"); + consumerProps.put(ConsumerConfiguration.SESSION_TIMEOUT_MS_CONFIG, "30000"); + consumerProps.put(ConsumerConfiguration.CLIENT_ID_CONFIG, "consumer-id-sample"); + consumerProps.put(ConsumerConfiguration.AUTO_OFFSET_RESET_CONFIG, "earliest"); + return new DatabusConsumer(consumerProps, new ByteArrayDeserializer()); + } + public Producer getProducer() { + final Map config = new HashMap(); + config.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); + config.put(ProducerConfig.CLIENT_ID_CONFIG, "producer-id-sample"); + config.put(ProducerConfig.LINGER_MS_CONFIG, "100"); + config.put(ProducerConfig.BATCH_SIZE_CONFIG, "150000"); + config.put(ProducerConfig.TRANSACTIONAL_ID_CONFIG, UUID.randomUUID().toString()); + config.put(ProducerConfig.ENABLE_IDEMPOTENCE_CONFIG, true); + return new DatabusTierStorageProducer(config, new ByteArraySerializer(), tierStorage); + } + + public ProducerRecord getProducerRecord(final String topic, final byte[] payload, String key) { + TierStorageMetadata tStorageMetadata = + new 
TierStorageMetadata(BUCKET_NAME, topic + key); + RoutingData routingData = new RoutingData(topic, key, null, tStorageMetadata); + Headers headers = new Headers(); + MessagePayload messagePayload = new MessagePayload<>(payload); + return new ProducerRecord<>(routingData, headers, messagePayload); + } + + + +} \ No newline at end of file From f20ae2e970b7f300ea18b489e0e3ac2f6180efed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20Villalba=C2=A0=20=C2=A0=20=C2=A0=20=C2=A0?= Date: Thu, 23 Apr 2020 12:05:10 -0300 Subject: [PATCH 04/20] Update dependency versions, fix example --- build.gradle | 9 +++++---- sample/src/sample/BasicConsumerProducerExample.java | 13 +++++++------ 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/build.gradle b/build.gradle index 214419c..05afb9f 100644 --- a/build.gradle +++ b/build.gradle @@ -19,7 +19,7 @@ plugins { id "com.github.johnrengelman.shadow" version "4.0.3" id "kr.motd.sphinx" version "2.3.1" id "jacoco" - id "org.owasp.dependencycheck" version "5.2.1" + id "org.owasp.dependencycheck" version "5.3.2" } group 'com.opendxl' @@ -65,11 +65,12 @@ dependencies { implementation 'commons-lang:commons-lang:2.6' implementation 'org.slf4j:slf4j-api:1.7.30' implementation 'net.sf.jopt-simple:jopt-simple:5.0.4' - implementation 'software.amazon.awssdk:bom:2.11.1' - implementation 'com.amazonaws:aws-java-sdk-s3:1.11.751' + implementation 'software.amazon.awssdk:bom:2.13.0' + implementation 'com.amazonaws:aws-java-sdk-s3:1.11.766' + implementation 'commons-io:commons-io:2.6' testImplementation 'org.apache.kafka:kafka_2.12:2.3.1' testImplementation 'org.apache.zookeeper:zookeeper:3.5.5' - testImplementation('io.netty:netty-all:4.1.43.Final') { + testImplementation('io.netty:netty-all:4.1.48.Final') { force = true } testImplementation 'commons-io:commons-io:2.6' diff --git a/sample/src/sample/BasicConsumerProducerExample.java b/sample/src/sample/BasicConsumerProducerExample.java index d6bb204..3ab16a9 100644 --- 
a/sample/src/sample/BasicConsumerProducerExample.java +++ b/sample/src/sample/BasicConsumerProducerExample.java @@ -4,6 +4,7 @@ package sample; +import broker.ClusterHelper; import com.opendxl.databus.common.RecordMetadata; import com.opendxl.databus.common.internal.builder.TopicNameBuilder; import com.opendxl.databus.consumer.*; @@ -46,11 +47,11 @@ public class BasicConsumerProducerExample { public BasicConsumerProducerExample() { // Start Kafka cluster -// ClusterHelper -// .getInstance() -// .addBroker(9092) -// .zookeeperPort(2181) -// .start(); + ClusterHelper + .getInstance() + .addBroker(9092) + .zookeeperPort(2181) + .start(); // Prepare a Producer this.producer = getProducer(); @@ -196,7 +197,7 @@ synchronized private void stopExample(final ExecutorService executor) { try { closed.set(true); consumer.wakeup(); -// ClusterHelper.getInstance().stop(); + ClusterHelper.getInstance().stop(); executor.shutdown(); executor.awaitTermination(5, TimeUnit.SECONDS); } catch (InterruptedException e) { From 94ad6cd4f6f8b33e454e8ca446b13f568f214d81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20Villalba=C2=A0=20=C2=A0=20=C2=A0=20=C2=A0?= Date: Thu, 23 Apr 2020 19:48:10 -0300 Subject: [PATCH 05/20] Add sphinx doc for Tiered Storage example --- docs/Basic-S3-tiered-storage-example.rst | 314 ++++++++++++++++++ docs/index.rst | 1 + .../producer/DatabusTierStorageProducer.java | 18 +- 3 files changed, 324 insertions(+), 9 deletions(-) create mode 100644 docs/Basic-S3-tiered-storage-example.rst diff --git a/docs/Basic-S3-tiered-storage-example.rst b/docs/Basic-S3-tiered-storage-example.rst new file mode 100644 index 0000000..fedc23a --- /dev/null +++ b/docs/Basic-S3-tiered-storage-example.rst @@ -0,0 +1,314 @@ +S3 Tiered Storage Example +--------------------------- + +This sample demonstrates how to produce messages to a Kafka topic and an S3 bucket. At the same time it shows how a consumer +is able to read messages from Kafka and S3 in a seamless fashion.
+ +Benefits of the Tiered Storage Feature + +- Reduces costs because it stores the message payload in an S3 bucket and uses a Kafka message part as control and index +- It stores to the Kafka topic and the S3 bucket atomically. +- Exposes a new extended Producer type to differentiate from the regular one. +- Consumer is backward compatible. +- Consumer is able to read raw Kafka, Databus or Kafka + S3 messages. +- Previous consumer (2.4.1 and below) won't break when reading a Tiered Storage message. + + +Code highlights are shown below: + +Sample Code +~~~~~~~~~~~ + +.. code:: java + + package sample; + + import broker.ClusterHelper; + import com.amazonaws.ClientConfiguration; + import com.opendxl.databus.common.RecordMetadata; + import com.opendxl.databus.common.internal.builder.TopicNameBuilder; + import com.opendxl.databus.consumer.Consumer; + import com.opendxl.databus.consumer.ConsumerConfiguration; + import com.opendxl.databus.consumer.ConsumerRecord; + import com.opendxl.databus.consumer.ConsumerRecords; + import com.opendxl.databus.consumer.DatabusConsumer; + import com.opendxl.databus.entities.Headers; + import com.opendxl.databus.entities.MessagePayload; + import com.opendxl.databus.entities.RoutingData; + import com.opendxl.databus.entities.S3TierStorage; + import com.opendxl.databus.entities.TierStorage; + import com.opendxl.databus.entities.TierStorageMetadata; + import com.opendxl.databus.producer.Callback; + import com.opendxl.databus.producer.DatabusTierStorageProducer; + import com.opendxl.databus.producer.Producer; + import com.opendxl.databus.producer.ProducerConfig; + import com.opendxl.databus.producer.ProducerRecord; + import com.opendxl.databus.serialization.ByteArrayDeserializer; + import com.opendxl.databus.serialization.ByteArraySerializer; + import org.slf4j.Logger; + import org.slf4j.LoggerFactory; + + import java.nio.charset.Charset; + import java.time.LocalDateTime; + import java.util.Collections; + import java.util.HashMap; + import java.util.Map; + 
import java.util.Properties; + import java.util.UUID; + import java.util.concurrent.ExecutorService; + import java.util.concurrent.Executors; + import java.util.concurrent.TimeUnit; + import java.util.concurrent.atomic.AtomicBoolean; + + + public class BasicS3TierStorageConsumerProducerExample { + + private static final String AWS_REGION = "add-aws-region-name-here"; + private static final String S3_ACCESS_KEY = "add-your-access-key-here"; + private static final String S3_SECRET_KEY = "add-your-secret-key-here"; + private final Producer producer; + private final ExecutorService executor; + private final TierStorage tierStorage; + private Consumer consumer; + private String producerTopic = "topic1"; + private String consumerTopic = "topic1"; + + private static final long PRODUCER_TIME_CADENCE_MS = 1000L; + private static final long CONSUMER_TIME_CADENCE_MS = 1000L; + private final AtomicBoolean closed = new AtomicBoolean(false); + + private static Logger LOG = LoggerFactory.getLogger(BasicS3TierStorageConsumerProducerExample.class); + + public BasicS3TierStorageConsumerProducerExample() { + + // Start Kafka cluster + ClusterHelper.getInstance().addBroker(9092).zookeeperPort(2181).start(); + + // Prepare a S3 Tiered Storage + ClientConfiguration awsClientConfiguration = new ClientConfiguration(); + this.tierStorage = new S3TierStorage(AWS_REGION, awsClientConfiguration, S3_ACCESS_KEY, S3_SECRET_KEY); + + // Prepare a Producer + this.producer = getProducer(); + + // Prepare a Consumer + this.consumer = getConsumer(); + + // Subscribe to topic + this.consumer.subscribe(Collections.singletonList(consumerTopic)); + + this.executor = Executors.newFixedThreadPool(2); + + } + + public Producer getProducer() { + final Map config = new HashMap(); + config.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); + config.put(ProducerConfig.CLIENT_ID_CONFIG, "producer-id-sample"); + config.put(ProducerConfig.LINGER_MS_CONFIG, "100"); + 
config.put(ProducerConfig.BATCH_SIZE_CONFIG, "150000"); + config.put(ProducerConfig.TRANSACTIONAL_ID_CONFIG, UUID.randomUUID().toString()); + return new DatabusTierStorageProducer(config, new ByteArraySerializer(), tierStorage); + + } + + public Consumer getConsumer() { + final Properties consumerProps = new Properties(); + consumerProps.put(ConsumerConfiguration.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); + consumerProps.put(ConsumerConfiguration.GROUP_ID_CONFIG, "cg1"); + consumerProps.put(ConsumerConfiguration.ENABLE_AUTO_COMMIT_CONFIG, "true"); + consumerProps.put(ConsumerConfiguration.SESSION_TIMEOUT_MS_CONFIG, "30000"); + consumerProps.put(ConsumerConfiguration.CLIENT_ID_CONFIG, "consumer-id-sample"); + return new DatabusConsumer(consumerProps, new ByteArrayDeserializer(), tierStorage); + } + + private Runnable getProducerTask() { + return () -> { + LOG.info("Producer started"); + while (!closed.get()) { + + // Prepare a record + final String message = "Hello World at "+ LocalDateTime.now(); + + // user should provide the encoding + final byte[] payload = message.getBytes(Charset.defaultCharset()); + final ProducerRecord producerRecord = getProducerRecord(producerTopic, payload); + + // Send the record + // Get headers as String + final StringBuilder headers = new StringBuilder().append("["); + producerRecord.getHeaders().getAll().forEach((k, v) -> headers.append("[" + k + ":" + v + "]")); + headers.append("]"); + + producer.send(producerRecord, new MyCallback(producerRecord.getRoutingData().getShardingKey())); + LOG.info("[PRODUCER -> KAFKA][SENDING MSG] ID " + producerRecord.getRoutingData().getShardingKey() + + " TOPIC:" + TopicNameBuilder.getTopicName(producerTopic, null) + + " HEADERS:" + headers + + " PAYLOAD:" + message); + + justWait(PRODUCER_TIME_CADENCE_MS); + } + producer.flush(); + producer.close(); + LOG.info("Producer closed"); + + }; + } + + private Runnable getConsumerTask() { + return () -> { + try { + LOG.info("Consumer started"); + while 
(!closed.get()) { + + // Polling the databus + final ConsumerRecords records = consumer.poll(CONSUMER_TIME_CADENCE_MS); + + // Iterate records + for (ConsumerRecord record : records) { + + // Get headers as String + final StringBuilder headers = new StringBuilder().append("["); + record.getHeaders().getAll().forEach((k, v) -> headers.append("[" + k + ":" + v + "]")); + headers.append("]"); + + LOG.info("[CONSUMER <- KAFKA][MSG RCEIVED] ID " + record.getKey() + + " TOPIC:" + record.getComposedTopic() + + " KEY:" + record.getKey() + + " PARTITION:" + record.getPartition() + + " OFFSET:" + record.getOffset() + + " TIMESTAMP:" + record.getTimestamp() + + " HEADERS:" + headers + + " PAYLOAD:" + new String(record.getMessagePayload().getPayload())); + } + consumer.commitAsync(); + } + } catch (Exception e) { + LOG.error(e.getMessage()); + } finally { + consumer.unsubscribe(); + try { + consumer.close(); + } catch (Exception e) { + LOG.error(e.getMessage()); + } + LOG.info("Consumer closed"); + + } + + }; + } + + public ProducerRecord getProducerRecord(final String topic, final byte[] payload) { + String key = String.valueOf(System.currentTimeMillis()); + TierStorageMetadata tStorageMetadata = new TierStorageMetadata("databus-poc-test", topic + key); + RoutingData routingData = new RoutingData(topic, key, null, tStorageMetadata); + Headers headers = new Headers(); + headers.put("k","v"); + MessagePayload messagePayload = new MessagePayload<>(payload); + return new ProducerRecord<>(routingData, headers, messagePayload); + } + + private void justWait(long time) { + try { + Thread.sleep(time); + } catch (InterruptedException e) { + e.printStackTrace(); + } + } + + private static class MyCallback implements Callback { + + private String shardingKey; + + public MyCallback(String shardingKey) { + + this.shardingKey = shardingKey; + } + + public void onCompletion(RecordMetadata metadata, Exception exception) { + if (exception != null) { + LOG.warn("Error sending a record " + 
exception.getMessage()); + return; + } + LOG.info("[PRODUCER <- KAFKA][OK MSG SENT] ID " + shardingKey + + " TOPIC:" + metadata.topic() + + " PARTITION:" + metadata.partition() + + " OFFSET:" + metadata.offset()); + } + } + + synchronized private void stopExample(final ExecutorService executor) { + try { + closed.set(true); + consumer.wakeup(); + ClusterHelper.getInstance().stop(); + executor.shutdown(); + executor.awaitTermination(5, TimeUnit.SECONDS); + } catch (InterruptedException e) { + } finally { + executor.shutdownNow(); + } + } + + public void startExample() throws InterruptedException { + + Runnable consumerTask = getConsumerTask(); + Runnable producerTask = getProducerTask(); + + executor.submit(consumerTask); + executor.submit(producerTask); + + Runtime.getRuntime().addShutdownHook( + new Thread( + new Runnable() { + public void run() { + stopExample(executor); + LOG.info("Example finished"); + } + })); + + } + + + public static void main(String[] args) throws InterruptedException { + LOG.info("Ctrl-C to finish"); + new BasicS3TierStorageConsumerProducerExample().startExample(); + } + + } + + +The first step is to create a ``DatabusTierStorageProducer`` by passing a ``S3TierStorage`` instance + +.. code:: java + + // Prepares a S3 Tiered Storage + this.tierStorage = new S3TierStorage(AWS_REGION, awsClientConfiguration, S3_ACCESS_KEY, S3_SECRET_KEY); + ... + public Producer getProducer() { + ... + // Creates a Tiered Storage Producer + return new DatabusTierStorageProducer(config, new ByteArraySerializer(), tierStorage); + } + +Then a ``ProducerRecord`` message should be created by using ``TierStorageMetadata`` instance. The S3 bucket and +the S3 object name must be specified. + +.. 
code:: java + + public ProducerRecord getProducerRecord(final String topic, final byte[] payload) { + String key = String.valueOf(System.currentTimeMillis()); + TierStorageMetadata tStorageMetadata = new TierStorageMetadata("databus-poc-test", topic + key); + RoutingData routingData = new RoutingData(topic, key, null, tStorageMetadata); + ... + MessagePayload messagePayload = new MessagePayload<>(payload); + return new ProducerRecord<>(routingData, headers, messagePayload); + } + +Finally it sends the message to Kafka and S3 + +.. code:: java + + producer.send(producerRecord, ...); + diff --git a/docs/index.rst b/docs/index.rst index 022d8d3..f5c0fa3 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -76,6 +76,7 @@ Basic Consumer-metrics-sample.rst Transactions-producer-consumer-example.rst Basic-push-consumer-example.rst + Basic-S3-tiered-storage-example.rst Bugs and Feedback ----------------- diff --git a/src/main/java/com/opendxl/databus/producer/DatabusTierStorageProducer.java b/src/main/java/com/opendxl/databus/producer/DatabusTierStorageProducer.java index adccea8..ca3cc97 100644 --- a/src/main/java/com/opendxl/databus/producer/DatabusTierStorageProducer.java +++ b/src/main/java/com/opendxl/databus/producer/DatabusTierStorageProducer.java @@ -166,7 +166,7 @@ public void send(final ProducerRecord

producerRecord) { messagePayloadAdapter.adapt(producerRecord.payload(), producerRecord.getHeaders()); final byte[] databusMessageSerialized = getKafkaValueSerializer().serialize("", databusMessage); - // Remove the producerRecord payload to be written in kafka. + // Remove the producerRecord payload to be written in kafka and keeps Headers. final ProducerRecord

adaptedProducerRecord = new ProducerRecord<>(producerRecord.getRoutingData(), producerRecord.getHeaders(), new MessagePayload<>(null)); @@ -248,9 +248,9 @@ public void send(ProducerRecord

producerRecord, final Callback callback) { // Serialize the producerRecord payload to be stored with TieredStorage when callback being invoked by Kafka final DatabusMessage databusMessage = messagePayloadAdapter.adapt(producerRecord.payload(), producerRecord.getHeaders()); - final byte[] kafkaValueSerializer = getKafkaValueSerializer().serialize("", databusMessage); + final byte[] kafkaValueSerialized = getKafkaValueSerializer().serialize("", databusMessage); - // Remove the producerRecord payload to be written in kafka. + // Remove the producerRecord payload to be written in kafka and keeps Headers. final ProducerRecord

adaptedProducerRecord = new ProducerRecord<>(producerRecord.getRoutingData(), producerRecord.getHeaders(), new MessagePayload<>(null)); @@ -263,7 +263,7 @@ public void send(ProducerRecord

producerRecord, final Callback callback) { CountDownLatch latch = new CountDownLatch(1); final CallbackAdapterTierStorage callbackAdapterTierStorage; callbackAdapterTierStorage = new CallbackAdapterTierStorage(callback, - kafkaValueSerializer, + kafkaValueSerialized, latch, tierStorageMetadata); @@ -338,7 +338,7 @@ private class CallbackAdapterTierStorage implements org.apache.kafka.clients.pro /** * the kafka value serializer */ - private final byte[] kafkaValueSerializer; + private final byte[] kafkaValueSerialized; /** * An object to signal when callback has finished @@ -357,15 +357,15 @@ private class CallbackAdapterTierStorage implements org.apache.kafka.clients.pro /** * @param userCallback user callback - * @param kafkaValueSerializer kafka serializer + * @param kafkaValueSerialized kafka serializer * @param latch a object to signal when callback */ CallbackAdapterTierStorage(final Callback userCallback, - final byte[] kafkaValueSerializer, + final byte[] kafkaValueSerialized, final CountDownLatch latch, final TierStorageMetadata tierStorageMetadata) { this.userCallback = userCallback; - this.kafkaValueSerializer = kafkaValueSerializer; + this.kafkaValueSerialized = kafkaValueSerialized; this.latch = latch; this.tierStorageMetadata = tierStorageMetadata; } @@ -391,7 +391,7 @@ public void onCompletion(final org.apache.kafka.clients.producer.RecordMetadata tierStorage.put(tierStorageMetadata.getBucketName(), tierStorageMetadata.getObjectName(), - kafkaValueSerializer); + kafkaValueSerialized); response(recordMetadata, exception); } catch (DatabusClientRuntimeException databusException) { LOG.error("Send cannot be performed. The record was not produced. 
ERROR:" From d81c0d475d5fc503492301e6ee7c2793a5a4ddfd Mon Sep 17 00:00:00 2001 From: rliberal Date: Tue, 5 May 2020 12:50:24 -0300 Subject: [PATCH 06/20] Added changes to build gradle for dep check issues --- build.gradle | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/build.gradle b/build.gradle index eca9986..966f68c 100644 --- a/build.gradle +++ b/build.gradle @@ -57,15 +57,22 @@ configurations { } dependencies { - implementation 'org.apache.kafka:kafka-clients:2.3.1' - implementation 'org.apache.kafka:kafka-streams:2.3.1' + implementation ('org.apache.kafka:kafka-clients:2.3.1') { + exclude group: 'org.scala-lang', module: 'scala-reflect' + } + implementation ('org.apache.kafka:kafka-streams:2.3.1') { + exclude group: 'org.scala-lang', module: 'scala-reflect' + } implementation 'org.apache.avro:avro:1.9.0' implementation 'com.google.code.gson:gson:2.8.5' implementation 'org.apache.commons:commons-configuration2:2.7' implementation 'commons-lang:commons-lang:2.6' implementation 'org.slf4j:slf4j-api:1.7.30' implementation 'net.sf.jopt-simple:jopt-simple:5.0.4' - testImplementation 'org.apache.kafka:kafka_2.12:2.3.1' + testImplementation('org.apache.kafka:kafka_2.12:2.3.1') { + exclude group: 'org.scala-lang', module: 'scala-reflect' + } + testImplementation group: 'org.scala-lang', name: 'scala-reflect', version: '2.12.11' testImplementation 'org.apache.zookeeper:zookeeper:3.5.5' testImplementation('io.netty:netty-all:4.1.43.Final') { force = true @@ -73,7 +80,10 @@ dependencies { testImplementation 'commons-io:commons-io:2.6' testImplementation 'junit:junit:4.12' testImplementation 'com.github.stefanbirkner:system-rules:1.19.0' - kafkaInMemory 'org.apache.kafka:kafka_2.12:2.3.1' + kafkaInMemory ('org.apache.kafka:kafka_2.12:2.3.1') { + exclude group: 'org.scala-lang', module: 'scala-reflect' + } + kafkaInMemory group: 'org.scala-lang', name: 'scala-reflect', version: '2.12.11' kafkaInMemory 'commons-io:commons-io:2.6' // 
This following section mitigates OWASP vulnerabilities report. From cd0ff4bad82f69ff299ef62ed23c2011edc19cb1 Mon Sep 17 00:00:00 2001 From: rliberal Date: Wed, 6 May 2020 14:39:08 -0300 Subject: [PATCH 07/20] Added changes to support fix in dep check --- gradle.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gradle.properties b/gradle.properties index ea978f0..edf895f 100644 --- a/gradle.properties +++ b/gradle.properties @@ -1 +1 @@ -version=2.5.0-SNAPSHOT +version=2.5.1-SNAPSHOT From cc98eaa28d10e3d90e0448097be9f7b488aff523 Mon Sep 17 00:00:00 2001 From: rliberal Date: Wed, 6 May 2020 17:14:28 -0300 Subject: [PATCH 08/20] Added changes to support build gradle and version --- build.gradle | 1 + gradle.properties | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/build.gradle b/build.gradle index 966f68c..8e86ed1 100644 --- a/build.gradle +++ b/build.gradle @@ -63,6 +63,7 @@ dependencies { implementation ('org.apache.kafka:kafka-streams:2.3.1') { exclude group: 'org.scala-lang', module: 'scala-reflect' } + implementation group: 'org.scala-lang', name: 'scala-reflect', version: '2.12.11' implementation 'org.apache.avro:avro:1.9.0' implementation 'com.google.code.gson:gson:2.8.5' implementation 'org.apache.commons:commons-configuration2:2.7' diff --git a/gradle.properties b/gradle.properties index edf895f..ea978f0 100644 --- a/gradle.properties +++ b/gradle.properties @@ -1 +1 @@ -version=2.5.1-SNAPSHOT +version=2.5.0-SNAPSHOT From 24fda2a2a41f43cc07ba4196a7038e9d38452721 Mon Sep 17 00:00:00 2001 From: rliberal Date: Thu, 7 May 2020 13:04:43 -0300 Subject: [PATCH 09/20] fix way to add lib versions --- build.gradle | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/build.gradle b/build.gradle index 8e86ed1..d581718 100644 --- a/build.gradle +++ b/build.gradle @@ -63,7 +63,7 @@ dependencies { implementation ('org.apache.kafka:kafka-streams:2.3.1') { exclude group: 'org.scala-lang', module: 
'scala-reflect' } - implementation group: 'org.scala-lang', name: 'scala-reflect', version: '2.12.11' + implementation 'org.scala-lang:scala-reflect:2.12.11' implementation 'org.apache.avro:avro:1.9.0' implementation 'com.google.code.gson:gson:2.8.5' implementation 'org.apache.commons:commons-configuration2:2.7' @@ -73,7 +73,7 @@ dependencies { testImplementation('org.apache.kafka:kafka_2.12:2.3.1') { exclude group: 'org.scala-lang', module: 'scala-reflect' } - testImplementation group: 'org.scala-lang', name: 'scala-reflect', version: '2.12.11' + testImplementation 'org.scala-lang:scala-reflect:2.12.11' testImplementation 'org.apache.zookeeper:zookeeper:3.5.5' testImplementation('io.netty:netty-all:4.1.43.Final') { force = true @@ -84,7 +84,7 @@ dependencies { kafkaInMemory ('org.apache.kafka:kafka_2.12:2.3.1') { exclude group: 'org.scala-lang', module: 'scala-reflect' } - kafkaInMemory group: 'org.scala-lang', name: 'scala-reflect', version: '2.12.11' + kafkaInMemory 'org.scala-lang:scala-reflect:2.12.11' kafkaInMemory 'commons-io:commons-io:2.6' // This following section mitigates OWASP vulnerabilities report. 
From a86398e7eb385fb62ccb0ee6c507158836035825 Mon Sep 17 00:00:00 2001 From: Christopher Smith Date: Mon, 18 May 2020 09:02:23 -0700 Subject: [PATCH 10/20] Updated to version 2.4.2 --- README.md | 4 ++-- docs/CLI-Example.rst | 8 ++++---- docs/index.rst | 4 ++-- gradle.properties | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index de03536..01e6468 100644 --- a/README.md +++ b/README.md @@ -36,12 +36,12 @@ Maven: com.opendxl dxldatabusclient - 2.4.1 + 2.4.2 ``` or Gradle: ```groovy -compile 'com.opendxl:dxldatabusclient:2.4.1' +compile 'com.opendxl:dxldatabusclient:2.4.2' ``` ## Bugs and Feedback diff --git a/docs/CLI-Example.rst b/docs/CLI-Example.rst index 6d80c85..9dbbe7f 100644 --- a/docs/CLI-Example.rst +++ b/docs/CLI-Example.rst @@ -7,7 +7,7 @@ library with no arguments displays help information: :: - $ java -jar dxldatabusclient-2.4.1.jar + $ java -jar dxldatabusclient-2.4.2.jar ERROR: There are not options Option (* = required) Description @@ -50,7 +50,7 @@ few CLI operations. Operations arguments are placed after :: - $ java -jar dxldatabusclient-2.4.1.jar --operation ... + $ java -jar dxldatabusclient-2.4.2.jar --operation ... Operation Arguments ^^^^^^^^^^^^^^^^^^^ @@ -123,7 +123,7 @@ example :: - $ java -jar dxldatabusclient-2.4.1.jar \ + $ java -jar dxldatabusclient-2.4.2.jar \ --operation produce \ --to-topic \ --brokers <0.0.0.0>: \ @@ -230,7 +230,7 @@ example :: - java -jar dxldatabusclient-2.4.1.jar \ + java -jar dxldatabusclient-2.4.2.jar \ --operation consume \ --from-topic \ --brokers \ diff --git a/docs/index.rst b/docs/index.rst index 022d8d3..0152eee 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -40,14 +40,14 @@ Maven: com.opendxl dxldatabusclient - 2.4.1 + 2.4.2 or Gradle: .. 
code:: groovy - compile 'com.opendxl:dxldatabusclient:2.4.1' + compile 'com.opendxl:dxldatabusclient:2.4.2' API Documentation ----------------- diff --git a/gradle.properties b/gradle.properties index ea978f0..dec196c 100644 --- a/gradle.properties +++ b/gradle.properties @@ -1 +1 @@ -version=2.5.0-SNAPSHOT +version=2.4.2 From 1cf0f4809c2e119585c3268137cfb0b90bb4dd20 Mon Sep 17 00:00:00 2001 From: Chris Smith Date: Mon, 18 May 2020 09:06:03 -0700 Subject: [PATCH 11/20] Updated to next snapshot --- gradle.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gradle.properties b/gradle.properties index dec196c..ea978f0 100644 --- a/gradle.properties +++ b/gradle.properties @@ -1 +1 @@ -version=2.4.2 +version=2.5.0-SNAPSHOT From 60d2e11ac3e97b1da8f5633cf096cabbd38198f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20Villalba=C2=A0=20=C2=A0=20=C2=A0=20=C2=A0?= Date: Mon, 6 Jul 2020 22:53:42 -0300 Subject: [PATCH 12/20] Fix DatabusPushConsumer when a rebalance scenario occurs --- build.gradle | 4 +- .../databus/consumer/DatabusPushConsumer.java | 74 +++++++++++++++++++ 2 files changed, 76 insertions(+), 2 deletions(-) diff --git a/build.gradle b/build.gradle index 05afb9f..1148a52 100644 --- a/build.gradle +++ b/build.gradle @@ -6,7 +6,7 @@ buildscript { mavenCentral() } dependencies { - classpath 'org.owasp:dependency-check-gradle:5.2.1' + classpath 'org.owasp:dependency-check-gradle:5.3.2.1' } } @@ -19,7 +19,7 @@ plugins { id "com.github.johnrengelman.shadow" version "4.0.3" id "kr.motd.sphinx" version "2.3.1" id "jacoco" - id "org.owasp.dependencycheck" version "5.3.2" + id "org.owasp.dependencycheck" version "5.3.2.1" } group 'com.opendxl' diff --git a/src/main/java/com/opendxl/databus/consumer/DatabusPushConsumer.java b/src/main/java/com/opendxl/databus/consumer/DatabusPushConsumer.java index 64183c1..fc20a16 100644 --- a/src/main/java/com/opendxl/databus/consumer/DatabusPushConsumer.java +++ 
b/src/main/java/com/opendxl/databus/consumer/DatabusPushConsumer.java @@ -15,8 +15,11 @@ import java.io.Closeable; import java.time.Duration; +import java.util.Collection; import java.util.HashMap; +import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Properties; import java.util.Set; import java.util.concurrent.CancellationException; @@ -87,6 +90,11 @@ public final class DatabusPushConsumer

extends DatabusConsumer

implements */ private CountDownLatch countDownLatch = new CountDownLatch(1); + /** + * An boolean to signal if pause operation has to be refreshed + */ + private AtomicBoolean refreshPause = new AtomicBoolean(false); + /** * Constructor * @@ -204,6 +212,65 @@ public DatabusPushConsumer(final Properties properties, super(properties, messageDeserializer, credential, tierStorage); this.consumerListener = consumerListener; } + + /** + * {@inheritDoc} + */ + @Override + public void subscribe(final Map> groupTopics) { + super.subscribe(groupTopics, new PushConsumerRebalanceListener(null)); + } + + /** + * {@inheritDoc} + */ + @Override + public void subscribe(final Map> groupTopics, + final ConsumerRebalanceListener consumerRebalanceListener) { + super.subscribe(groupTopics, new PushConsumerRebalanceListener(consumerRebalanceListener)); + } + + /** + * {@inheritDoc} + */ + @Override + public void subscribe(final List topics, + final ConsumerRebalanceListener consumerRebalanceListener) { + super.subscribe(topics, new PushConsumerRebalanceListener(consumerRebalanceListener)); + } + + /** + * {@inheritDoc} + */ + @Override + public void subscribe(final List topics) { + super.subscribe(topics, new PushConsumerRebalanceListener(null)); + } + + private class PushConsumerRebalanceListener implements ConsumerRebalanceListener { + + private final ConsumerRebalanceListener customerListener; + + PushConsumerRebalanceListener(final ConsumerRebalanceListener customerListener) { + this.customerListener = Optional.ofNullable(customerListener).orElse(new NoOpConsumerRebalanceListener()); + + } + + @Override + public void onPartitionsRevoked(final Collection partitions) { + customerListener.onPartitionsRevoked(partitions); + + } + + @Override + public void onPartitionsAssigned(final Collection partitions) { + refreshPause.set(true); + customerListener.onPartitionsAssigned(partitions); + + } + } + + /** * {@inheritDoc} */ @@ -367,6 +434,13 @@ private void push(final 
DatabusPushConsumerFuture databusPushConsumerFuture, LOG.info("Consumer " + super.getClientId() + " is resumed"); } catch (TimeoutException e) { + // refreshPause == true means a rebalance was performed and partitions might be reassigned. + // Then, in order to avoid reading messages and just sends the heartbeat when poll(), + // a pause() method has to be invoked with the updated partitions assignment. + if (refreshPause.get()) { + refreshPause.set(false); + pause(assignment()); + } // TimeoutException means that listener is still working. // So, a poll is performed to heartbeat Databus super.poll(Duration.ofMillis(0)); From 1b7af1b12389593dda4764f29b92c201b1c0f1a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20Villalba=C2=A0=20=C2=A0=20=C2=A0=20=C2=A0?= Date: Mon, 6 Jul 2020 23:01:45 -0300 Subject: [PATCH 13/20] Change methods order --- .../databus/consumer/DatabusPushConsumer.java | 46 +++++++++---------- 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/src/main/java/com/opendxl/databus/consumer/DatabusPushConsumer.java b/src/main/java/com/opendxl/databus/consumer/DatabusPushConsumer.java index fc20a16..4296ea9 100644 --- a/src/main/java/com/opendxl/databus/consumer/DatabusPushConsumer.java +++ b/src/main/java/com/opendxl/databus/consumer/DatabusPushConsumer.java @@ -247,30 +247,6 @@ public void subscribe(final List topics) { super.subscribe(topics, new PushConsumerRebalanceListener(null)); } - private class PushConsumerRebalanceListener implements ConsumerRebalanceListener { - - private final ConsumerRebalanceListener customerListener; - - PushConsumerRebalanceListener(final ConsumerRebalanceListener customerListener) { - this.customerListener = Optional.ofNullable(customerListener).orElse(new NoOpConsumerRebalanceListener()); - - } - - @Override - public void onPartitionsRevoked(final Collection partitions) { - customerListener.onPartitionsRevoked(partitions); - - } - - @Override - public void onPartitionsAssigned(final Collection partitions) { 
- refreshPause.set(true); - customerListener.onPartitionsAssigned(partitions); - - } - } - - /** * {@inheritDoc} */ @@ -575,6 +551,28 @@ public void close() { } } + private class PushConsumerRebalanceListener implements ConsumerRebalanceListener { + + private final ConsumerRebalanceListener customerListener; + + PushConsumerRebalanceListener(final ConsumerRebalanceListener customerListener) { + this.customerListener = Optional.ofNullable(customerListener).orElse(new NoOpConsumerRebalanceListener()); + + } + + @Override + public void onPartitionsRevoked(final Collection partitions) { + customerListener.onPartitionsRevoked(partitions); + + } + + @Override + public void onPartitionsAssigned(final Collection partitions) { + refreshPause.set(true); + customerListener.onPartitionsAssigned(partitions); + + } + } } From 255f546a04ebc4c3cd52adb181cc7bd35b20bdd8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20Villalba=C2=A0=20=C2=A0=20=C2=A0=20=C2=A0?= Date: Tue, 17 Mar 2020 15:00:03 -0300 Subject: [PATCH 14/20] Add consumer tier storage capability --- build.gradle | 8 +- .../internal/util/HeaderInternalField.java | 8 ++ .../databus/consumer/DatabusConsumer.java | 22 ++-- .../databus/consumer/DatabusPushConsumer.java | 68 +++++++++- .../databus/entities/S3TierStorage.java | 124 ++++++++++++++++++ .../opendxl/databus/entities/TierStorage.java | 8 ++ .../databus/serialization/SerdeDatabus.java | 14 +- .../internal/AvroMessageDeserializer.java | 51 ++++++- .../internal/InternalDeserializer.java | 5 + .../internal/LegacyMessageDeserializer.java | 6 + .../internal/MessageDeserializer.java | 15 ++- .../internal/RawMessageDeserializer.java | 6 + .../internal/MessageSerializationTest.java | 2 +- 13 files changed, 316 insertions(+), 21 deletions(-) create mode 100644 src/main/java/com/opendxl/databus/entities/S3TierStorage.java create mode 100644 src/main/java/com/opendxl/databus/entities/TierStorage.java diff --git a/build.gradle b/build.gradle index d581718..5ce5e08 100644 --- 
a/build.gradle +++ b/build.gradle @@ -70,17 +70,23 @@ dependencies { implementation 'commons-lang:commons-lang:2.6' implementation 'org.slf4j:slf4j-api:1.7.30' implementation 'net.sf.jopt-simple:jopt-simple:5.0.4' + implementation 'commons-io:commons-io:2.6' + + implementation 'software.amazon.awssdk:bom:2.10.73' + implementation 'com.amazonaws:aws-java-sdk-s3:1.11.731' + + testImplementation 'org.apache.zookeeper:zookeeper:3.5.5' testImplementation('org.apache.kafka:kafka_2.12:2.3.1') { exclude group: 'org.scala-lang', module: 'scala-reflect' } testImplementation 'org.scala-lang:scala-reflect:2.12.11' - testImplementation 'org.apache.zookeeper:zookeeper:3.5.5' testImplementation('io.netty:netty-all:4.1.43.Final') { force = true } testImplementation 'commons-io:commons-io:2.6' testImplementation 'junit:junit:4.12' testImplementation 'com.github.stefanbirkner:system-rules:1.19.0' + kafkaInMemory ('org.apache.kafka:kafka_2.12:2.3.1') { exclude group: 'org.scala-lang', module: 'scala-reflect' } diff --git a/src/main/java/com/opendxl/databus/common/internal/util/HeaderInternalField.java b/src/main/java/com/opendxl/databus/common/internal/util/HeaderInternalField.java index 84bb9fd..4169b12 100644 --- a/src/main/java/com/opendxl/databus/common/internal/util/HeaderInternalField.java +++ b/src/main/java/com/opendxl/databus/common/internal/util/HeaderInternalField.java @@ -10,6 +10,7 @@ */ public final class HeaderInternalField { + private HeaderInternalField() { } @@ -27,4 +28,11 @@ private HeaderInternalField() { * The topic name key name. 
*/ public static final String TOPIC_NAME_KEY = INTERNAL_HEADER_IDENTIFIER + "TN" + INTERNAL_HEADER_IDENTIFIER; + + public static final String TIER_STORAGE_BUCKET_NAME_KEY = INTERNAL_HEADER_IDENTIFIER + "BN" + + INTERNAL_HEADER_IDENTIFIER;; + + public static final String TIER_STORAGE_OBJECT_NAME_KEY = INTERNAL_HEADER_IDENTIFIER + "OB" + + INTERNAL_HEADER_IDENTIFIER;; + } diff --git a/src/main/java/com/opendxl/databus/consumer/DatabusConsumer.java b/src/main/java/com/opendxl/databus/consumer/DatabusConsumer.java index b2eb98a..6711d10 100644 --- a/src/main/java/com/opendxl/databus/consumer/DatabusConsumer.java +++ b/src/main/java/com/opendxl/databus/consumer/DatabusConsumer.java @@ -5,6 +5,7 @@ package com.opendxl.databus.consumer; import com.opendxl.databus.credential.Credential; +import com.opendxl.databus.entities.TierStorage; import com.opendxl.databus.exception.DatabusClientRuntimeException; import com.opendxl.databus.producer.DatabusProducer; import com.opendxl.databus.serialization.Deserializer; @@ -71,7 +72,7 @@ public class DatabusConsumer

extends Consumer

{ * @throws DatabusClientRuntimeException if a DatabusConsumer getInstance was not able to be created */ public DatabusConsumer(final Map configs, final Deserializer

messageDeserializer) { - this(configs, messageDeserializer, null); + this(configs, messageDeserializer, null, null); } /** @@ -85,15 +86,15 @@ public DatabusConsumer(final Map configs, final Deserializer

* @param configs The consumer configs * @param messageDeserializer a {@link Deserializer} getInstance implementd by SDK's user * @param credential identity to authenticate/authorization - * + * @param tierStorage Tier storage * @throws DatabusClientRuntimeException if a DatabusConsumer getInstance was not able to be created */ public DatabusConsumer(final Map configs, final Deserializer

messageDeserializer, - final Credential credential) { + final Credential credential, final TierStorage tierStorage) { try { Map configuration = configureCredential(configs, credential); configuration = configureClientId(configuration); - setFieldMembers(messageDeserializer, configuration); + setFieldMembers(messageDeserializer, configuration, tierStorage); setConsumer(new KafkaConsumer(configuration, getKeyDeserializer(), getValueDeserializer())); } catch (DatabusClientRuntimeException e) { throw e; @@ -117,7 +118,7 @@ public DatabusConsumer(final Map configs, final Deserializer

* @throws DatabusClientRuntimeException if a DatabusConsumer getInstance was not able to be created */ public DatabusConsumer(final Properties properties, final Deserializer

messageDeserializer) { - this(properties, messageDeserializer, null); + this(properties, messageDeserializer, null, null); } /** @@ -131,15 +132,16 @@ public DatabusConsumer(final Properties properties, final Deserializer

messag * @param properties The consumer configuration properties * @param messageDeserializer a {@link Deserializer} getInstance implementd by SDK's user * @param credential identity to authenticate/authorization + * @param tierStorage Tier storage * * @throws DatabusClientRuntimeException if a DatabusConsumer getInstance was not able to be created */ public DatabusConsumer(final Properties properties, final Deserializer

messageDeserializer, - final Credential credential) { + final Credential credential, final TierStorage tierStorage) { try { Map configuration = configureCredential((Map) properties, credential); configuration = configureClientId(configuration); - setFieldMembers(messageDeserializer, configuration); + setFieldMembers(messageDeserializer, configuration, tierStorage); setConsumer(new KafkaConsumer(configuration, getKeyDeserializer(), getValueDeserializer())); } catch (DatabusClientRuntimeException e) { throw e; @@ -156,14 +158,16 @@ public DatabusConsumer(final Properties properties, final Deserializer

messag * @param configuration The consumer configuration map. * @param messageDeserializer a {@link Deserializer} getInstance implemented by SDK's user. */ - private void setFieldMembers(final Deserializer

messageDeserializer, final Map configuration) { + private void setFieldMembers(final Deserializer

messageDeserializer, + final Map configuration, + final TierStorage tierStorage) { if (messageDeserializer == null) { throw new DatabusClientRuntimeException(DATABUS_CONSUMER_INSTANCE_CANNOT_BE_CREATED_MESSAGE + "Message Deserializer cannot be null" , DatabusConsumer.class); } setKeyDeserializer(new DatabusKeyDeserializer()); - setValueDeserializer(new MessageDeserializer()); + setValueDeserializer(new MessageDeserializer(tierStorage)); setConsumerRecordsAdapter(new ConsumerRecordsAdapter

(messageDeserializer)); setClientId((String) configuration.get(ConsumerConfiguration.CLIENT_ID_CONFIG)); } diff --git a/src/main/java/com/opendxl/databus/consumer/DatabusPushConsumer.java b/src/main/java/com/opendxl/databus/consumer/DatabusPushConsumer.java index 1dd2af1..64183c1 100644 --- a/src/main/java/com/opendxl/databus/consumer/DatabusPushConsumer.java +++ b/src/main/java/com/opendxl/databus/consumer/DatabusPushConsumer.java @@ -6,6 +6,7 @@ import com.opendxl.databus.common.TopicPartition; import com.opendxl.databus.credential.Credential; +import com.opendxl.databus.entities.TierStorage; import com.opendxl.databus.exception.DatabusClientRuntimeException; import com.opendxl.databus.serialization.Deserializer; import org.apache.kafka.common.errors.WakeupException; @@ -100,6 +101,22 @@ public DatabusPushConsumer(final Map configs, this.consumerListener = consumerListener; } + /** + * Constructor + * + * @param configs consumer configuration + * @param messageDeserializer consumer message deserializer + * @param consumerListener consumer listener + * @param tierStorage Tier storage + */ + public DatabusPushConsumer(final Map configs, + final Deserializer

messageDeserializer, + final DatabusPushConsumerListener consumerListener, + final TierStorage tierStorage) { + super(configs, messageDeserializer, null, tierStorage); + this.consumerListener = consumerListener; + } + /** * @param configs consumer configuration * @param messageDeserializer consumer message deserializer @@ -110,7 +127,23 @@ public DatabusPushConsumer(final Map configs, final Deserializer

messageDeserializer, final DatabusPushConsumerListener consumerListener, final Credential credential) { - super(configs, messageDeserializer, credential); + super(configs, messageDeserializer, credential, null); + this.consumerListener = consumerListener; + } + + /** + * @param configs consumer configuration + * @param messageDeserializer consumer message deserializer + * @param consumerListener consumer listener + * @param credential credential to get access to Databus in case security is enabled + * @param tierStorage Tier storage + */ + public DatabusPushConsumer(final Map configs, + final Deserializer

messageDeserializer, + final DatabusPushConsumerListener consumerListener, + final Credential credential, + final TierStorage tierStorage) { + super(configs, messageDeserializer, credential, tierStorage); this.consumerListener = consumerListener; } @@ -127,6 +160,21 @@ public DatabusPushConsumer(final Properties properties, } + /** + * @param properties consumer configuration + * @param messageDeserializer consumer message deserializer + * @param consumerListener consumer listener + * @param tierStorage Tier storage + */ + public DatabusPushConsumer(final Properties properties, + final Deserializer

messageDeserializer, + final DatabusPushConsumerListener consumerListener, + final TierStorage tierStorage) { + super(properties, messageDeserializer, null, tierStorage); + this.consumerListener = consumerListener; + + } + /** * @param properties consumer configuration * @param messageDeserializer consumer message deserializer @@ -137,11 +185,25 @@ public DatabusPushConsumer(final Properties properties, final Deserializer

messageDeserializer, final DatabusPushConsumerListener consumerListener, final Credential credential) { - super(properties, messageDeserializer, credential); + super(properties, messageDeserializer, credential, null); this.consumerListener = consumerListener; } - + /** + * @param properties consumer configuration + * @param messageDeserializer consumer message deserializer + * @param consumerListener consumer listener + * @param credential credential to get access to Databus in case security is enabled + * @param tierStorage Tier storage + */ + public DatabusPushConsumer(final Properties properties, + final Deserializer

messageDeserializer, + final DatabusPushConsumerListener consumerListener, + final Credential credential, + final TierStorage tierStorage) { + super(properties, messageDeserializer, credential, tierStorage); + this.consumerListener = consumerListener; + } /** * {@inheritDoc} */ diff --git a/src/main/java/com/opendxl/databus/entities/S3TierStorage.java b/src/main/java/com/opendxl/databus/entities/S3TierStorage.java new file mode 100644 index 0000000..a99bdf6 --- /dev/null +++ b/src/main/java/com/opendxl/databus/entities/S3TierStorage.java @@ -0,0 +1,124 @@ +package com.opendxl.databus.entities; + +import com.amazonaws.ClientConfiguration; +import com.amazonaws.auth.AWSStaticCredentialsProvider; +import com.amazonaws.auth.BasicAWSCredentials; +import com.amazonaws.auth.InstanceProfileCredentialsProvider; +import com.amazonaws.services.s3.AmazonS3; +import com.amazonaws.services.s3.AmazonS3ClientBuilder; +import com.amazonaws.services.s3.internal.Mimetypes; +import com.amazonaws.services.s3.model.GetObjectRequest; +import com.amazonaws.services.s3.model.ObjectMetadata; +import com.amazonaws.services.s3.model.PutObjectResult; +import com.amazonaws.services.s3.model.S3Object; +import com.opendxl.databus.exception.DatabusClientRuntimeException; +import org.apache.commons.io.IOUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.ByteArrayInputStream; +import java.io.InputStream; + +public class S3TierStorage implements TierStorage { + + /** + * The logger object. 
+ */ + private static final Logger LOG = LoggerFactory.getLogger(S3TierStorage.class); + + /** + * S3 client + */ + private AmazonS3 s3Client; + + public S3TierStorage(final String awsRegion, + final ClientConfiguration config) { + + AmazonS3ClientBuilder s3Builder = AmazonS3ClientBuilder.standard(); + s3Builder.withCredentials(new InstanceProfileCredentialsProvider(false)); + s3Builder.withRegion(awsRegion); + if (config != null) { + s3Builder.withClientConfiguration(config); + } + try { + this.s3Client = s3Builder.build(); + } catch (Exception e) { + e.printStackTrace(); + + } + } + + public S3TierStorage(final String awsAccessKey, + final String awsSecretKey, + final String awsRegion, + final ClientConfiguration config) { + + + AmazonS3ClientBuilder s3Builder = AmazonS3ClientBuilder.standard(); + s3Builder.withCredentials( + new AWSStaticCredentialsProvider( + new BasicAWSCredentials(awsAccessKey, awsSecretKey))); + s3Builder.withRegion(awsRegion); + if (config != null) { + s3Builder.withClientConfiguration(config); + } + + try { + this.s3Client = s3Builder.build(); + } catch (Exception e) { + e.printStackTrace(); + } + + + } + + @Override + public void put(final String s3BucketName, + final String s3KeyName, + final byte[] payload) { + + try { + if (!s3Client.doesBucketExistV2(s3BucketName)) { + s3Client.createBucket(s3BucketName); + } + + ObjectMetadata metadata = new ObjectMetadata(); + metadata.setContentLength(payload.length); + metadata.setContentType(Mimetypes.MIMETYPE_HTML); + InputStream s3Object = new ByteArrayInputStream(payload); + PutObjectResult putObjectResult = s3Client.putObject(s3BucketName, s3KeyName, s3Object, metadata); + + } catch (Exception e) { + final String errMsg = "Error uploading S3 object: Bucket: " + " Object: " + + s3KeyName + " " + e.getMessage(); + LOG.error(errMsg, e); + throw new DatabusClientRuntimeException(errMsg, e, this.getClass()); + } + + } + + + public boolean doesObjectExist(String s3BucketName, String s3KeyName) { + 
try { + return s3Client.doesObjectExist(s3BucketName, s3KeyName); + } catch (Exception e) { + final String errMsg = "Error trying to reach S3 object: Bucket: " + " Object: " + s3KeyName + " " + + e.getMessage(); + LOG.error(errMsg, e); + throw new DatabusClientRuntimeException(errMsg, e, this.getClass()); + } + } + + @Override + public byte[] get(String s3BucketName, String s3KeyName) { + try { + S3Object s3Object = s3Client.getObject(new GetObjectRequest(s3BucketName, s3KeyName)); + return IOUtils.toByteArray(s3Object.getObjectContent()); + } catch (Exception e) { + final String errMsg = "Error reading S3 object: Bucket: " + " Object: " + s3KeyName + " " + e.getMessage(); + LOG.error(errMsg, e); + throw new DatabusClientRuntimeException(errMsg, e, this.getClass()); + } + } + +} diff --git a/src/main/java/com/opendxl/databus/entities/TierStorage.java b/src/main/java/com/opendxl/databus/entities/TierStorage.java new file mode 100644 index 0000000..620ffb0 --- /dev/null +++ b/src/main/java/com/opendxl/databus/entities/TierStorage.java @@ -0,0 +1,8 @@ +package com.opendxl.databus.entities; + +public interface TierStorage { + void put(String bucketName, String objectName, byte[] payload); + byte[] get(String bucketName, String objectName); + boolean doesObjectExist(String bucketName, String objectName); +} + diff --git a/src/main/java/com/opendxl/databus/serialization/SerdeDatabus.java b/src/main/java/com/opendxl/databus/serialization/SerdeDatabus.java index a9393ff..6629124 100644 --- a/src/main/java/com/opendxl/databus/serialization/SerdeDatabus.java +++ b/src/main/java/com/opendxl/databus/serialization/SerdeDatabus.java @@ -4,6 +4,7 @@ package com.opendxl.databus.serialization; +import com.opendxl.databus.entities.TierStorage; import com.opendxl.databus.entities.internal.DatabusMessage; import com.opendxl.databus.serialization.internal.MessageDeserializer; import com.opendxl.databus.serialization.internal.MessageSerializer; @@ -17,6 +18,17 @@ */ public class 
SerdeDatabus implements Serde { + private final TierStorage tierStorage; + + public SerdeDatabus(final TierStorage tierStorage) { + + this.tierStorage = tierStorage; + } + + public SerdeDatabus() { + this(null); + } + /** * Not implemented. */ @@ -50,6 +62,6 @@ public Serializer serializer() { */ @Override public Deserializer deserializer() { - return new MessageDeserializer(); + return new MessageDeserializer(tierStorage); } } diff --git a/src/main/java/com/opendxl/databus/serialization/internal/AvroMessageDeserializer.java b/src/main/java/com/opendxl/databus/serialization/internal/AvroMessageDeserializer.java index 3707b51..9de0383 100644 --- a/src/main/java/com/opendxl/databus/serialization/internal/AvroMessageDeserializer.java +++ b/src/main/java/com/opendxl/databus/serialization/internal/AvroMessageDeserializer.java @@ -5,9 +5,11 @@ package com.opendxl.databus.serialization.internal; +import com.opendxl.databus.common.internal.util.HeaderInternalField; import com.opendxl.databus.consumer.ConsumerRecord; import com.opendxl.databus.consumer.DatabusConsumer; import com.opendxl.databus.entities.Headers; +import com.opendxl.databus.entities.TierStorage; import com.opendxl.databus.exception.DatabusClientRuntimeException; import com.opendxl.databus.common.internal.adapter.HeadersAvroDeserializedAdapter; import com.opendxl.databus.common.internal.adapter.PayloadHeadersAvroDeserializedAdapter; @@ -17,6 +19,8 @@ import org.apache.avro.generic.GenericRecord; import org.apache.avro.io.DatumReader; import org.apache.avro.io.DecoderFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Avro Message Deserializer @@ -25,6 +29,11 @@ */ public final class AvroMessageDeserializer implements InternalDeserializer { + /** + * The logger object. + */ + private static final Logger LOG = LoggerFactory.getLogger(AvroMessageDeserializer.class); + /** * The schema to define the message. 
*/ @@ -52,24 +61,56 @@ public AvroMessageDeserializer(final Schema schema) { */ @Override public DatabusMessage deserialize(final String topic, final byte[] data) { + return this.deserialize(topic, data, null); + } + + @Override + public DatabusMessage deserialize(String topic, byte[] data, TierStorage tierStorage) { try { - final GenericRecord avroRecord = reader.read(null, DecoderFactory.get().binaryDecoder(data, null)); + GenericRecord avroRecord = reader.read(null, DecoderFactory.get().binaryDecoder(data, null)); - final Headers headers = + Headers headers = new HeadersAvroDeserializedAdapter() .adapt(avroRecord.get("headers")); - final byte[] payload = + byte[] payload = new PayloadHeadersAvroDeserializedAdapter() .adapt(avroRecord.get("payload")); + + // Tier Storage Section + if (tierStorage != null) { + final String bucketName = headers.get(HeaderInternalField.TIER_STORAGE_BUCKET_NAME_KEY); + final String objectName = headers.get(HeaderInternalField.TIER_STORAGE_OBJECT_NAME_KEY); + if (bucketName != null && objectName != null) { + byte[] object = null; + try { + object = tierStorage.get(bucketName, objectName); + } catch (Exception e) { + LOG.error("Error when reading message from Tier Storage. Bucket Name: " + + bucketName + "Object Name: " + + objectName, e); + } + + if (object != null || object.length > 0) { + avroRecord = reader.read(null, DecoderFactory.get().binaryDecoder(object, null)); + headers = new HeadersAvroDeserializedAdapter().adapt(avroRecord.get("headers")); + payload = new PayloadHeadersAvroDeserializedAdapter().adapt(avroRecord.get("payload")); + } else { + LOG.warn("Object content reading from Tier Storage is null or empty. 
Bucket: " + bucketName + + " Object: " + objectName); + } + } + } + final DatabusMessage message = new DatabusMessage(headers, payload); return message; } catch (Exception e) { - throw new DatabusClientRuntimeException("Error deserializing Avro schema:" + schema.toString(true), - e, AvroMessageDeserializer.class); + final String errMsg = "Error deserializing Avro schema:" + schema.toString(true); + LOG.error(errMsg, e); + throw new DatabusClientRuntimeException(errMsg, e, AvroMessageDeserializer.class); } } } diff --git a/src/main/java/com/opendxl/databus/serialization/internal/InternalDeserializer.java b/src/main/java/com/opendxl/databus/serialization/internal/InternalDeserializer.java index 3215ebb..ea02265 100644 --- a/src/main/java/com/opendxl/databus/serialization/internal/InternalDeserializer.java +++ b/src/main/java/com/opendxl/databus/serialization/internal/InternalDeserializer.java @@ -4,6 +4,8 @@ package com.opendxl.databus.serialization.internal; +import com.opendxl.databus.entities.TierStorage; + /** * Internal Deserializer * Used by SDK to deserialize an object of P type, @@ -22,4 +24,7 @@ public interface InternalDeserializer

{ */ P deserialize(String topic, byte[] data); + P deserialize(String topic, byte[] data, TierStorage tierStorage); + + } diff --git a/src/main/java/com/opendxl/databus/serialization/internal/LegacyMessageDeserializer.java b/src/main/java/com/opendxl/databus/serialization/internal/LegacyMessageDeserializer.java index 8105ab7..db85664 100644 --- a/src/main/java/com/opendxl/databus/serialization/internal/LegacyMessageDeserializer.java +++ b/src/main/java/com/opendxl/databus/serialization/internal/LegacyMessageDeserializer.java @@ -6,6 +6,7 @@ import com.google.gson.Gson; +import com.opendxl.databus.entities.TierStorage; import com.opendxl.databus.exception.DatabusClientRuntimeException; import com.opendxl.databus.common.internal.builder.TopicNameBuilder; import com.opendxl.databus.common.internal.util.HeaderInternalField; @@ -47,6 +48,11 @@ public DatabusMessage deserialize(final String topic, final byte[] data) { } + @Override + public DatabusMessage deserialize(String topic, byte[] data, TierStorage tierStorage) { + return deserialize(topic, data); + } + /** * This class is filled by GSON framework, based on a JSON as byte[], * then, it can create a {@link DatabusMessage} getInstance diff --git a/src/main/java/com/opendxl/databus/serialization/internal/MessageDeserializer.java b/src/main/java/com/opendxl/databus/serialization/internal/MessageDeserializer.java index c532071..dcca9e4 100644 --- a/src/main/java/com/opendxl/databus/serialization/internal/MessageDeserializer.java +++ b/src/main/java/com/opendxl/databus/serialization/internal/MessageDeserializer.java @@ -4,6 +4,7 @@ package com.opendxl.databus.serialization.internal; +import com.opendxl.databus.entities.TierStorage; import com.opendxl.databus.entities.internal.DatabusMessage; import java.util.Map; @@ -13,6 +14,18 @@ */ public final class MessageDeserializer implements org.apache.kafka.common.serialization.Deserializer { + + private TierStorage tierStorage; + + public MessageDeserializer(final TierStorage 
tierStorage) { + this.tierStorage = tierStorage; + } + + public MessageDeserializer() { + this(null); + } + + /** * Not implemented. */ @@ -33,7 +46,7 @@ public DatabusMessage deserialize(final String topic, final byte[] serializedMes final MessageStructure messageStructure = MessageStructureFactory.getStructure(serializedMessage); final Integer version = messageStructure.getVersion(); final InternalDeserializer deserializer = DeserializerRegistry.getDeserializer(version); - return deserializer.deserialize(topic, messageStructure.getPayload()); + return deserializer.deserialize(topic, messageStructure.getPayload(), tierStorage); } diff --git a/src/main/java/com/opendxl/databus/serialization/internal/RawMessageDeserializer.java b/src/main/java/com/opendxl/databus/serialization/internal/RawMessageDeserializer.java index a2801d2..964d5fa 100644 --- a/src/main/java/com/opendxl/databus/serialization/internal/RawMessageDeserializer.java +++ b/src/main/java/com/opendxl/databus/serialization/internal/RawMessageDeserializer.java @@ -4,6 +4,7 @@ package com.opendxl.databus.serialization.internal; +import com.opendxl.databus.entities.TierStorage; import com.opendxl.databus.entities.internal.DatabusMessage; /** @@ -23,4 +24,9 @@ public final class RawMessageDeserializer implements InternalDeserializer Date: Thu, 19 Mar 2020 14:53:07 -0300 Subject: [PATCH 15/20] Add producer and consumer with tier storage capability. Add S3 Mock for UT purpose. 
Addd UT --- build.gradle | 4 + response.txt | 137 ++++++++++ ...cS3TierStorageConsumerProducerExample.java | 250 ++++++++++++++++++ .../cli/operation/ProduceOperation.java | 3 +- .../adapter/DatabusProducerRecordAdapter.java | 17 +- .../adapter/MessagePayloadAdapter.java | 24 +- .../databus/consumer/DatabusConsumer.java | 28 +- .../opendxl/databus/entities/RoutingData.java | 29 +- .../databus/entities/TierStorageMetadata.java | 22 ++ .../databus/producer/DatabusProducer.java | 8 +- .../producer/DatabusTierStorageProducer.java | 229 ++++++++++++++++ .../opendxl/databus/producer/Producer.java | 70 +++-- .../internal/AvroMessageDeserializer.java | 14 +- .../databus/entities/S3TierStorageTest.java | 102 +++++++ 14 files changed, 876 insertions(+), 61 deletions(-) create mode 100644 response.txt create mode 100644 sample/src/sample/BasicS3TierStorageConsumerProducerExample.java create mode 100644 src/main/java/com/opendxl/databus/entities/TierStorageMetadata.java create mode 100644 src/main/java/com/opendxl/databus/producer/DatabusTierStorageProducer.java create mode 100644 src/test/java/com/opendxl/databus/entities/S3TierStorageTest.java diff --git a/build.gradle b/build.gradle index 5ce5e08..5615bb6 100644 --- a/build.gradle +++ b/build.gradle @@ -91,6 +91,10 @@ dependencies { exclude group: 'org.scala-lang', module: 'scala-reflect' } kafkaInMemory 'org.scala-lang:scala-reflect:2.12.11' + testImplementation 'io.findify:s3mock_2.12:0.2.5' + testImplementation 'com.e-movimento.tinytools:privilegedaccessor:1.2.2' + + kafkaInMemory 'org.apache.kafka:kafka_2.12:2.3.1' kafkaInMemory 'commons-io:commons-io:2.6' // This following section mitigates OWASP vulnerabilities report. 
diff --git a/response.txt b/response.txt new file mode 100644 index 0000000..f3a67ce --- /dev/null +++ b/response.txt @@ -0,0 +1,137 @@ +HTTP/1.0 200 Connection established + +HTTP/1.1 403 cnmismatch +Connection: Keep-Alive +Content-Type: text/html +Cache-Control: no-cache +Content-Length: 3646 +X-Frame-Options: deny + + + + + + + + + McAfee Web Gateway - Notification + + + + + + + + + + +
+ + + + + +
+ + +
+ + + + + + + + +
+ Common Name Mismatch. +
+ + + + + + + +
+ The certificate verification failed due to a common name mismatch. +
+ + + + + + + +
+ Host: e1581825-govprodsupportgat-28a7-27882486.us-gov-west-1.elb.amazonaws.com
+ Common name: ui.soc.mcafee-gov.com
+ Alternative subject names: regex(ui\.soc\.mcafee-gov\.com), regex(api\.soc\.mcafee-gov\.com)
+
+ + + + + + + + + + + + + + +
+ For assistance, please contact the InfoSec team by submitting this form. +
+ + + + +
+ + CA Certificate installed. +
+ + + + + generated 2020-03-18 17:14:28 by MWG: us-dal-mwg6, IP 10.52.64.203, ProxyPort: 9090 +Client IP: 10.52.241.166, RuleName: Block Incident, User: ,AuthProto: +
+ User-Agent curl/7.54.0 +
+ +
+ + + diff --git a/sample/src/sample/BasicS3TierStorageConsumerProducerExample.java b/sample/src/sample/BasicS3TierStorageConsumerProducerExample.java new file mode 100644 index 0000000..f68eae9 --- /dev/null +++ b/sample/src/sample/BasicS3TierStorageConsumerProducerExample.java @@ -0,0 +1,250 @@ +/*---------------------------------------------------------------------------* + * Copyright (c) 2019 McAfee, LLC - All Rights Reserved. * + *---------------------------------------------------------------------------*/ + +package sample; + +import broker.ClusterHelper; +import com.amazonaws.ClientConfiguration; +import com.opendxl.databus.common.RecordMetadata; +import com.opendxl.databus.common.internal.builder.TopicNameBuilder; +import com.opendxl.databus.consumer.*; +import com.opendxl.databus.entities.*; +import com.opendxl.databus.producer.*; +import com.opendxl.databus.serialization.ByteArrayDeserializer; +import com.opendxl.databus.serialization.ByteArraySerializer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.charset.Charset; +import java.time.LocalDateTime; +import java.util.*; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; + + +public class BasicS3TierStorageConsumerProducerExample { + + private final Producer producer; + private final ExecutorService executor; + private final TierStorage tierStorage; + private Consumer consumer; + private String producerTopic = "topic1"; + private String consumerTopic = "topic1"; + + private static final long PRODUCER_TIME_CADENCE_MS = 1000L; + private static final long CONSUMER_TIME_CADENCE_MS = 1000L; + private final AtomicBoolean closed = new AtomicBoolean(false); + + private static Logger LOG = LoggerFactory.getLogger(BasicS3TierStorageConsumerProducerExample.class); + + public BasicS3TierStorageConsumerProducerExample() { + + // Start 
Kafka cluster + ClusterHelper + .getInstance() + .addBroker(9092) + .zookeeperPort(2181) + .start(); + + + ClientConfiguration awsClientConfiguration = new ClientConfiguration(); + + this.tierStorage = new S3TierStorage("", + "", + "", awsClientConfiguration); + + // Prepare a Producer + this.producer = getProducer(); + + // Prepare a Consumer + this.consumer = getConsumer(); + + // Subscribe to topic + this.consumer.subscribe(Collections.singletonList(consumerTopic)); + + this.executor = Executors.newFixedThreadPool(2); + + } + + public Producer getProducer() { + final Map config = new HashMap(); + config.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); + config.put(ProducerConfig.CLIENT_ID_CONFIG, "producer-id-sample"); + config.put(ProducerConfig.LINGER_MS_CONFIG, "100"); + config.put(ProducerConfig.BATCH_SIZE_CONFIG, "150000"); + config.put(ProducerConfig.TRANSACTIONAL_ID_CONFIG, UUID.randomUUID().toString()); + config.put(ProducerConfig.ENABLE_IDEMPOTENCE_CONFIG, true); + return new DatabusTierStorageProducer(config, new ByteArraySerializer(), null); + } + + public Consumer getConsumer() { + final Properties consumerProps = new Properties(); + consumerProps.put(ConsumerConfiguration.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); + consumerProps.put(ConsumerConfiguration.GROUP_ID_CONFIG, "cg1"); + consumerProps.put(ConsumerConfiguration.ENABLE_AUTO_COMMIT_CONFIG, "true"); + consumerProps.put(ConsumerConfiguration.SESSION_TIMEOUT_MS_CONFIG, "30000"); + consumerProps.put(ConsumerConfiguration.CLIENT_ID_CONFIG, "consumer-id-sample"); + return new DatabusConsumer(consumerProps, new ByteArrayDeserializer(), tierStorage); + } + + private Runnable getProducerTask() { + return () -> { + LOG.info("Producer started"); + while (!closed.get()) { + + // Prepare a record + final String message = "Hello World at "+ LocalDateTime.now(); + + // user should provide the encoding + final byte[] payload = message.getBytes(Charset.defaultCharset()); + final 
ProducerRecord producerRecord = getProducerRecord(producerTopic, payload); + + // Send the record + // Get headers as String + final StringBuilder headers = new StringBuilder().append("["); + producerRecord.getHeaders().getAll().forEach((k, v) -> headers.append("[" + k + ":" + v + "]")); + headers.append("]"); + + producer.send(producerRecord, new MyCallback(producerRecord.getRoutingData().getShardingKey())); + LOG.info("[PRODUCER -> KAFKA][SENDING MSG] ID " + producerRecord.getRoutingData().getShardingKey() + + " TOPIC:" + TopicNameBuilder.getTopicName(producerTopic, null) + + " HEADERS:" + headers + + " PAYLOAD:" + message); + + justWait(PRODUCER_TIME_CADENCE_MS); + } + producer.flush(); + producer.close(); + LOG.info("Producer closed"); + + }; + } + + private Runnable getConsumerTask() { + return () -> { + try { + LOG.info("Consumer started"); + while (!closed.get()) { + + // Polling the databus + final ConsumerRecords records = consumer.poll(CONSUMER_TIME_CADENCE_MS); + + // Iterate records + for (ConsumerRecord record : records) { + + // Get headers as String + final StringBuilder headers = new StringBuilder().append("["); + record.getHeaders().getAll().forEach((k, v) -> headers.append("[" + k + ":" + v + "]")); + headers.append("]"); + + LOG.info("[CONSUMER <- KAFKA][MSG RCEIVED] ID " + record.getKey() + + " TOPIC:" + record.getComposedTopic() + + " KEY:" + record.getKey() + + " PARTITION:" + record.getPartition() + + " OFFSET:" + record.getOffset() + + " TIMESTAMP:" + record.getTimestamp() + + " HEADERS:" + headers + + " PAYLOAD:" + new String(record.getMessagePayload().getPayload())); + } + consumer.commitAsync(); + } + } catch (Exception e) { + LOG.error(e.getMessage()); + } finally { + consumer.unsubscribe(); + try { + consumer.close(); + } catch (IOException e) { + LOG.error(e.getMessage()); + } + LOG.info("Consumer closed"); + + } + + }; + } + + public ProducerRecord getProducerRecord(final String topic, final byte[] payload) { + String key = 
String.valueOf(System.currentTimeMillis()); + TierStorageMetadata tStorageMetadata = new TierStorageMetadata("databus-poc-test", topic + key); + RoutingData routingData = new RoutingData(topic, key, null, tStorageMetadata); + + Headers headers = new Headers(); + headers.put("k","v"); + + MessagePayload messagePayload = new MessagePayload<>(payload); + return new ProducerRecord<>(routingData, headers, messagePayload); + } + + private void justWait(long time) { + try { + Thread.sleep(time); + } catch (InterruptedException e) { + e.printStackTrace(); + } + } + + private static class MyCallback implements Callback { + + private String shardingKey; + + public MyCallback(String shardingKey) { + + this.shardingKey = shardingKey; + } + + public void onCompletion(RecordMetadata metadata, Exception exception) { + if (exception != null) { + LOG.warn("Error sending a record " + exception.getMessage()); + return; + } + LOG.info("[PRODUCER <- KAFKA][OK MSG SENT] ID " + shardingKey + + " TOPIC:" + metadata.topic() + + " PARTITION:" + metadata.partition() + + " OFFSET:" + metadata.offset()); + } + } + + synchronized private void stopExample(final ExecutorService executor) { + try { + closed.set(true); + consumer.wakeup(); + ClusterHelper.getInstance().stop(); + executor.shutdown(); + executor.awaitTermination(5, TimeUnit.SECONDS); + } catch (InterruptedException e) { + } finally { + executor.shutdownNow(); + } + } + + public void startExample() throws InterruptedException { + + Runnable consumerTask = getConsumerTask(); + Runnable producerTask = getProducerTask(); + + executor.submit(consumerTask); + executor.submit(producerTask); + + Runtime.getRuntime().addShutdownHook( + new Thread( + new Runnable() { + public void run() { + stopExample(executor); + LOG.info("Example finished"); + } + })); + + } + + + public static void main(String[] args) throws InterruptedException { + LOG.info("Ctrl-C to finish"); + new BasicS3TierStorageConsumerProducerExample().startExample(); + } + +} 
diff --git a/src/main/java/com/opendxl/databus/cli/operation/ProduceOperation.java b/src/main/java/com/opendxl/databus/cli/operation/ProduceOperation.java index cf08bb3..6b90a05 100644 --- a/src/main/java/com/opendxl/databus/cli/operation/ProduceOperation.java +++ b/src/main/java/com/opendxl/databus/cli/operation/ProduceOperation.java @@ -196,7 +196,8 @@ private RoutingData getRoutingData(final String topic, final String shardingKey, final String partition) { RoutingData routingData; if (!partition.isEmpty()) { - routingData = new RoutingData(topic, shardingKey, tenantGroup, Integer.parseInt(partition)); + routingData = new RoutingData(topic, shardingKey, tenantGroup, Integer.parseInt(partition), + null); } else { routingData = new RoutingData(topic, shardingKey, tenantGroup); } diff --git a/src/main/java/com/opendxl/databus/common/internal/adapter/DatabusProducerRecordAdapter.java b/src/main/java/com/opendxl/databus/common/internal/adapter/DatabusProducerRecordAdapter.java index bc9067e..9b99273 100644 --- a/src/main/java/com/opendxl/databus/common/internal/adapter/DatabusProducerRecordAdapter.java +++ b/src/main/java/com/opendxl/databus/common/internal/adapter/DatabusProducerRecordAdapter.java @@ -7,6 +7,7 @@ import com.opendxl.databus.common.internal.builder.TopicNameBuilder; import com.opendxl.databus.common.internal.util.HeaderInternalField; import com.opendxl.databus.entities.Headers; +import com.opendxl.databus.entities.TierStorageMetadata; import com.opendxl.databus.entities.internal.DatabusMessage; import com.opendxl.databus.producer.ProducerRecord; import com.opendxl.databus.serialization.Serializer; @@ -19,7 +20,7 @@ * @param

payload's type */ public final class DatabusProducerRecordAdapter

- implements Adapter, org.apache.kafka.clients.producer.ProducerRecord> { /** @@ -58,9 +59,19 @@ public DatabusProducerRecordAdapter(final Serializer

 messageSerializer) { sourceProducerRecord.getRoutingData().getTopic()); } + // Add internal headers to let the consumer know the payload is in tiered storage + TierStorageMetadata tierStorageMetadata = sourceProducerRecord.getRoutingData().getTierStorageMetadata(); + if (tierStorageMetadata != null + && tierStorageMetadata.getBucketName() != null && !tierStorageMetadata.getBucketName().isEmpty() + && tierStorageMetadata.getObjectName() != null && !tierStorageMetadata.getObjectName().isEmpty() + ) { + clonedHeaders.put(HeaderInternalField.TIER_STORAGE_BUCKET_NAME_KEY, tierStorageMetadata.getBucketName()); + clonedHeaders.put(HeaderInternalField.TIER_STORAGE_OBJECT_NAME_KEY, tierStorageMetadata.getObjectName()); + } + + final DatabusMessage databusMessage = - new MessagePayloadAdapter(messageSerializer, clonedHeaders) - .adapt(sourceProducerRecord.payload()); + new MessagePayloadAdapter

(messageSerializer) + .adapt(sourceProducerRecord.payload(), clonedHeaders); final String targetTopic = TopicNameBuilder.getTopicName(sourceProducerRecord.getRoutingData().getTopic(), diff --git a/src/main/java/com/opendxl/databus/common/internal/adapter/MessagePayloadAdapter.java b/src/main/java/com/opendxl/databus/common/internal/adapter/MessagePayloadAdapter.java index 234b850..85239c8 100644 --- a/src/main/java/com/opendxl/databus/common/internal/adapter/MessagePayloadAdapter.java +++ b/src/main/java/com/opendxl/databus/common/internal/adapter/MessagePayloadAdapter.java @@ -16,30 +16,22 @@ * * @param

payload's type */ -public final class MessagePayloadAdapter

implements Adapter, DatabusMessage> { +public final class MessagePayloadAdapter

{ /** * The message deserializer. */ - private final Serializer

messageSerializer; - - /** - * The headers map. - */ - private final Headers headers; + private final Serializer

userSerializer; /** * Constructor * - * @param messageSerializer A {@link Serializer} instance + * @param userSerializer A {@link Serializer} instance * or creating a {@link DatabusMessage}. - * @param headers Headers map. */ - public MessagePayloadAdapter(final Serializer

messageSerializer, - final Headers headers) { + public MessagePayloadAdapter(final Serializer

userSerializer) { - this.messageSerializer = messageSerializer; - this.headers = headers; + this.userSerializer = userSerializer; } /** @@ -48,10 +40,10 @@ public MessagePayloadAdapter(final Serializer

messageSerializer, * * @param messagePayload a {@link MessagePayload} instance to be adapted. * @return a {@link DatabusMessage} instance. + * @param headers headers */ - @Override - public DatabusMessage adapt(final MessagePayload

messagePayload) { - final byte[] payload = messageSerializer.serialize(messagePayload.getPayload()); + public DatabusMessage adapt(final MessagePayload

messagePayload, final Headers headers) { + final byte[] payload = userSerializer.serialize(messagePayload.getPayload()); return new DatabusMessage(headers, payload); } diff --git a/src/main/java/com/opendxl/databus/consumer/DatabusConsumer.java b/src/main/java/com/opendxl/databus/consumer/DatabusConsumer.java index 6711d10..b959753 100644 --- a/src/main/java/com/opendxl/databus/consumer/DatabusConsumer.java +++ b/src/main/java/com/opendxl/databus/consumer/DatabusConsumer.java @@ -75,6 +75,23 @@ public DatabusConsumer(final Map configs, final Deserializer

this(configs, messageDeserializer, null, null); } + /** + * A consumer is instantiated by providing a set of key-value pairs as configuration. Valid configuration strings + * are documented here. Values can be + * either strings or objects of the appropriate type (for example a numeric configuration would accept either the + * string "42" or the integer 42). + *

 + Valid configuration strings are documented at {@link org.apache.kafka.clients.consumer.ConsumerConfig} + * + * @param configs The consumer configs + * @param messageDeserializer a {@link Deserializer} instance implemented by the SDK's user + * @param tierStorage Tier Storage + * @throws DatabusClientRuntimeException if a DatabusConsumer instance could not be created + */ + public DatabusConsumer(final Map configs, final Deserializer

messageDeserializer, + final TierStorage tierStorage) { + this(configs, messageDeserializer, null, tierStorage); + } /** * A consumer is instantiated by providing a set of key-value pairs as configuration. Valid configuration strings * are documented here. Values can be @@ -86,7 +103,8 @@ public DatabusConsumer(final Map configs, final Deserializer

* @param configs The consumer configs * @param messageDeserializer a {@link Deserializer} getInstance implementd by SDK's user * @param credential identity to authenticate/authorization - * @param tierStorage Tier storage + * @param tierStorage Tier Storage + * * @throws DatabusClientRuntimeException if a DatabusConsumer getInstance was not able to be created */ public DatabusConsumer(final Map configs, final Deserializer

messageDeserializer, @@ -121,6 +139,11 @@ public DatabusConsumer(final Properties properties, final Deserializer

messag this(properties, messageDeserializer, null, null); } + public DatabusConsumer(final Properties properties, final Deserializer

messageDeserializer, + final TierStorage tierStorage) { + this(properties, messageDeserializer, null, tierStorage); + } + /** * A consumer is instantiated by providing a {@link Properties} object as configuration. Valid * configuration strings are documented at {@link org.apache.kafka.clients.consumer.ConsumerConfig} @@ -132,7 +155,7 @@ public DatabusConsumer(final Properties properties, final Deserializer

messag * @param properties The consumer configuration properties * @param messageDeserializer a {@link Deserializer} getInstance implementd by SDK's user * @param credential identity to authenticate/authorization - * @param tierStorage Tier storage + * @param tierStorage Tier Storage * * @throws DatabusClientRuntimeException if a DatabusConsumer getInstance was not able to be created */ @@ -141,6 +164,7 @@ public DatabusConsumer(final Properties properties, final Deserializer

messag try { Map configuration = configureCredential((Map) properties, credential); configuration = configureClientId(configuration); + configuration.put(ConsumerConfiguration.ISOLATION_LEVEL_CONFIG, "read_committed"); setFieldMembers(messageDeserializer, configuration, tierStorage); setConsumer(new KafkaConsumer(configuration, getKeyDeserializer(), getValueDeserializer())); } catch (DatabusClientRuntimeException e) { diff --git a/src/main/java/com/opendxl/databus/entities/RoutingData.java b/src/main/java/com/opendxl/databus/entities/RoutingData.java index b08d720..76ed3b1 100644 --- a/src/main/java/com/opendxl/databus/entities/RoutingData.java +++ b/src/main/java/com/opendxl/databus/entities/RoutingData.java @@ -49,15 +49,23 @@ public class RoutingData { */ private String tenantGroup = DEFAULT_TENANT_GROUP; + /** + * Tier Storage Metadata + */ + private TierStorageMetadata tierStorageMetadata; + /** * RoutingData constructor with only topic name parameter * * @param topic The topic name where the message must be sent */ public RoutingData(final String topic) { - this(topic, null, null, null); + this(topic, null, null, null, null); } + public RoutingData(final String topic, final TierStorageMetadata tierStorageMetadata) { + this(topic, null, null, null, tierStorageMetadata); + } /** * RoutingData constructor with topic name sharding key and tenant group parameters * @@ -66,9 +74,15 @@ public RoutingData(final String topic) { * @param tenantGroup The name that groups topics */ public RoutingData(final String topic, final String shardingKey, final String tenantGroup) { - this(topic, shardingKey, tenantGroup, null); + this(topic, shardingKey, tenantGroup, null, null); } + public RoutingData(final String topic, + final String shardingKey, + final String tenantGroup, + final TierStorageMetadata tierStorageMetadata) { + this(topic, shardingKey, tenantGroup, null, tierStorageMetadata); + } /** * RoutingData constructor with all parameters * @@ -76,10 +90,11 @@ public 
RoutingData(final String topic, final String shardingKey, final String te * @param shardingKey The Databus sharding key * @param tenantGroup The name that groups topics * @param partition The partition number + * @param tierStorageMetadata Tier Storage Metadata + * */ public RoutingData(final String topic, final String shardingKey, final String tenantGroup, - final Integer partition) { - + final Integer partition, final TierStorageMetadata tierStorageMetadata) { if (StringUtils.isBlank(topic)) { throw new DatabusClientRuntimeException("topic cannot be empty or null", RoutingData.class); } @@ -87,6 +102,7 @@ public RoutingData(final String topic, final String shardingKey, final String te this.tenantGroup = Optional.ofNullable(tenantGroup).orElse("").trim(); this.shardingKey = shardingKey; this.partition = partition; + this.tierStorageMetadata = tierStorageMetadata; } /** @@ -124,4 +140,9 @@ public String getTenantGroup() { public Integer getPartition() { return partition; } + + public TierStorageMetadata getTierStorageMetadata() { + return tierStorageMetadata; + } + } diff --git a/src/main/java/com/opendxl/databus/entities/TierStorageMetadata.java b/src/main/java/com/opendxl/databus/entities/TierStorageMetadata.java new file mode 100644 index 0000000..2b76455 --- /dev/null +++ b/src/main/java/com/opendxl/databus/entities/TierStorageMetadata.java @@ -0,0 +1,22 @@ +package com.opendxl.databus.entities; + +public class TierStorageMetadata { + + private final String bucketName; + private final String objectName; + + public TierStorageMetadata(final String bucketName, final String objectName) { + this.bucketName = bucketName.trim(); + this.objectName = objectName.trim(); + } + + public String getBucketName() { + return bucketName; + } + + public String getObjectName() { + return objectName; + } + + +} diff --git a/src/main/java/com/opendxl/databus/producer/DatabusProducer.java b/src/main/java/com/opendxl/databus/producer/DatabusProducer.java index 95cff21..7c0549b 100644 
--- a/src/main/java/com/opendxl/databus/producer/DatabusProducer.java +++ b/src/main/java/com/opendxl/databus/producer/DatabusProducer.java @@ -128,7 +128,7 @@ public DatabusProducer(final Map configs, final Serializer

me setFieldMembers(messageSerializer); this.setConfiguration(overrideConfig(configs)); this.configureCredential(getConfiguration(), credential); - setProducer(new KafkaProducer(this.getConfiguration(), getKeySerializer(), getValueSerializer())); + setProducer(new KafkaProducer(this.getConfiguration(), getKeySerializer(), getKafkaValueSerializer())); setClientId((String) configs.get(ProducerConfig.CLIENT_ID_CONFIG)); } catch (DatabusClientRuntimeException e) { throw e; @@ -176,7 +176,7 @@ public DatabusProducer(final Properties properties, final Serializer

messageS Properties fixedProperties = overrideConfig(properties); this.setConfiguration((Map) fixedProperties); this.configureCredential(getConfiguration(), credential); - setProducer(new KafkaProducer(this.getConfiguration(), getKeySerializer(), getValueSerializer())); + setProducer(new KafkaProducer(this.getConfiguration(), getKeySerializer(), getKafkaValueSerializer())); setClientId((String) fixedProperties.get(ProducerConfig.CLIENT_ID_CONFIG)); } catch (DatabusClientRuntimeException e) { throw e; @@ -197,8 +197,8 @@ private void setFieldMembers(final Serializer

messageSerializer) { + "Message Serializer cannot be null" , DatabusProducer.class); } - setKeySerializer(new DatabusKeySerializer()); - setValueSerializer(new MessageSerializer()); + setKafkaKeySerializer(new DatabusKeySerializer()); + setKafkaValueSerializer(new MessageSerializer()); setDatabusProducerRecordAdapter(new DatabusProducerRecordAdapter

(messageSerializer)); } diff --git a/src/main/java/com/opendxl/databus/producer/DatabusTierStorageProducer.java b/src/main/java/com/opendxl/databus/producer/DatabusTierStorageProducer.java new file mode 100644 index 0000000..7a3ee8a --- /dev/null +++ b/src/main/java/com/opendxl/databus/producer/DatabusTierStorageProducer.java @@ -0,0 +1,229 @@ +/*---------------------------------------------------------------------------* + * Copyright (c) 2019 McAfee, LLC - All Rights Reserved. * + *---------------------------------------------------------------------------*/ + +package com.opendxl.databus.producer; + +import com.opendxl.databus.common.RecordMetadata; +import com.opendxl.databus.common.internal.adapter.DatabusProducerRecordAdapter; +import com.opendxl.databus.common.internal.adapter.MessagePayloadAdapter; +import com.opendxl.databus.credential.Credential; +import com.opendxl.databus.entities.MessagePayload; +import com.opendxl.databus.entities.TierStorage; +import com.opendxl.databus.entities.TierStorageMetadata; +import com.opendxl.databus.entities.internal.DatabusMessage; +import com.opendxl.databus.exception.DatabusClientRuntimeException; +import com.opendxl.databus.serialization.Serializer; +import com.opendxl.databus.serialization.internal.MessageSerializer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Map; +import java.util.Optional; +import java.util.Properties; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; + + +public class DatabusTierStorageProducer

extends DatabusProducer

{ + + private static final Logger LOG = LoggerFactory.getLogger(DatabusTierStorageProducer.class); + + private TierStorage tierStorage; + private MessagePayloadAdapter

messagePayloadAdapter; + + public DatabusTierStorageProducer(final Map configs, final Serializer

userSerializer, + final TierStorage tierStorage) { + this(configs, userSerializer, null, tierStorage); + } + + + public DatabusTierStorageProducer(final Map configs, final Serializer

userSerializer, + final Credential credential, final TierStorage tierStorage) { + super(configs, userSerializer, credential); + if (tierStorage == null) { + throw new IllegalArgumentException("Tier Storage cannot be null"); + } + this.tierStorage = tierStorage; + setFieldMembers(userSerializer); + initTransactions(); + } + + + public DatabusTierStorageProducer(final Properties properties, final Serializer

userSerializer, + final TierStorage tierStorage) { + this(properties, userSerializer, null, tierStorage); + } + + + public DatabusTierStorageProducer(final Properties properties, final Serializer

userSerializer, + final Credential credential, final TierStorage tierStorage) { + + super(properties, userSerializer, credential); + if (tierStorage == null) { + throw new IllegalArgumentException("Tier Storage cannot be null"); + } + setFieldMembers(userSerializer); + initTransactions(); + } + + private void setFieldMembers(Serializer

 userSerializer) { + setKafkaValueSerializer(new MessageSerializer()); // The serializer used by Kafka + this.messagePayloadAdapter = new MessagePayloadAdapter

(userSerializer); + setDatabusProducerRecordAdapter(new DatabusProducerRecordAdapter

(userSerializer)); + } + + + @Override + public void send(final ProducerRecord record) { + send(record, null); + } + + @Override + public void send(ProducerRecord

producerRecord, final Callback callback) { + + if (producerRecord.getRoutingData().getTierStorageMetadata() == null + || producerRecord.getRoutingData().getTierStorageMetadata().getBucketName() == null + || producerRecord.getRoutingData().getTierStorageMetadata().getBucketName().isEmpty() + || producerRecord.getRoutingData().getTierStorageMetadata().getObjectName() == null + || producerRecord.getRoutingData().getTierStorageMetadata().getObjectName().isEmpty() + ) { + final String errMsg = "Bucket metadatada is invalid"; + LOG.error(errMsg); + throw new IllegalArgumentException(errMsg); + } + + final TierStorageMetadata tierStorageMetadata = + producerRecord.getRoutingData().getTierStorageMetadata(); + + // Serialize the producerRecord payload to be stored with TieredStorage when callback being invoked by Kafka + final DatabusMessage databusMessage = + messagePayloadAdapter.adapt(producerRecord.payload(), producerRecord.getHeaders()); + final byte[] databusMessageSerialized = getKafkaValueSerializer().serialize("", databusMessage); + + // Remove the producerRecord headers and payload + final ProducerRecord

adaptedProducerRecord = new ProducerRecord(producerRecord.getRoutingData(), + producerRecord.getHeaders(), + new MessagePayload(null)); + + + // Get a Kafka Producer Record made up by a DatabusMessage: + // version = AVRO_1_S3_TIER_STORAGE_VERSION_NUMBER + // headers = empty + // payload = empty + org.apache.kafka.clients.producer.ProducerRecord targetProducerRecord = + getDatabusProducerRecordAdapter().adapt(adaptedProducerRecord); + + // Create the callback + CountDownLatch latch = new CountDownLatch(1); + final CallbackAdapterTierStorage callbackAdapterTierStorage; + if (callback != null) { + callbackAdapterTierStorage = new CallbackAdapterTierStorage(callback, + databusMessageSerialized, + latch, + tierStorageMetadata); + } else { + callbackAdapterTierStorage = null; + } + + beginTransaction(); + super.sendKafkaRecord(targetProducerRecord, callbackAdapterTierStorage); + try { + // wait for callback ends + final boolean callbackFinished = latch.await(10000, TimeUnit.MILLISECONDS); + if (callbackFinished) { + if (callbackAdapterTierStorage.isOk()) { + commitTransaction(); + } else { + abortTransaction(); + } + } else { // means that the callback has not finished in time + LOG.error("Record not produced. Too long time taken by tier storage."); + abortTransaction(); + } + + } catch (InterruptedException e) { + abortTransaction(); + } + } + + + /** + * Callback Adapter + *

+ * It forwards a kafka callback to databus callback + */ + private class CallbackAdapterTierStorage implements org.apache.kafka.clients.producer.Callback { + private final Callback callback; + private final byte[] databusMessageSerialized; + private CountDownLatch latch; + private TierStorageMetadata tierStorageMetadata; + private AtomicBoolean isOk = new AtomicBoolean(false); + + /** + * @param callback Databus callback + * @param databusMessageSerialized + * @param latch + */ + CallbackAdapterTierStorage(final Callback callback, + final byte[] databusMessageSerialized, + final CountDownLatch latch, + final TierStorageMetadata tierStorageMetadata) { + this.callback = callback; + this.databusMessageSerialized = databusMessageSerialized; + this.latch = latch; + this.tierStorageMetadata = tierStorageMetadata; + } + + /** + * It is called as a send result. Then it is forwarded and adapted to databus callback + * + * @param recordMetadata Kafka RecordMetadata + * @param exception An exception thrown by Databus broker + */ + @Override + public void onCompletion(final org.apache.kafka.clients.producer.RecordMetadata recordMetadata, + final Exception exception) { + + if (exception != null) { + LOG.error("The record was not produced. " + exception.getMessage(), exception); + response(recordMetadata, exception); + return; + } + + try { + + tierStorage.put(tierStorageMetadata.getBucketName(), + tierStorageMetadata.getObjectName(), + databusMessageSerialized); + response(recordMetadata, exception); + } catch (DatabusClientRuntimeException databusException) { + LOG.error("The record was not produced. 
" + databusException.getMessage(), databusException); + response(recordMetadata, databusException); + } + } + + /** + * Send callback response + * + * @param recordMetadata recordMetadata + * @param exception exception + */ + private void response(final org.apache.kafka.clients.producer.RecordMetadata kafkaRecordMetadata, + final Exception exception) { + isOk.set(exception == null); + latch.countDown(); + final RecordMetadata databusRecordMetadata = + Optional.ofNullable(new RecordMetadata(kafkaRecordMetadata)) + .orElse(null); + callback.onCompletion(databusRecordMetadata, exception); + } + + protected boolean isOk() { + return isOk.get(); + } + } + + +} diff --git a/src/main/java/com/opendxl/databus/producer/Producer.java b/src/main/java/com/opendxl/databus/producer/Producer.java index 8e7288e..bcc6b25 100644 --- a/src/main/java/com/opendxl/databus/producer/Producer.java +++ b/src/main/java/com/opendxl/databus/producer/Producer.java @@ -30,7 +30,8 @@ import java.util.concurrent.TimeUnit; /** - * A abstract producer, responsible for handling Databus outgoing messages. + * A abstract producer, responsible for handling Databus outgoing messages. + * * @param

payload's type */ public abstract class Producer

{ @@ -43,7 +44,7 @@ public abstract class Producer

{ /** * A Kafka Serializer of {@link DatabusMessage}. */ - private org.apache.kafka.common.serialization.Serializer valueSerializer; + private org.apache.kafka.common.serialization.Serializer kafkaValueSerializer; /** * A configuration map for the producer. @@ -133,9 +134,9 @@ public void send(final ProducerRecord record) { * expensive callbacks it is recommended to use your own {@link java.util.concurrent.Executor} in the callback body * to parallelize processing. * - * @param producerRecord The non-null record to send - * @param callback A user-supplied callback to execute when the record has been acknowledged by the server (null - * indicates no callback) + * @param producerRecord The non-null record to send + * @param callback A user-supplied callback to execute when the record has been acknowledged by the server + * (null indicates no callback) * @throws IllegalArgumentException If record argumet is null * @throws DatabusClientRuntimeException If send method fails. The original cause could be any of these exceptions: *

SerializationException If the key or value are not valid objects @@ -161,13 +162,19 @@ public void send(final ProducerRecord

producerRecord, final Callback callback callbackAdapter = null; } - producer.send(targetProducerRecord, callbackAdapter); + sendKafkaRecord(targetProducerRecord, callbackAdapter); } catch (Exception e) { throw new DatabusClientRuntimeException("send cannot be performed: " + e.getMessage(), e, Producer.class); } } + protected void + sendKafkaRecord(final org.apache.kafka.clients.producer.ProducerRecord record, + final org.apache.kafka.clients.producer.Callback callback) { + producer.send(record, callback); + } + /** * Invoking this method makes all buffered records immediately available to send (even if linger.ms is * greater than 0) and blocks on the completion of the requests associated with these records. The post-condition @@ -216,7 +223,7 @@ public void flush() { * @param topic to get info * @return List of {@link PartitionInfo} * @throws DatabusClientRuntimeException If partitionsFor method fails. - * The original cause could be the following exception: + * The original cause could be the following exception: *

InterruptException If the thread is interrupted while blocked */ public List partitionsFor(final String topic) { @@ -301,7 +308,7 @@ public void close(final long timeout, final TimeUnit timeUnit) { * * @param keySerializer A DatabusKeySerializer Instance */ - protected void setKeySerializer(final DatabusKeySerializer keySerializer) { + protected void setKafkaKeySerializer(final DatabusKeySerializer keySerializer) { this.keySerializer = keySerializer; } @@ -311,14 +318,15 @@ protected void setKeySerializer(final DatabusKeySerializer keySerializer) { * @param valueSerializer A Serializer object instance for the value serializer */ protected void - setValueSerializer(final org.apache.kafka.common.serialization.Serializer valueSerializer) { - this.valueSerializer = valueSerializer; + setKafkaValueSerializer(final org.apache.kafka.common.serialization.Serializer + kafkaValueSerializer) { + this.kafkaValueSerializer = kafkaValueSerializer; } /** * Get the key serializer from producer * - * @return A {@link DatabusKeySerializer} object instance + * @return A {@link DatabusKeySerializer} object instance */ protected DatabusKeySerializer getKeySerializer() { return keySerializer; @@ -327,16 +335,16 @@ protected DatabusKeySerializer getKeySerializer() { /** * Get the value serializer from producer * - * @return A {@link org.apache.kafka.common.serialization.Serializer} object instance + * @return A {@link org.apache.kafka.common.serialization.Serializer} object instance */ - protected org.apache.kafka.common.serialization.Serializer getValueSerializer() { - return valueSerializer; + protected org.apache.kafka.common.serialization.Serializer getKafkaValueSerializer() { + return kafkaValueSerializer; } /** * Set a Kafka producer instance to the producer. 
* - * @return A {@link org.apache.kafka.clients.producer.Producer} object instance to set in the producer + * @return A {@link org.apache.kafka.clients.producer.Producer} object instance to set in the producer */ protected void setProducer(final org.apache.kafka.clients.producer.Producer producer) { this.producer = producer; @@ -351,6 +359,16 @@ protected void setDatabusProducerRecordAdapter(final DatabusProducerRecordAdapte this.databusProducerRecordAdapter = databusProducerRecordAdapter; } + + /** + * Set a {@link DatabusProducerRecordAdapter} associated to the producer. + * + * @param databusProducerRecordAdapter The {@link DatabusProducerRecordAdapter} to set to the producer + */ + protected DatabusProducerRecordAdapter

getDatabusProducerRecordAdapter() { + return this.databusProducerRecordAdapter; + } + /** * Set the clientId to the producer * @@ -406,7 +424,7 @@ public void setConfiguration(final Map configuration) { /** * Needs to be called before any other methods when the transactional.id is set in the configuration. - * + *

* This method does the following: * 1. Ensures any transactions initiated by previous instances of the producer with the same * transactional.id are completed. If the previous instance had failed with a transaction in @@ -471,7 +489,7 @@ public void beginTransaction() { * (via {@link Consumer#commitSync(Map) sync} or * {@link Consumer#commitAsync(OffsetCommitCallback)} commits). * - * @param offsets offsets + * @param offsets offsets * @param consumerGroupId consumer group id * @throws DatabusClientRuntimeException If method fails. The original cause could be any of these exceptions: *

IllegalStateException if no transactional.id has been configured or no transaction has been started @@ -486,7 +504,7 @@ public void beginTransaction() { * other unexpected error */ public void sendOffsetsToTransaction(final Map offsets, - final String consumerGroupId) { + final String consumerGroupId) { try { Map adaptedOffsets = new HashMap(); @@ -509,20 +527,20 @@ public void sendOffsetsToTransaction(final Map * Further, if any of the {@link #send(ProducerRecord)} calls which were part of the transaction hit irrecoverable * errors, this method will throw the last received exception immediately and the transaction will not be committed. * So all {@link #send(ProducerRecord)} calls in a transaction must succeed in order for this method to succeed. - * + *

* DatabusClientRuntimeException If method fails. The original cause could be any of these exceptions: *

IllegalStateException if no transactional.id has been configured or no transaction has been started *

ProducerFencedException fatal error indicating another producer with the same transactional.id is active *

org.apache.kafka.common.errors.UnsupportedVersionException fatal error indicating the broker - * does not support transactions (i.e. if its version is lower than 0.11.0.0) + * does not support transactions (i.e. if its version is lower than 0.11.0.0) *

org.apache.kafka.common.errors.AuthorizationException fatal error indicating that the configured - * transactional.id is not authorized. See the exception for more details + * transactional.id is not authorized. See the exception for more details *

KafkaException if the producer has encountered a previous fatal or abortable error, or for any - * other unexpected error + * other unexpected error */ public void commitTransaction() { try { @@ -544,9 +562,9 @@ public void commitTransaction() { *

IllegalStateException if no transactional.id has been configured or no transaction has been started *

ProducerFencedException fatal error indicating another producer with the same transactional.id is active *

org.apache.kafka.common.errors.UnsupportedVersionException fatal error indicating the broker - * does not support transactions (i.e. if its version is lower than 0.11.0.0) + * does not support transactions (i.e. if its version is lower than 0.11.0.0) *

org.apache.kafka.common.errors.AuthorizationException fatal error indicating that the configured - * transactional.id is not authorized. See the exception for more details + * transactional.id is not authorized. See the exception for more details *

KafkaException if the producer has encountered a previous fatal error or for any other unexpected error */ public void abortTransaction() { @@ -715,7 +733,7 @@ private ProducerMetric getMetricPerClientId(final ProducerMetricEnum producerMet /** * Gets a {@link ProducerMetric} given a Topic name and a {@link ProducerMetricEnum}. * - * @param topic The topic name. + * @param topic The topic name. * @param producerMetricEnum The {@link ProducerMetricEnum} to get the metric. * @return a {@link ProducerMetric} instance. */ diff --git a/src/main/java/com/opendxl/databus/serialization/internal/AvroMessageDeserializer.java b/src/main/java/com/opendxl/databus/serialization/internal/AvroMessageDeserializer.java index 9de0383..0c9a0e0 100644 --- a/src/main/java/com/opendxl/databus/serialization/internal/AvroMessageDeserializer.java +++ b/src/main/java/com/opendxl/databus/serialization/internal/AvroMessageDeserializer.java @@ -83,18 +83,22 @@ public DatabusMessage deserialize(String topic, byte[] data, TierStorage tierSto if (tierStorage != null) { final String bucketName = headers.get(HeaderInternalField.TIER_STORAGE_BUCKET_NAME_KEY); final String objectName = headers.get(HeaderInternalField.TIER_STORAGE_OBJECT_NAME_KEY); - if (bucketName != null && objectName != null) { - byte[] object = null; + + if (bucketName != null && objectName != null && !bucketName.isEmpty() && !objectName.isEmpty()) { + byte[] tierStorageObjectContent = null; try { - object = tierStorage.get(bucketName, objectName); + tierStorageObjectContent = tierStorage.get(bucketName, objectName); } catch (Exception e) { LOG.error("Error when reading message from Tier Storage. 
Bucket Name: " + bucketName + "Object Name: " + objectName, e); } - if (object != null || object.length > 0) { - avroRecord = reader.read(null, DecoderFactory.get().binaryDecoder(object, null)); + if (tierStorageObjectContent != null && tierStorageObjectContent.length > 0) { + MessageStructure messageStructure = + MessageStructureFactory.getStructure(tierStorageObjectContent); + avroRecord = reader + .read(null, DecoderFactory.get().binaryDecoder(messageStructure.getPayload(), null)); headers = new HeadersAvroDeserializedAdapter().adapt(avroRecord.get("headers")); payload = new PayloadHeadersAvroDeserializedAdapter().adapt(avroRecord.get("payload")); } else { diff --git a/src/test/java/com/opendxl/databus/entities/S3TierStorageTest.java b/src/test/java/com/opendxl/databus/entities/S3TierStorageTest.java new file mode 100644 index 0000000..ef0e886 --- /dev/null +++ b/src/test/java/com/opendxl/databus/entities/S3TierStorageTest.java @@ -0,0 +1,102 @@ +package com.opendxl.databus.entities; + +import com.amazonaws.ClientConfiguration; +import com.amazonaws.auth.AWSStaticCredentialsProvider; +import com.amazonaws.auth.AnonymousAWSCredentials; +import com.amazonaws.client.builder.AwsClientBuilder; +import com.amazonaws.services.s3.AmazonS3Client; +import com.amazonaws.services.s3.AmazonS3ClientBuilder; +import io.findify.s3mock.S3Mock; +import junit.extensions.PA; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.util.Arrays; + +public class S3TierStorageTest { + + private static final String AWS_SECRET_KEY = "secretKey"; + private static final String AWS_ACCESS_KEY = "accessKey"; + private static final String AWS_REGION = "us-east-1"; + private static S3Mock api; + private static AmazonS3Client client; + + + @BeforeClass + public static void beforeClass() { + api = new S3Mock.Builder().withPort(8001).withInMemoryBackend().build(); + api.start(); + AwsClientBuilder.EndpointConfiguration endpoint 
= + new AwsClientBuilder + .EndpointConfiguration("http://localhost:8001", "us-east-1"); + + client = (AmazonS3Client) AmazonS3ClientBuilder + .standard() + .withPathStyleAccessEnabled(true) + .withEndpointConfiguration(endpoint) + .withCredentials(new AWSStaticCredentialsProvider(new AnonymousAWSCredentials())) + .build(); + } + + @AfterClass + public static void afterClass() { + api.shutdown(); // kills the underlying actor system. Use api.stop() to just unbind the port. + } + + @Test + public void shouldPutAngGetAnS3ObjectWithCredentials() { + + final String bucketName = "bucket-name"; + final String objectName = "object-name"; + final String objectRaw = "Hello!"; + final byte[] objectContent = objectRaw.getBytes(); + + try { + TierStorage tierStorage = new S3TierStorage(AWS_ACCESS_KEY, AWS_SECRET_KEY, AWS_REGION, + new ClientConfiguration()); + + PA.setValue(tierStorage, "s3Client", client); + tierStorage.put(bucketName, objectName, objectContent); + + Assert.assertTrue(tierStorage.doesObjectExist(bucketName, objectName)); + + + byte[] actualObjectContent = tierStorage.get(bucketName, objectName); + Assert.assertTrue(Arrays.equals(actualObjectContent, objectContent)); + String actualObjectRaw = new String(actualObjectContent); + Assert.assertTrue(actualObjectRaw.equals(objectRaw)); + + } catch (Exception e) { + Assert.fail(); + } + } + + @Test + public void shouldPutAngGetAnS3ObjectWithoutCredentials() { + + final String bucketName = "bucket-name"; + final String objectName = "object-name"; + final String objectRaw = "Hello!"; + final byte[] objectContent = objectRaw.getBytes(); + + try { + TierStorage tierStorage = new S3TierStorage(AWS_REGION, new ClientConfiguration()); + + PA.setValue(tierStorage, "s3Client", client); + tierStorage.put(bucketName, objectName, objectContent); + + Assert.assertTrue(tierStorage.doesObjectExist(bucketName, objectName)); + + byte[] actualObjectContent = tierStorage.get(bucketName, objectName); + 
Assert.assertTrue(Arrays.equals(actualObjectContent, objectContent)); + String actualObjectRaw = new String(actualObjectContent); + Assert.assertTrue(actualObjectRaw.equals(objectRaw)); + + } catch (Exception e) { + Assert.fail(); + } + } + +} \ No newline at end of file From 9d1def1f6ba3858bfa58b80894fa1beb071f9e45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20Villalba=C2=A0=20=C2=A0=20=C2=A0=20=C2=A0?= Date: Thu, 26 Mar 2020 14:41:02 -0300 Subject: [PATCH 16/20] DXL Tiered Stora: Add UT, code grooming, javadoc --- broker/src/broker/ClusterHelper.java | 1 + build.gradle | 6 +- .../sample/BasicConsumerProducerExample.java | 13 +- ...cS3TierStorageConsumerProducerExample.java | 46 +- .../adapter/DatabusProducerRecordAdapter.java | 14 +- .../opendxl/databus/entities/RoutingData.java | 7 +- .../databus/entities/S3TierStorage.java | 82 +++- .../producer/DatabusTierStorageProducer.java | 340 +++++++++++---- .../opendxl/databus/producer/Producer.java | 110 +++-- .../internal/AvroMessageDeserializer.java | 25 +- .../internal/InternalDeserializer.java | 7 + .../internal/LegacyMessageDeserializer.java | 17 +- .../internal/MessageDeserializer.java | 12 + .../internal/RawMessageDeserializer.java | 9 +- .../databus/entities/S3TierStorageTest.java | 4 +- ...3TierStorageTestForUnreachableService.java | 98 +++++ .../DatabusTierStorageProducerTest.java | 397 ++++++++++++++++++ ...tabusTierStorageProducerWithoutS3Test.java | 187 +++++++++ 18 files changed, 1212 insertions(+), 163 deletions(-) create mode 100644 src/test/java/com/opendxl/databus/entities/S3TierStorageTestForUnreachableService.java create mode 100644 src/test/java/com/opendxl/databus/producer/DatabusTierStorageProducerTest.java create mode 100644 src/test/java/com/opendxl/databus/producer/DatabusTierStorageProducerWithoutS3Test.java diff --git a/broker/src/broker/ClusterHelper.java b/broker/src/broker/ClusterHelper.java index 8080cbf..1e73d1d 100644 --- a/broker/src/broker/ClusterHelper.java +++ 
b/broker/src/broker/ClusterHelper.java @@ -136,6 +136,7 @@ private Properties getConfig(final int port) { config.setProperty("num.partitions", String.valueOf(6)); config.setProperty("transaction.state.log.replication.factor", String.valueOf(1)); config.setProperty("transaction.state.log.min.isr", String.valueOf(1)); + config.setProperty("log.message.format.version", "2.3.0"); } catch (Exception e) { e.printStackTrace(); } diff --git a/build.gradle b/build.gradle index 5615bb6..b24797e 100644 --- a/build.gradle +++ b/build.gradle @@ -66,15 +66,17 @@ dependencies { implementation 'org.scala-lang:scala-reflect:2.12.11' implementation 'org.apache.avro:avro:1.9.0' implementation 'com.google.code.gson:gson:2.8.5' + implementation 'org.apache.avro:avro:1.9.2' + implementation 'com.google.code.gson:gson:2.8.6' implementation 'org.apache.commons:commons-configuration2:2.7' implementation 'commons-lang:commons-lang:2.6' implementation 'org.slf4j:slf4j-api:1.7.30' implementation 'net.sf.jopt-simple:jopt-simple:5.0.4' implementation 'commons-io:commons-io:2.6' - implementation 'software.amazon.awssdk:bom:2.10.73' - implementation 'com.amazonaws:aws-java-sdk-s3:1.11.731' + implementation 'software.amazon.awssdk:bom:2.11.1' + implementation 'com.amazonaws:aws-java-sdk-s3:1.11.751' testImplementation 'org.apache.zookeeper:zookeeper:3.5.5' testImplementation('org.apache.kafka:kafka_2.12:2.3.1') { exclude group: 'org.scala-lang', module: 'scala-reflect' diff --git a/sample/src/sample/BasicConsumerProducerExample.java b/sample/src/sample/BasicConsumerProducerExample.java index 3ab16a9..d6bb204 100644 --- a/sample/src/sample/BasicConsumerProducerExample.java +++ b/sample/src/sample/BasicConsumerProducerExample.java @@ -4,7 +4,6 @@ package sample; -import broker.ClusterHelper; import com.opendxl.databus.common.RecordMetadata; import com.opendxl.databus.common.internal.builder.TopicNameBuilder; import com.opendxl.databus.consumer.*; @@ -47,11 +46,11 @@ public class 
BasicConsumerProducerExample { public BasicConsumerProducerExample() { // Start Kafka cluster - ClusterHelper - .getInstance() - .addBroker(9092) - .zookeeperPort(2181) - .start(); +// ClusterHelper +// .getInstance() +// .addBroker(9092) +// .zookeeperPort(2181) +// .start(); // Prepare a Producer this.producer = getProducer(); @@ -197,7 +196,7 @@ synchronized private void stopExample(final ExecutorService executor) { try { closed.set(true); consumer.wakeup(); - ClusterHelper.getInstance().stop(); +// ClusterHelper.getInstance().stop(); executor.shutdown(); executor.awaitTermination(5, TimeUnit.SECONDS); } catch (InterruptedException e) { diff --git a/sample/src/sample/BasicS3TierStorageConsumerProducerExample.java b/sample/src/sample/BasicS3TierStorageConsumerProducerExample.java index f68eae9..d89dfb5 100644 --- a/sample/src/sample/BasicS3TierStorageConsumerProducerExample.java +++ b/sample/src/sample/BasicS3TierStorageConsumerProducerExample.java @@ -8,18 +8,34 @@ import com.amazonaws.ClientConfiguration; import com.opendxl.databus.common.RecordMetadata; import com.opendxl.databus.common.internal.builder.TopicNameBuilder; -import com.opendxl.databus.consumer.*; -import com.opendxl.databus.entities.*; -import com.opendxl.databus.producer.*; +import com.opendxl.databus.consumer.Consumer; +import com.opendxl.databus.consumer.ConsumerConfiguration; +import com.opendxl.databus.consumer.ConsumerRecord; +import com.opendxl.databus.consumer.ConsumerRecords; +import com.opendxl.databus.consumer.DatabusConsumer; +import com.opendxl.databus.entities.Headers; +import com.opendxl.databus.entities.MessagePayload; +import com.opendxl.databus.entities.RoutingData; +import com.opendxl.databus.entities.S3TierStorage; +import com.opendxl.databus.entities.TierStorage; +import com.opendxl.databus.entities.TierStorageMetadata; +import com.opendxl.databus.producer.Callback; +import com.opendxl.databus.producer.DatabusTierStorageProducer; +import com.opendxl.databus.producer.Producer; 
+import com.opendxl.databus.producer.ProducerConfig; +import com.opendxl.databus.producer.ProducerRecord; import com.opendxl.databus.serialization.ByteArrayDeserializer; import com.opendxl.databus.serialization.ByteArraySerializer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.IOException; import java.nio.charset.Charset; import java.time.LocalDateTime; -import java.util.*; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.Properties; +import java.util.UUID; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; @@ -28,6 +44,9 @@ public class BasicS3TierStorageConsumerProducerExample { + private static final String AWS_REGION = "add-aws-region-name-here"; + private static final String S3_ACCESS_KEY = "add-your-access-key-here"; + private static final String S3_SECRET_KEY = "add-your-secret-key-here"; private final Producer producer; private final ExecutorService executor; private final TierStorage tierStorage; @@ -50,12 +69,12 @@ public BasicS3TierStorageConsumerProducerExample() { .zookeeperPort(2181) .start(); - + // Prepare a S3 Tiered Storage ClientConfiguration awsClientConfiguration = new ClientConfiguration(); - - this.tierStorage = new S3TierStorage("", - "", - "", awsClientConfiguration); + this.tierStorage = new S3TierStorage(AWS_REGION, + awsClientConfiguration, + S3_ACCESS_KEY, + S3_SECRET_KEY); // Prepare a Producer this.producer = getProducer(); @@ -77,8 +96,7 @@ public Producer getProducer() { config.put(ProducerConfig.LINGER_MS_CONFIG, "100"); config.put(ProducerConfig.BATCH_SIZE_CONFIG, "150000"); config.put(ProducerConfig.TRANSACTIONAL_ID_CONFIG, UUID.randomUUID().toString()); - config.put(ProducerConfig.ENABLE_IDEMPOTENCE_CONFIG, true); - return new DatabusTierStorageProducer(config, new ByteArraySerializer(), null); + return new DatabusTierStorageProducer(config, new ByteArraySerializer(), tierStorage); } 
public Consumer getConsumer() { @@ -158,7 +176,7 @@ private Runnable getConsumerTask() { consumer.unsubscribe(); try { consumer.close(); - } catch (IOException e) { + } catch (Exception e) { LOG.error(e.getMessage()); } LOG.info("Consumer closed"); @@ -172,10 +190,8 @@ public ProducerRecord getProducerRecord(final String topic, final byte[] String key = String.valueOf(System.currentTimeMillis()); TierStorageMetadata tStorageMetadata = new TierStorageMetadata("databus-poc-test", topic + key); RoutingData routingData = new RoutingData(topic, key, null, tStorageMetadata); - Headers headers = new Headers(); headers.put("k","v"); - MessagePayload messagePayload = new MessagePayload<>(payload); return new ProducerRecord<>(routingData, headers, messagePayload); } diff --git a/src/main/java/com/opendxl/databus/common/internal/adapter/DatabusProducerRecordAdapter.java b/src/main/java/com/opendxl/databus/common/internal/adapter/DatabusProducerRecordAdapter.java index 9b99273..101f8b2 100644 --- a/src/main/java/com/opendxl/databus/common/internal/adapter/DatabusProducerRecordAdapter.java +++ b/src/main/java/com/opendxl/databus/common/internal/adapter/DatabusProducerRecordAdapter.java @@ -26,15 +26,15 @@ public final class DatabusProducerRecordAdapter

/** * The message serializer. */ - private final Serializer

messageSerializer; + private final Serializer

userSerializer; /** * Constructor * - * @param messageSerializer a {@link Serializer} instance used for Serializing the payload. + * @param userSerializer a {@link Serializer} instance used for Serializing the payload. */ - public DatabusProducerRecordAdapter(final Serializer

messageSerializer) { - this.messageSerializer = messageSerializer; + public DatabusProducerRecordAdapter(final Serializer

userSerializer) { + this.userSerializer = userSerializer; } /** @@ -46,7 +46,7 @@ public DatabusProducerRecordAdapter(final Serializer

messageSerializer) { */ @Override public org.apache.kafka.clients.producer.ProducerRecord - adapt(final ProducerRecord sourceProducerRecord) { + adapt(final ProducerRecord

sourceProducerRecord) { final Headers clonedHeaders = sourceProducerRecord.getHeaders().clone(); @@ -60,7 +60,7 @@ public DatabusProducerRecordAdapter(final Serializer

messageSerializer) { } // Add internal headers to let consumer knows the payload is tiered storage - TierStorageMetadata tierStorageMetadata = sourceProducerRecord.getRoutingData().getTierStorageMetadata(); + final TierStorageMetadata tierStorageMetadata = sourceProducerRecord.getRoutingData().getTierStorageMetadata(); if (tierStorageMetadata != null && tierStorageMetadata.getBucketName() != null && !tierStorageMetadata.getBucketName().isEmpty() && tierStorageMetadata.getObjectName() != null && !tierStorageMetadata.getObjectName().isEmpty() @@ -70,7 +70,7 @@ public DatabusProducerRecordAdapter(final Serializer

messageSerializer) { } final DatabusMessage databusMessage = - new MessagePayloadAdapter

(messageSerializer) + new MessagePayloadAdapter<>(userSerializer) .adapt(sourceProducerRecord.payload(), clonedHeaders); final String targetTopic = diff --git a/src/main/java/com/opendxl/databus/entities/RoutingData.java b/src/main/java/com/opendxl/databus/entities/RoutingData.java index 76ed3b1..7854cf1 100644 --- a/src/main/java/com/opendxl/databus/entities/RoutingData.java +++ b/src/main/java/com/opendxl/databus/entities/RoutingData.java @@ -16,7 +16,8 @@ * Represent a address where a message must be sent. * It is used by {@link ProducerRecord} * to know what the destination is. - * It contains a mandatory topic name as well as optionals sharding key and tenant group and partitions. + * It contains a mandatory topic name as well as optionals sharding key, tenant group, + * partitions and tier storage metadata. *

*

* See how to use in {@link DatabusProducer} example @@ -37,12 +38,12 @@ public class RoutingData { /** * The topic name */ - private String topic = null; + private String topic; /** * The sharding key value */ - private String shardingKey = null; + private String shardingKey; /** * The tenant group diff --git a/src/main/java/com/opendxl/databus/entities/S3TierStorage.java b/src/main/java/com/opendxl/databus/entities/S3TierStorage.java index a99bdf6..a04f4ac 100644 --- a/src/main/java/com/opendxl/databus/entities/S3TierStorage.java +++ b/src/main/java/com/opendxl/databus/entities/S3TierStorage.java @@ -9,7 +9,6 @@ import com.amazonaws.services.s3.internal.Mimetypes; import com.amazonaws.services.s3.model.GetObjectRequest; import com.amazonaws.services.s3.model.ObjectMetadata; -import com.amazonaws.services.s3.model.PutObjectResult; import com.amazonaws.services.s3.model.S3Object; import com.opendxl.databus.exception.DatabusClientRuntimeException; import org.apache.commons.io.IOUtils; @@ -19,6 +18,11 @@ import java.io.ByteArrayInputStream; import java.io.InputStream; +/** + * It is a built-in AWS S3 Tier Storage. + * + * It implements mechanisms to upload and download AWS S3 objects + */ public class S3TierStorage implements TierStorage { /** @@ -31,6 +35,13 @@ public class S3TierStorage implements TierStorage { */ private AmazonS3 s3Client; + /** + * Constructor used to create a role-based authenticated tier storage instance. + * + * @param awsRegion AWS region + * @param config AWS client configuration + * @throws DatabusClientRuntimeException exception if the underlying AWS S3 client cannot be created + */ public S3TierStorage(final String awsRegion, final ClientConfiguration config) { @@ -43,15 +54,26 @@ public S3TierStorage(final String awsRegion, try { this.s3Client = s3Builder.build(); } catch (Exception e) { - e.printStackTrace(); - + final String errMsg = "Error creating a S3 Tier Storage. 
Region: " + awsRegion + " " + e.getMessage(); + LOG.error(errMsg, e); + throw new DatabusClientRuntimeException(errMsg, e, this.getClass()); } } - public S3TierStorage(final String awsAccessKey, - final String awsSecretKey, - final String awsRegion, - final ClientConfiguration config) { + + /** + * Constructor used to create a tier storage instance with AWS access and secret key + * + * @param awsRegion AWS region + * @param config AWS client configuration + * @param awsAccessKey AWS access key + * @param awsSecretKey AWS secret key + * @throws DatabusClientRuntimeException exception if the underlying AWS S3 client cannot be created + */ + public S3TierStorage(final String awsRegion, + final ClientConfiguration config, + final String awsAccessKey, + final String awsSecretKey) { AmazonS3ClientBuilder s3Builder = AmazonS3ClientBuilder.standard(); @@ -66,16 +88,24 @@ public S3TierStorage(final String awsAccessKey, try { this.s3Client = s3Builder.build(); } catch (Exception e) { - e.printStackTrace(); + final String errMsg = "Error creating a S3 Tier Storage. Region: " + awsRegion + " " + e.getMessage(); + LOG.error(errMsg, e); + throw new DatabusClientRuntimeException(errMsg, e, this.getClass()); } - - } + + /** + * Upload a object to AWS S3 bucket + * + * @param s3BucketName AWS S3 bucket + * @param s3KeyName AWS S3 object name + * @param payload AWS object content + * @throws DatabusClientRuntimeException exception if the underlying AWS S3 fails. 
+ * + */ @Override - public void put(final String s3BucketName, - final String s3KeyName, - final byte[] payload) { + public void put(final String s3BucketName, final String s3KeyName, final byte[] payload) { try { if (!s3Client.doesBucketExistV2(s3BucketName)) { @@ -86,7 +116,7 @@ public void put(final String s3BucketName, metadata.setContentLength(payload.length); metadata.setContentType(Mimetypes.MIMETYPE_HTML); InputStream s3Object = new ByteArrayInputStream(payload); - PutObjectResult putObjectResult = s3Client.putObject(s3BucketName, s3KeyName, s3Object, metadata); + s3Client.putObject(s3BucketName, s3KeyName, s3Object, metadata); } catch (Exception e) { final String errMsg = "Error uploading S3 object: Bucket: " + " Object: " @@ -98,19 +128,37 @@ public void put(final String s3BucketName, } - public boolean doesObjectExist(String s3BucketName, String s3KeyName) { + /** + * Check if an AWS S3 object exists + * + * @param s3BucketName AWS S3 bucket + * @param s3KeyName AWS object name + * @return a boolean + * @throws DatabusClientRuntimeException exception if the underlying AWS S3 fails. + */ + @Override + public boolean doesObjectExist(final String s3BucketName, final String s3KeyName) { try { return s3Client.doesObjectExist(s3BucketName, s3KeyName); } catch (Exception e) { - final String errMsg = "Error trying to reach S3 object: Bucket: " + " Object: " + s3KeyName + " " + final String errMsg = "Error trying to find a S3 object: Bucket: " + " Object: " + s3KeyName + " " + e.getMessage(); LOG.error(errMsg, e); throw new DatabusClientRuntimeException(errMsg, e, this.getClass()); } } + + /** + * Download a AWS S3 object content + * + * @param s3BucketName AWS S3 bucket name + * @param s3KeyName AWS S3 object name + * @return the object content + * @throws DatabusClientRuntimeException exception if the underlying AWS S3 fails. 
+ */ @Override - public byte[] get(String s3BucketName, String s3KeyName) { + public byte[] get(final String s3BucketName, final String s3KeyName) { try { S3Object s3Object = s3Client.getObject(new GetObjectRequest(s3BucketName, s3KeyName)); return IOUtils.toByteArray(s3Object.getObjectContent()); diff --git a/src/main/java/com/opendxl/databus/producer/DatabusTierStorageProducer.java b/src/main/java/com/opendxl/databus/producer/DatabusTierStorageProducer.java index 7a3ee8a..adccea8 100644 --- a/src/main/java/com/opendxl/databus/producer/DatabusTierStorageProducer.java +++ b/src/main/java/com/opendxl/databus/producer/DatabusTierStorageProducer.java @@ -15,48 +15,96 @@ import com.opendxl.databus.exception.DatabusClientRuntimeException; import com.opendxl.databus.serialization.Serializer; import com.opendxl.databus.serialization.internal.MessageSerializer; +import org.apache.kafka.common.errors.AuthorizationException; +import org.apache.kafka.common.errors.OutOfOrderSequenceException; +import org.apache.kafka.common.errors.ProducerFencedException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.util.Map; -import java.util.Optional; import java.util.Properties; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; - +/** + * It writes a Message to kafka and stores Payload and Header in a Tier Storage. The kafka message is used like + * offsets control and to point to payload which is stored in the Tier Storage. + * + * @param

Payload's type, typically a byte[] + */ public class DatabusTierStorageProducer

extends DatabusProducer

{ + /** + * The logger + */ private static final Logger LOG = LoggerFactory.getLogger(DatabusTierStorageProducer.class); + /** + * Used to save the message in a separated store + */ private TierStorage tierStorage; + + /** + * Transform a user payload in a {@link DatabusMessage} + */ private MessagePayloadAdapter

messagePayloadAdapter; + /** + * Constructor + * + * @param configs Producer configuration + * @param userSerializer user serializer + * @param tierStorage tier storage + */ public DatabusTierStorageProducer(final Map configs, final Serializer

userSerializer, final TierStorage tierStorage) { this(configs, userSerializer, null, tierStorage); } + /** + * Constructor + * + * @param configs producer configuration + * @param userSerializer user serializer + * @param credential credentials + * @param tierStorage tier storage + */ public DatabusTierStorageProducer(final Map configs, final Serializer

userSerializer, final Credential credential, final TierStorage tierStorage) { super(configs, userSerializer, credential); if (tierStorage == null) { throw new IllegalArgumentException("Tier Storage cannot be null"); } + validateConfiguration(configs); this.tierStorage = tierStorage; setFieldMembers(userSerializer); initTransactions(); } + /** + * Constructor + * + * @param properties producer configuration + * @param userSerializer user serializer + * @param tierStorage tier storage + */ public DatabusTierStorageProducer(final Properties properties, final Serializer

userSerializer, final TierStorage tierStorage) { this(properties, userSerializer, null, tierStorage); } + /** + * Constructor + * + * @param properties producer configuration + * @param userSerializer user serializer + * @param credential credential + * @param tierStorage tier storage + */ public DatabusTierStorageProducer(final Properties properties, final Serializer

userSerializer, final Credential credential, final TierStorage tierStorage) { @@ -64,88 +112,215 @@ public DatabusTierStorageProducer(final Properties properties, final Serializer< if (tierStorage == null) { throw new IllegalArgumentException("Tier Storage cannot be null"); } + validateConfiguration(properties); + this.tierStorage = tierStorage; setFieldMembers(userSerializer); initTransactions(); } private void setFieldMembers(Serializer

userSerializer) { setKafkaValueSerializer(new MessageSerializer()); // The serializer used bu Kafka - this.messagePayloadAdapter = new MessagePayloadAdapter

(userSerializer); - setDatabusProducerRecordAdapter(new DatabusProducerRecordAdapter

(userSerializer)); + this.messagePayloadAdapter = new MessagePayloadAdapter<>(userSerializer); + setDatabusProducerRecordAdapter(new DatabusProducerRecordAdapter<>(userSerializer)); } + private void validateConfiguration(final Map config) { + Properties properties = new Properties(); + try { + properties.putAll(config); + } catch (Exception e) { + throw new IllegalArgumentException("Producer configuration is invalid ERROR:" + e.getMessage()); + } + validateConfiguration(properties); + } - @Override - public void send(final ProducerRecord record) { - send(record, null); + private void validateConfiguration(final Properties config) { + if (config.get(ProducerConfig.TRANSACTIONAL_ID_CONFIG) == null) { + throw new IllegalArgumentException("Transaction Id cannot be null or empty"); + } + final String transactionId = config.get(ProducerConfig.TRANSACTIONAL_ID_CONFIG).toString(); + if (transactionId == null || transactionId.trim().isEmpty()) { + throw new IllegalArgumentException("Transaction Id cannot be null or empty"); + } } + /** + * It writes a Message to kafka and stores Payload and Header in Tier Storage. + * The kafka message has headers information pointing to Tier Storage payload. + * Both operation are in the same tansaction. If something goes wrong, they will be consistently aborted + * + * @param producerRecord producer record + */ @Override - public void send(ProducerRecord

producerRecord, final Callback callback) { + public void send(final ProducerRecord

producerRecord) { + try { + validateTierStorageMetadata(producerRecord); + + // Get the Tier Storage from RoutindData which was already created by the user + final TierStorageMetadata tierStorageMetadata = + producerRecord.getRoutingData().getTierStorageMetadata(); + + // Serialize the producerRecord payload to be stored with TieredStorage + // when callback being invoked by Kafka + final DatabusMessage databusMessage = + messagePayloadAdapter.adapt(producerRecord.payload(), producerRecord.getHeaders()); + final byte[] databusMessageSerialized = getKafkaValueSerializer().serialize("", databusMessage); + + // Remove the producerRecord payload to be written in kafka. + final ProducerRecord

adaptedProducerRecord = new ProducerRecord<>(producerRecord.getRoutingData(), + producerRecord.getHeaders(), + new MessagePayload<>(null)); + + // Transform a Databus ProducerRecord in a Kafka Producer Record + org.apache.kafka.clients.producer.ProducerRecord targetProducerRecord = + getDatabusProducerRecordAdapter().adapt(adaptedProducerRecord); + + try { + beginTransaction(); + super.sendKafkaRecord(targetProducerRecord); + tierStorage.put(tierStorageMetadata.getBucketName(), + tierStorageMetadata.getObjectName(), + databusMessageSerialized); + commitTransaction(); + LOG.info("Send Ok. Message was sent and payload was stored in Tier Storage"); + } catch (ProducerFencedException | OutOfOrderSequenceException | AuthorizationException e) { + super.flush(); + super.close(); + final String errMsg = "Send cannot be performed. Producer throws an irrecoverable exception " + + "during a transaction. Producer is closed effective immediately. ERROR:" + e.getMessage(); + LOG.error(errMsg, e); + throw new DatabusClientRuntimeException(errMsg, e, this.getClass()); + + } catch (Exception e) { + abortTransaction(); + final String errMsg = "Send cannot be performed. Producer throws an exception during a transaction. " + + "Producer continues active. Message should be sent again to retry. ERROR:" + e.getMessage(); + LOG.error(errMsg, e); + throw new DatabusClientRuntimeException(errMsg, e, Producer.class); + } + } catch (Exception e) { + final String errMsg = "send cannot be performed: ERROR:" + e.getMessage(); + LOG.error(errMsg, e); + throw new DatabusClientRuntimeException(errMsg, e, Producer.class); + } + + } + protected void validateTierStorageMetadata(ProducerRecord

producerRecord) { if (producerRecord.getRoutingData().getTierStorageMetadata() == null - || producerRecord.getRoutingData().getTierStorageMetadata().getBucketName() == null - || producerRecord.getRoutingData().getTierStorageMetadata().getBucketName().isEmpty() - || producerRecord.getRoutingData().getTierStorageMetadata().getObjectName() == null - || producerRecord.getRoutingData().getTierStorageMetadata().getObjectName().isEmpty() + || producerRecord.getRoutingData().getTierStorageMetadata().getBucketName() == null + || producerRecord.getRoutingData().getTierStorageMetadata().getBucketName().isEmpty() + || producerRecord.getRoutingData().getTierStorageMetadata().getObjectName() == null + || producerRecord.getRoutingData().getTierStorageMetadata().getObjectName().isEmpty() ) { - final String errMsg = "Bucket metadatada is invalid"; + final String errMsg = "Send cannot be performed. Bucket metadatada is invalid"; LOG.error(errMsg); throw new IllegalArgumentException(errMsg); } + } - final TierStorageMetadata tierStorageMetadata = - producerRecord.getRoutingData().getTierStorageMetadata(); + /** + * It writes a Message to kafka and stores Payload in Tier Storage. + * The kafka message has headers information pointing to Tier Storage payload. So that a Consumer can recover + * Both operation are in the same tansaction. If something goes wrong, they will be consistently aborted + * + * @param producerRecord The non-null record to send + * @param callback A user-supplied callback to execute when the record has been acknowledged by the server + * (null indicates no callback) + */ + @Override + public void send(ProducerRecord

producerRecord, final Callback callback) { - // Serialize the producerRecord payload to be stored with TieredStorage when callback being invoked by Kafka - final DatabusMessage databusMessage = - messagePayloadAdapter.adapt(producerRecord.payload(), producerRecord.getHeaders()); - final byte[] databusMessageSerialized = getKafkaValueSerializer().serialize("", databusMessage); + validateTierStorageMetadata(producerRecord); - // Remove the producerRecord headers and payload - final ProducerRecord

adaptedProducerRecord = new ProducerRecord(producerRecord.getRoutingData(), - producerRecord.getHeaders(), - new MessagePayload(null)); + if (callback == null) { + final String errMsg = "Send cannot be performed. Producer Callback is invalid"; + LOG.error(errMsg); + throw new IllegalArgumentException(errMsg); + } + + try { + + // Get the Tier Storage from RoutindData which was already created by the user + final TierStorageMetadata tierStorageMetadata = + producerRecord.getRoutingData().getTierStorageMetadata(); + + // Serialize the producerRecord payload to be stored with TieredStorage when callback being invoked by Kafka + final DatabusMessage databusMessage = + messagePayloadAdapter.adapt(producerRecord.payload(), producerRecord.getHeaders()); + final byte[] kafkaValueSerializer = getKafkaValueSerializer().serialize("", databusMessage); + // Remove the producerRecord payload to be written in kafka. + final ProducerRecord

adaptedProducerRecord = new ProducerRecord<>(producerRecord.getRoutingData(), + producerRecord.getHeaders(), + new MessagePayload<>(null)); - // Get a Kafka Producer Record made up by a DatabusMessage: - // version = AVRO_1_S3_TIER_STORAGE_VERSION_NUMBER - // headers = empty - // payload = empty - org.apache.kafka.clients.producer.ProducerRecord targetProducerRecord = - getDatabusProducerRecordAdapter().adapt(adaptedProducerRecord); + // Transform a Databus ProducerRecord in a Kafka Producer Record + org.apache.kafka.clients.producer.ProducerRecord targetProducerRecord = + getDatabusProducerRecordAdapter().adapt(adaptedProducerRecord); - // Create the callback - CountDownLatch latch = new CountDownLatch(1); - final CallbackAdapterTierStorage callbackAdapterTierStorage; - if (callback != null) { + // Create the callback + CountDownLatch latch = new CountDownLatch(1); + final CallbackAdapterTierStorage callbackAdapterTierStorage; callbackAdapterTierStorage = new CallbackAdapterTierStorage(callback, - databusMessageSerialized, + kafkaValueSerializer, latch, tierStorageMetadata); - } else { - callbackAdapterTierStorage = null; - } - beginTransaction(); - super.sendKafkaRecord(targetProducerRecord, callbackAdapterTierStorage); - try { - // wait for callback ends - final boolean callbackFinished = latch.await(10000, TimeUnit.MILLISECONDS); - if (callbackFinished) { - if (callbackAdapterTierStorage.isOk()) { - commitTransaction(); - } else { + try { + beginTransaction(); + super.sendKafkaRecord(targetProducerRecord, callbackAdapterTierStorage); + // wait for callback ends + final boolean callbackFinished = latch.await(10000, TimeUnit.MILLISECONDS); + if (callbackFinished) { // means the callback finished before timeout + if (callbackAdapterTierStorage.isMessageAndPayloadStored()) { + commitTransaction(); + LOG.info("Send OK. 
Message was sent and payload was stored in Tier Storage"); + } else { // means something was wrong in kafka or tier storage + abortTransaction(); // Logging is already performed in the Callback + throw new DatabusClientRuntimeException("Send cannot be performed. Record not produced. " + + "Something was wrong producing the message in Kafka or " + + " storing the payload in Tier Storage", this.getClass()); + } + } else { // means that the callback has not finished in time abortTransaction(); + final String errMsg = "Send cannot be performed. Record not produced. " + + "Timeout: Too long time taken by Kafka or Tier Storage."; + LOG.error(errMsg); + throw new DatabusClientRuntimeException(errMsg, this.getClass()); } - } else { // means that the callback has not finished in time - LOG.error("Record not produced. Too long time taken by tier storage."); + } catch (InterruptedException e) { + abortTransaction(); + final String errMsg = "Send cannot be performed. Producer was interrupted while " + + "waiting for a Callback response. " + + "Producer continues active. Message should be sent again to retry. ERROR:" + e.getMessage(); + LOG.error(errMsg, e); + throw new DatabusClientRuntimeException(errMsg, e, Producer.class); + } catch (ProducerFencedException | OutOfOrderSequenceException | AuthorizationException e) { + super.flush(); + super.close(); + final String errMsg = "Send cannot be performed. Producer throws an irrecoverable exception " + + "during a transaction. Producer is closed effective immediately. ERROR:" + e.getMessage(); + LOG.error(errMsg, e); + throw new DatabusClientRuntimeException(errMsg, e, this.getClass()); + + } catch (Exception e) { abortTransaction(); + final String errMsg = "Producer throws an exception during a transaction. " + + "Producer continues active. Message should be sent again to retry. 
ERROR:" + e.getMessage(); + LOG.error(errMsg); + throw new DatabusClientRuntimeException(errMsg, e, Producer.class); } - } catch (InterruptedException e) { - abortTransaction(); + } catch (Exception e) { + if (e instanceof DatabusClientRuntimeException) { + throw e; + } + final String errMsg = "Send cannot be performed: " + e.getMessage(); + LOG.error(errMsg, e); + throw new DatabusClientRuntimeException(errMsg, e, Producer.class); } + } @@ -155,23 +330,42 @@ public void send(ProducerRecord

producerRecord, final Callback callback) { * It forwards a kafka callback to databus callback */ private class CallbackAdapterTierStorage implements org.apache.kafka.clients.producer.Callback { - private final Callback callback; - private final byte[] databusMessageSerialized; + /** + * Callback defined by the user when invoking send method + */ + private final Callback userCallback; + + /** + * the kafka value serializer + */ + private final byte[] kafkaValueSerializer; + + /** + * An object to signal when callback has finished + */ private CountDownLatch latch; + + /** + * The Tier Storage in charged to store payload + */ private TierStorageMetadata tierStorageMetadata; - private AtomicBoolean isOk = new AtomicBoolean(false); /** - * @param callback Databus callback - * @param databusMessageSerialized - * @param latch + * storage operation result */ - CallbackAdapterTierStorage(final Callback callback, - final byte[] databusMessageSerialized, + private AtomicBoolean isMessageAndPayloadStored = new AtomicBoolean(false); + + /** + * @param userCallback user callback + * @param kafkaValueSerializer kafka serializer + * @param latch a object to signal when callback + */ + CallbackAdapterTierStorage(final Callback userCallback, + final byte[] kafkaValueSerializer, final CountDownLatch latch, final TierStorageMetadata tierStorageMetadata) { - this.callback = callback; - this.databusMessageSerialized = databusMessageSerialized; + this.userCallback = userCallback; + this.kafkaValueSerializer = kafkaValueSerializer; this.latch = latch; this.tierStorageMetadata = tierStorageMetadata; } @@ -187,7 +381,8 @@ public void onCompletion(final org.apache.kafka.clients.producer.RecordMetadata final Exception exception) { if (exception != null) { - LOG.error("The record was not produced. " + exception.getMessage(), exception); + LOG.error("Send cannot be performed. The record was not produced. 
ERROR:" + + exception.getMessage(), exception); response(recordMetadata, exception); return; } @@ -196,10 +391,11 @@ public void onCompletion(final org.apache.kafka.clients.producer.RecordMetadata tierStorage.put(tierStorageMetadata.getBucketName(), tierStorageMetadata.getObjectName(), - databusMessageSerialized); + kafkaValueSerializer); response(recordMetadata, exception); } catch (DatabusClientRuntimeException databusException) { - LOG.error("The record was not produced. " + databusException.getMessage(), databusException); + LOG.error("Send cannot be performed. The record was not produced. ERROR:" + + databusException.getMessage(), databusException); response(recordMetadata, databusException); } } @@ -207,21 +403,23 @@ public void onCompletion(final org.apache.kafka.clients.producer.RecordMetadata /** * Send callback response * - * @param recordMetadata recordMetadata + * @param kafkaRecordMetadata recordMetadata * @param exception exception */ private void response(final org.apache.kafka.clients.producer.RecordMetadata kafkaRecordMetadata, final Exception exception) { - isOk.set(exception == null); + isMessageAndPayloadStored.set(exception == null); latch.countDown(); - final RecordMetadata databusRecordMetadata = - Optional.ofNullable(new RecordMetadata(kafkaRecordMetadata)) - .orElse(null); - callback.onCompletion(databusRecordMetadata, exception); + + RecordMetadata databusRecordMetadata = null; + if (kafkaRecordMetadata != null) { + databusRecordMetadata = new RecordMetadata(kafkaRecordMetadata); + } + userCallback.onCompletion(databusRecordMetadata, exception); } - protected boolean isOk() { - return isOk.get(); + protected boolean isMessageAndPayloadStored() { + return isMessageAndPayloadStored.get(); } } diff --git a/src/main/java/com/opendxl/databus/producer/Producer.java b/src/main/java/com/opendxl/databus/producer/Producer.java index bcc6b25..81b1207 100644 --- a/src/main/java/com/opendxl/databus/producer/Producer.java +++ 
b/src/main/java/com/opendxl/databus/producer/Producer.java @@ -4,8 +4,6 @@ package com.opendxl.databus.producer; -import com.opendxl.databus.consumer.Consumer; -import com.opendxl.databus.exception.DatabusClientRuntimeException; import com.opendxl.databus.common.MetricName; import com.opendxl.databus.common.PartitionInfo; import com.opendxl.databus.common.RecordMetadata; @@ -13,20 +11,24 @@ import com.opendxl.databus.common.internal.adapter.DatabusProducerRecordAdapter; import com.opendxl.databus.common.internal.adapter.MetricNameMapAdapter; import com.opendxl.databus.common.internal.adapter.PartitionInfoListAdapter; +import com.opendxl.databus.consumer.Consumer; import com.opendxl.databus.consumer.OffsetAndMetadata; import com.opendxl.databus.consumer.OffsetCommitCallback; import com.opendxl.databus.entities.internal.DatabusMessage; +import com.opendxl.databus.exception.DatabusClientRuntimeException; import com.opendxl.databus.producer.metric.ProducerMetric; import com.opendxl.databus.producer.metric.ProducerMetricBuilder; import com.opendxl.databus.producer.metric.ProducerMetricEnum; import com.opendxl.databus.serialization.internal.DatabusKeySerializer; import org.apache.commons.lang.StringUtils; -import org.slf4j.LoggerFactory; import org.slf4j.Logger; +import org.slf4j.LoggerFactory; -import java.util.Map; -import java.util.List; +import java.time.Duration; +import java.time.temporal.TemporalUnit; import java.util.HashMap; +import java.util.List; +import java.util.Map; import java.util.concurrent.TimeUnit; /** @@ -91,7 +93,7 @@ public Map getConfiguration() { * and the buffer is full. *

InterruptException If the thread is interrupted while blocked */ - public void send(final ProducerRecord record) { + public void send(final ProducerRecord

record) { send(record, null); } @@ -165,7 +167,9 @@ public void send(final ProducerRecord

producerRecord, final Callback callback sendKafkaRecord(targetProducerRecord, callbackAdapter); } catch (Exception e) { - throw new DatabusClientRuntimeException("send cannot be performed: " + e.getMessage(), e, Producer.class); + final String errMsg = "send cannot be performed: " + e.getMessage(); + LOG.error(errMsg, e); + throw new DatabusClientRuntimeException(errMsg, e, Producer.class); } } @@ -175,6 +179,11 @@ public void send(final ProducerRecord

producerRecord, final Callback callback producer.send(record, callback); } + protected void + sendKafkaRecord(final org.apache.kafka.clients.producer.ProducerRecord record) { + producer.send(record); + } + /** * Invoking this method makes all buffered records immediately available to send (even if linger.ms is * greater than 0) and blocks on the completion of the requests associated with these records. The post-condition @@ -212,7 +221,9 @@ public void flush() { try { producer.flush(); } catch (Exception e) { - throw new DatabusClientRuntimeException("flush cannot be performed :" + e.getMessage(), e, Producer.class); + final String errMsg = "flush cannot be performed :" + e.getMessage(); + LOG.error(errMsg, e); + throw new DatabusClientRuntimeException(errMsg, e, Producer.class); } } @@ -231,8 +242,9 @@ public List partitionsFor(final String topic) { List partitions = producer.partitionsFor(topic); return new PartitionInfoListAdapter().adapt(partitions); } catch (Exception e) { - throw new DatabusClientRuntimeException("partitionsFor cannot be performed :" - + e.getMessage(), e, Producer.class); + final String errMsg = "partitionsFor cannot be performed: " + e.getMessage(); + LOG.error(errMsg, e); + throw new DatabusClientRuntimeException(errMsg, e, Producer.class); } } @@ -249,8 +261,9 @@ public List partitionsFor(final String topic) { return new MetricNameMapAdapter().adapt(metrics); } catch (Exception e) { - throw new DatabusClientRuntimeException("metrics cannot be performed :" - + e.getMessage(), e, Producer.class); + final String errMsg = "metrics cannot be performed: " + e.getMessage(); + LOG.error(errMsg, e); + throw new DatabusClientRuntimeException(errMsg, e, Producer.class); } } @@ -272,7 +285,9 @@ public void close() { try { producer.close(); } catch (Exception e) { - throw new DatabusClientRuntimeException("close cannot be performed :" + e.getMessage(), e, Producer.class); + final String errMsg = "close cannot be performed :" + e.getMessage(); + 
LOG.error(errMsg, e); + throw new DatabusClientRuntimeException(errMsg, e, Producer.class); } } @@ -294,15 +309,48 @@ public void close() { *

InterruptException If the thread is interrupted while blocked *

IllegalArgumentException If the timeout is negative. */ + @Deprecated public void close(final long timeout, final TimeUnit timeUnit) { try { producer.close(timeout, timeUnit); } catch (Exception e) { - throw new DatabusClientRuntimeException("close cannot be performed :" + e.getMessage(), e, Producer.class); + final String errMsg = "close cannot be performed :" + e.getMessage(); + LOG.error(errMsg, e); + throw new DatabusClientRuntimeException(errMsg, e, Producer.class); } } + /** + * This method waits up to timeout for the producer to complete the sending of all incomplete requests. + *

+ * If the producer is unable to complete all requests before the timeout expires, this method will fail + * any unsent and unacknowledged records immediately. + *

+ * If invoked from within a {@link Callback} this method will not block and will be equivalent to + close(0, TimeUnit.MILLISECONDS). This is done since no further sending will happen while + blocking the I/O thread of the producer. + * + * @param duration The maximum time to wait for producer to complete any pending requests. The value should be + * non-negative. Specifying a timeout of zero means do not wait for pending send + requests to complete. + * @param timeUnit The time unit for the timeout + * @throws DatabusClientRuntimeException If close method fails. The original cause could be any of these exceptions: + *

InterruptException If the thread is interrupted while blocked + *

IllegalArgumentException If the timeout is negative. + */ + public void close(long duration, TemporalUnit timeUnit) { + try { + producer.close(Duration.of(duration, timeUnit)); + } catch (Exception e) { + final String errMsg = "close cannot be performed :" + e.getMessage(); + LOG.error(errMsg, e); + throw new DatabusClientRuntimeException(errMsg, e, Producer.class); + } + + } + + /** * Set the DatabusKeySerializer in producer * @@ -315,7 +363,7 @@ protected void setKafkaKeySerializer(final DatabusKeySerializer keySerializer) { /** * Set the value serializer in producer * - * @param valueSerializer A Serializer object instance for the value serializer + * @param kafkaValueSerializer A Serializer object instance for the value serializer */ protected void setKafkaValueSerializer(final org.apache.kafka.common.serialization.Serializer @@ -343,8 +391,7 @@ protected org.apache.kafka.common.serialization.Serializer getKa /** * Set a Kafka producer instance to the producer. - * - * @return A {@link org.apache.kafka.clients.producer.Producer} object instance to set in the producer + * @param producer Producer */ protected void setProducer(final org.apache.kafka.clients.producer.Producer producer) { this.producer = producer; @@ -360,10 +407,10 @@ protected void setDatabusProducerRecordAdapter(final DatabusProducerRecordAdapte } + /** - * Set a {@link DatabusProducerRecordAdapter} associated to the producer. * - * @param databusProducerRecordAdapter The {@link DatabusProducerRecordAdapter} to set to the producer + * @return Databus producer adapter */ protected DatabusProducerRecordAdapter

getDatabusProducerRecordAdapter() { return this.databusProducerRecordAdapter; @@ -445,8 +492,9 @@ public void initTransactions() { try { producer.initTransactions(); } catch (Exception e) { - throw new DatabusClientRuntimeException("initTransactions cannot be performed: " - + e.getMessage(), e, Producer.class); + final String errMsg = "initTransactions cannot be performed: " + e.getMessage(); + LOG.error(errMsg, e); + throw new DatabusClientRuntimeException(errMsg, e, Producer.class); } } @@ -468,8 +516,9 @@ public void beginTransaction() { try { producer.beginTransaction(); } catch (Exception e) { - throw new DatabusClientRuntimeException("beginTransaction cannot be performed: " - + e.getMessage(), e, Producer.class); + final String errMsg = "beginTransaction cannot be performed: " + e.getMessage(); + LOG.error(errMsg, e); + throw new DatabusClientRuntimeException(errMsg, e, Producer.class); } } @@ -519,8 +568,9 @@ public void sendOffsetsToTransaction(final Map { */ P deserialize(String topic, byte[] data); + /** + * + * @param topic the topic where the message comes from + * @param data data to be deserialized + * @param tierStorage tier storage where the payload should be read + * @return data of type P + */ P deserialize(String topic, byte[] data, TierStorage tierStorage); diff --git a/src/main/java/com/opendxl/databus/serialization/internal/LegacyMessageDeserializer.java b/src/main/java/com/opendxl/databus/serialization/internal/LegacyMessageDeserializer.java index db85664..d4af046 100644 --- a/src/main/java/com/opendxl/databus/serialization/internal/LegacyMessageDeserializer.java +++ b/src/main/java/com/opendxl/databus/serialization/internal/LegacyMessageDeserializer.java @@ -33,6 +33,13 @@ public final class LegacyMessageDeserializer implements InternalDeserializer { + /** + * Tier Storage + */ private TierStorage tierStorage; + /** + * Constructor + * + * @param tierStorage If null it will be ignored and payload won't be read + */ public 
MessageDeserializer(final TierStorage tierStorage) { this.tierStorage = tierStorage; } + /** + * Constructor + */ public MessageDeserializer() { this(null); } @@ -35,6 +46,7 @@ public void configure(final Map map, final boolean b) { /** * Deserialize a message to a {@link DatabusMessage} + * If tierStorage is not null will be used to read the payload from the underlying Tier Storage. * * @param topic The topic name. * @param serializedMessage A serialized message. diff --git a/src/main/java/com/opendxl/databus/serialization/internal/RawMessageDeserializer.java b/src/main/java/com/opendxl/databus/serialization/internal/RawMessageDeserializer.java index 964d5fa..9659fc5 100644 --- a/src/main/java/com/opendxl/databus/serialization/internal/RawMessageDeserializer.java +++ b/src/main/java/com/opendxl/databus/serialization/internal/RawMessageDeserializer.java @@ -25,8 +25,15 @@ public DatabusMessage deserialize(final String topic, final byte[] data) { return new DatabusMessage(null, data); } + /** + * + * @param topic the topic where the message comes from + * @param data data to be deserialized + * @param tierStorage It is not used + * @return A {@link DatabusMessage} with binary data as is. 
+ */ @Override - public DatabusMessage deserialize(String topic, byte[] data, TierStorage tierStorage) { + public DatabusMessage deserialize(final String topic, final byte[] data, final TierStorage tierStorage) { return deserialize(topic, data); } } diff --git a/src/test/java/com/opendxl/databus/entities/S3TierStorageTest.java b/src/test/java/com/opendxl/databus/entities/S3TierStorageTest.java index ef0e886..d2eccc5 100644 --- a/src/test/java/com/opendxl/databus/entities/S3TierStorageTest.java +++ b/src/test/java/com/opendxl/databus/entities/S3TierStorageTest.java @@ -54,8 +54,8 @@ public void shouldPutAngGetAnS3ObjectWithCredentials() { final byte[] objectContent = objectRaw.getBytes(); try { - TierStorage tierStorage = new S3TierStorage(AWS_ACCESS_KEY, AWS_SECRET_KEY, AWS_REGION, - new ClientConfiguration()); + TierStorage tierStorage = new S3TierStorage(AWS_REGION,new ClientConfiguration(), + AWS_ACCESS_KEY, AWS_SECRET_KEY); PA.setValue(tierStorage, "s3Client", client); tierStorage.put(bucketName, objectName, objectContent); diff --git a/src/test/java/com/opendxl/databus/entities/S3TierStorageTestForUnreachableService.java b/src/test/java/com/opendxl/databus/entities/S3TierStorageTestForUnreachableService.java new file mode 100644 index 0000000..af4a779 --- /dev/null +++ b/src/test/java/com/opendxl/databus/entities/S3TierStorageTestForUnreachableService.java @@ -0,0 +1,98 @@ +package com.opendxl.databus.entities; + +import com.amazonaws.ClientConfiguration; +import com.amazonaws.auth.AWSStaticCredentialsProvider; +import com.amazonaws.auth.AnonymousAWSCredentials; +import com.amazonaws.client.builder.AwsClientBuilder; +import com.amazonaws.services.s3.AmazonS3Client; +import com.amazonaws.services.s3.AmazonS3ClientBuilder; +import com.opendxl.databus.exception.DatabusClientRuntimeException; +import io.findify.s3mock.S3Mock; +import junit.extensions.PA; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +public class 
S3TierStorageTestForUnreachableService { + + private static final String AWS_SECRET_KEY = "secretKey"; + private static final String AWS_ACCESS_KEY = "accessKey"; + private static final String AWS_REGION = "us-east-1"; + private static S3Mock api; + private static AmazonS3Client client; + + + @BeforeClass + public static void beforeClass() { + api = new S3Mock.Builder().withPort(8001).withInMemoryBackend().build(); + AwsClientBuilder.EndpointConfiguration endpoint = + new AwsClientBuilder + .EndpointConfiguration("http://localhost:8001", "us-east-1"); + + client = (AmazonS3Client) AmazonS3ClientBuilder + .standard() + .withPathStyleAccessEnabled(true) + .withEndpointConfiguration(endpoint) + .withCredentials(new AWSStaticCredentialsProvider(new AnonymousAWSCredentials())) + .build(); + } + + + @Test + public void shouldThrowAnExceptionWhenPutAnObject() { + final String bucketName = "bucket-name"; + final String objectName = "object-name"; + final String objectRaw = "Hello!"; + final byte[] objectContent = objectRaw.getBytes(); + + try { + TierStorage tierStorage = new S3TierStorage(AWS_REGION, new ClientConfiguration()); + PA.setValue(tierStorage, "s3Client", client); + tierStorage.put(bucketName, objectName, objectContent); + Assert.fail("An Exception is expected"); + } catch (DatabusClientRuntimeException e) { + Assert.assertTrue(true); + } catch (Exception e) { + Assert.fail("Unexpected Exception"); + } + } + + @Test + public void shouldThrowAnExceptionWhenGetAnObject() { + final String bucketName = "bucket-name"; + final String objectName = "object-name"; + final String objectRaw = "Hello!"; + final byte[] objectContent = objectRaw.getBytes(); + + try { + TierStorage tierStorage = new S3TierStorage(AWS_REGION, new ClientConfiguration()); + PA.setValue(tierStorage, "s3Client", client); + tierStorage.get(bucketName, objectName); + Assert.fail("An Exception is expected"); + } catch (DatabusClientRuntimeException e) { + Assert.assertTrue(true); + } catch (Exception 
e) { + Assert.fail("Unexpected Exception"); + } + } + + @Test + public void shouldThrowAnExceptionSearchAnObject() { + final String bucketName = "bucket-name"; + final String objectName = "object-name"; + final String objectRaw = "Hello!"; + final byte[] objectContent = objectRaw.getBytes(); + + try { + TierStorage tierStorage = new S3TierStorage(AWS_REGION, new ClientConfiguration()); + PA.setValue(tierStorage, "s3Client", client); + tierStorage.doesObjectExist(bucketName, objectName); + Assert.fail("An Exception is expected"); + } catch (DatabusClientRuntimeException e) { + Assert.assertTrue(true); + } catch (Exception e) { + Assert.fail("Unexpected Exception"); + } + } + +} \ No newline at end of file diff --git a/src/test/java/com/opendxl/databus/producer/DatabusTierStorageProducerTest.java b/src/test/java/com/opendxl/databus/producer/DatabusTierStorageProducerTest.java new file mode 100644 index 0000000..e165252 --- /dev/null +++ b/src/test/java/com/opendxl/databus/producer/DatabusTierStorageProducerTest.java @@ -0,0 +1,397 @@ +package com.opendxl.databus.producer; + +import broker.ClusterHelper; +import com.amazonaws.ClientConfiguration; +import com.amazonaws.auth.AWSStaticCredentialsProvider; +import com.amazonaws.auth.AnonymousAWSCredentials; +import com.amazonaws.client.builder.AwsClientBuilder; +import com.amazonaws.services.s3.AmazonS3Client; +import com.amazonaws.services.s3.AmazonS3ClientBuilder; +import com.opendxl.databus.common.internal.util.HeaderInternalField; +import com.opendxl.databus.consumer.*; +import com.opendxl.databus.entities.*; +import com.opendxl.databus.exception.DatabusClientRuntimeException; +import com.opendxl.databus.serialization.ByteArrayDeserializer; +import com.opendxl.databus.serialization.ByteArraySerializer; +import io.findify.s3mock.S3Mock; +import junit.extensions.PA; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.nio.charset.Charset; +import 
java.time.LocalDateTime; +import java.util.*; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; + +public class DatabusTierStorageProducerTest { + + private static final String AWS_SECRET_KEY = "secretKey"; + private static final String AWS_ACCESS_KEY = "accessKey"; + private static final String AWS_REGION = "us-east-1"; + private static final String BUCKET_NAME = "databus-poc-test"; + private static S3Mock api; + private static AmazonS3Client client; + private static S3TierStorage tierStorage; + + @BeforeClass + public static void beforeClass() { + // Start Kafka cluster + ClusterHelper + .getInstance() + .addBroker(9092) + .zookeeperPort(2181) + .start(); + + api = new S3Mock.Builder().withPort(8001).withInMemoryBackend().build(); + api.start(); + AwsClientBuilder.EndpointConfiguration endpoint = + new AwsClientBuilder + .EndpointConfiguration("http://localhost:8001", "us-east-1"); + + client = (AmazonS3Client) AmazonS3ClientBuilder + .standard() + .withPathStyleAccessEnabled(true) + .withEndpointConfiguration(endpoint) + .withCredentials(new AWSStaticCredentialsProvider(new AnonymousAWSCredentials())) + .build(); + + tierStorage = new S3TierStorage(AWS_REGION, new ClientConfiguration(), + AWS_ACCESS_KEY, AWS_SECRET_KEY); + PA.setValue(tierStorage, "s3Client", client); + + } + @AfterClass + public static void afterClass() { + ClusterHelper.getInstance().stop(); + api.shutdown(); // kills the underlying actor system. Use api.stop() to just unbind the port. 
+ } + + @Test(expected = IllegalArgumentException.class) + public void shouldFailWhenTierStorageIsNull() { + final Map config = new HashMap(); + config.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); + config.put(ProducerConfig.CLIENT_ID_CONFIG, "producer-id-sample"); + config.put(ProducerConfig.LINGER_MS_CONFIG, "100"); + config.put(ProducerConfig.BATCH_SIZE_CONFIG, "150000"); + config.put(ProducerConfig.TRANSACTIONAL_ID_CONFIG, UUID.randomUUID().toString()); + config.put(ProducerConfig.ENABLE_IDEMPOTENCE_CONFIG, true); + new DatabusTierStorageProducer(config, new ByteArraySerializer(), null); + } + + @Test(expected = IllegalArgumentException.class) + public void shouldFailWhenTransactionIdIsNotDefined() { + final Map config = new HashMap(); + config.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); + config.put(ProducerConfig.CLIENT_ID_CONFIG, "producer-id-sample"); + config.put(ProducerConfig.LINGER_MS_CONFIG, "100"); + config.put(ProducerConfig.BATCH_SIZE_CONFIG, "150000"); + + // The following line is commented on purpose to show that transaction id is not configured + //config.put(ProducerConfig.TRANSACTIONAL_ID_CONFIG, UUID.randomUUID().toString()); + + new DatabusTierStorageProducer(config, new ByteArraySerializer(), tierStorage); + } + + @Test(expected = IllegalArgumentException.class) + public void shouldFailWhenTransactionIdIsNull() { + final Map config = new HashMap(); + config.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); + config.put(ProducerConfig.CLIENT_ID_CONFIG, "producer-id-sample"); + config.put(ProducerConfig.LINGER_MS_CONFIG, "100"); + config.put(ProducerConfig.BATCH_SIZE_CONFIG, "150000"); + + // The following line set TransactionId null + config.put(ProducerConfig.TRANSACTIONAL_ID_CONFIG, null); + + new DatabusTierStorageProducer(config, new ByteArraySerializer(), tierStorage); + } + + @Test(expected = DatabusClientRuntimeException.class) + public void shouldFailWhenTransactionIdIsEmpty() { + 
final Map config = new HashMap(); + config.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); + config.put(ProducerConfig.CLIENT_ID_CONFIG, "producer-id-sample"); + config.put(ProducerConfig.LINGER_MS_CONFIG, "100"); + config.put(ProducerConfig.BATCH_SIZE_CONFIG, "150000"); + + // The following line set TransactionId empty + config.put(ProducerConfig.TRANSACTIONAL_ID_CONFIG, ""); + + new DatabusTierStorageProducer(config, new ByteArraySerializer(), tierStorage); + } + + + @Test + public void shouldProduceARecordWithCallBackAndTierStorageRecord() { + final String topicName = UUID.randomUUID().toString(); + + Producer producer = null; + Consumer consumer = null; + + try { + producer = getProducer(); + + // Prepare a record + final String message = "Hello World at "+ LocalDateTime.now(); + final byte[] payload = message.getBytes(Charset.defaultCharset()); + final String key = UUID.randomUUID().toString(); + final ProducerRecord producerRecord = getProducerRecord(topicName, payload, key); + + // Send the record and set an anonymous callback for check the result + CountDownLatch latch = new CountDownLatch(1); + producer.send(producerRecord, (metadata, exception) -> { + try { + if(exception != null) { + Assert.fail(exception.getMessage()); + } + } finally { + latch.countDown(); + } + }); + + // Wait for callback being invoked by Kafka + boolean isTimeout = latch.await(10000, TimeUnit.MILLISECONDS); + if(!isTimeout) { + Assert.fail("Producer take a long time to produce a record"); + return; + } + + // Consume the record + consumer = getConsumer(); + consumer.subscribe(Collections.singletonList(topicName)); + boolean closed = false; + while(!closed) { + final ConsumerRecords records = consumer.poll(1000); + for(ConsumerRecord record : records) { + if(record.getKey().equals(key)) { + final String actualMessage = new String(record.getMessagePayload().getPayload()); + Assert.assertTrue(actualMessage.equals(message)); + closed = true; + break; + } + } + } + 
Assert.assertTrue(tierStorage.doesObjectExist(BUCKET_NAME, topicName + key)); + + } catch (Exception e) { + Assert.fail(e.getMessage()); + } finally { + if (producer != null) { + producer.close(); + } + if (consumer != null) { + consumer.close(); + } + + } + } + + @Test + public void shouldProduceARecordWithoutCallBackAndTierStorageRecord() { + final String topicName = UUID.randomUUID().toString(); + + Producer producer = null; + Consumer consumer = null; + + try { + producer = getProducer(); + + // Prepare a record + final String message = "Hello World at "+ LocalDateTime.now(); + final byte[] payload = message.getBytes(Charset.defaultCharset()); + final String key = UUID.randomUUID().toString(); + final ProducerRecord producerRecord = getProducerRecord(topicName, payload, key); + + // Send the record + producer.send(producerRecord); + + // Consume the record + consumer = getConsumer(); + consumer.subscribe(Collections.singletonList(topicName)); + boolean closed = false; + while(!closed) { + final ConsumerRecords records = consumer.poll(1000); + for(ConsumerRecord record : records) { + if(record.getKey().equals(key)) { + final String actualMessage = new String(record.getMessagePayload().getPayload()); + Assert.assertTrue(actualMessage.equals(message)); + closed = true; + break; + } + } + } + Assert.assertTrue(tierStorage.doesObjectExist(BUCKET_NAME, topicName + key)); + + } catch (Exception e) { + Assert.fail(e.getMessage()); + } finally { + if (producer != null) { + producer.close(); + } + if (consumer != null) { + consumer.close(); + } + + } + } + + + @Test + public void shouldConsumeWithoutTierStorage() { + final String topicName = UUID.randomUUID().toString(); + + Producer producer = null; + Consumer consumer = null; + + try { + producer = getProducer(); + + // Prepare a record + final String message = "Hello World at "+ LocalDateTime.now(); + final byte[] payload = message.getBytes(Charset.defaultCharset()); + final String key = UUID.randomUUID().toString(); 
+ final ProducerRecord producerRecord = getProducerRecord(topicName, payload, key); + + // Send the record and set an anonymous callback for check the result + CountDownLatch latch = new CountDownLatch(1); + producer.send(producerRecord, (metadata, exception) -> { + try { + if(exception != null) { + Assert.fail(exception.getMessage()); + } + } finally { + latch.countDown(); + } + }); + + // Wait for callback being invoked by Kafka + boolean isTimeout = latch.await(10000, TimeUnit.MILLISECONDS); + if(!isTimeout) { + Assert.fail("Producer take a long time to produce a record"); + return; + } + + // Consume the record + consumer = getConsumerWOTierStorage(); + consumer.subscribe(Collections.singletonList(topicName)); + boolean closed = false; + while(!closed) { + final ConsumerRecords records = consumer.poll(500); + for (ConsumerRecord record : records) { + final Headers headers = record.getHeaders(); + final String bucketName = headers.get(HeaderInternalField.TIER_STORAGE_BUCKET_NAME_KEY); + final String objectName = headers.get(HeaderInternalField.TIER_STORAGE_OBJECT_NAME_KEY); + Assert.assertTrue(bucketName.equals(BUCKET_NAME)); + Assert.assertTrue(objectName.equals(topicName + key)); + closed = true; + break; + } + } + + } catch (Exception e) { + Assert.fail(e.getMessage()); + } finally { + if (producer != null) { + producer.close(); + } + if (consumer != null) { + consumer.close(); + } + } + } + + + @Test + public void shouldConsumeWithoutCallbackAndTierStorage() { + final String topicName = UUID.randomUUID().toString(); + + Producer producer = null; + Consumer consumer = null; + + try { + producer = getProducer(); + + // Prepare a record + final String message = "Hello World at "+ LocalDateTime.now(); + final byte[] payload = message.getBytes(Charset.defaultCharset()); + final String key = UUID.randomUUID().toString(); + final ProducerRecord producerRecord = getProducerRecord(topicName, payload, key); + + // Send the record + producer.send(producerRecord); + + 
// Consume the record + consumer = getConsumerWOTierStorage(); + consumer.subscribe(Collections.singletonList(topicName)); + boolean closed = false; + while(!closed) { + final ConsumerRecords records = consumer.poll(500); + for (ConsumerRecord record : records) { + final Headers headers = record.getHeaders(); + final String bucketName = headers.get(HeaderInternalField.TIER_STORAGE_BUCKET_NAME_KEY); + final String objectName = headers.get(HeaderInternalField.TIER_STORAGE_OBJECT_NAME_KEY); + Assert.assertTrue(bucketName.equals(BUCKET_NAME)); + Assert.assertTrue(objectName.equals(topicName + key)); + closed = true; + break; + } + } + + } catch (Exception e) { + Assert.fail(e.getMessage()); + } finally { + if (producer != null) { + producer.close(); + } + if (consumer != null) { + consumer.close(); + } + } + } + + public Consumer getConsumer() { + final Properties consumerProps = new Properties(); + consumerProps.put(ConsumerConfiguration.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); + consumerProps.put(ConsumerConfiguration.GROUP_ID_CONFIG, UUID.randomUUID().toString()); + consumerProps.put(ConsumerConfiguration.ENABLE_AUTO_COMMIT_CONFIG, "true"); + consumerProps.put(ConsumerConfiguration.SESSION_TIMEOUT_MS_CONFIG, "30000"); + consumerProps.put(ConsumerConfiguration.CLIENT_ID_CONFIG, "consumer-id-sample"); + consumerProps.put(ConsumerConfiguration.AUTO_OFFSET_RESET_CONFIG, "earliest"); + return new DatabusConsumer(consumerProps, new ByteArrayDeserializer(), tierStorage ); + } + + public Consumer getConsumerWOTierStorage() { + final Properties consumerProps = new Properties(); + consumerProps.put(ConsumerConfiguration.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); + consumerProps.put(ConsumerConfiguration.GROUP_ID_CONFIG, UUID.randomUUID().toString()); + consumerProps.put(ConsumerConfiguration.ENABLE_AUTO_COMMIT_CONFIG, "true"); + consumerProps.put(ConsumerConfiguration.SESSION_TIMEOUT_MS_CONFIG, "30000"); + consumerProps.put(ConsumerConfiguration.CLIENT_ID_CONFIG, 
"consumer-id-sample"); + consumerProps.put(ConsumerConfiguration.AUTO_OFFSET_RESET_CONFIG, "earliest"); + return new DatabusConsumer(consumerProps, new ByteArrayDeserializer()); + } + public Producer getProducer() { + final Map config = new HashMap(); + config.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); + config.put(ProducerConfig.CLIENT_ID_CONFIG, "producer-id-sample"); + config.put(ProducerConfig.LINGER_MS_CONFIG, "100"); + config.put(ProducerConfig.BATCH_SIZE_CONFIG, "150000"); + config.put(ProducerConfig.TRANSACTIONAL_ID_CONFIG, UUID.randomUUID().toString()); + return new DatabusTierStorageProducer(config, new ByteArraySerializer(), tierStorage); + } + + public ProducerRecord getProducerRecord(final String topic, final byte[] payload, String key) { + final TierStorageMetadata tStorageMetadata = + new TierStorageMetadata(BUCKET_NAME, topic + key); + final RoutingData routingData = new RoutingData(topic, key, null, tStorageMetadata); + final Headers headers = new Headers(); + final MessagePayload messagePayload = new MessagePayload<>(payload); + return new ProducerRecord<>(routingData, headers, messagePayload); + } + + + +} \ No newline at end of file diff --git a/src/test/java/com/opendxl/databus/producer/DatabusTierStorageProducerWithoutS3Test.java b/src/test/java/com/opendxl/databus/producer/DatabusTierStorageProducerWithoutS3Test.java new file mode 100644 index 0000000..6c2b99c --- /dev/null +++ b/src/test/java/com/opendxl/databus/producer/DatabusTierStorageProducerWithoutS3Test.java @@ -0,0 +1,187 @@ +package com.opendxl.databus.producer; + +import broker.ClusterHelper; +import com.amazonaws.ClientConfiguration; +import com.amazonaws.auth.AWSStaticCredentialsProvider; +import com.amazonaws.auth.AnonymousAWSCredentials; +import com.amazonaws.client.builder.AwsClientBuilder; +import com.amazonaws.services.s3.AmazonS3Client; +import com.amazonaws.services.s3.AmazonS3ClientBuilder; +import com.opendxl.databus.consumer.Consumer; +import 
com.opendxl.databus.consumer.ConsumerConfiguration; +import com.opendxl.databus.consumer.DatabusConsumer; +import com.opendxl.databus.entities.*; +import com.opendxl.databus.exception.DatabusClientRuntimeException; +import com.opendxl.databus.serialization.ByteArrayDeserializer; +import com.opendxl.databus.serialization.ByteArraySerializer; +import io.findify.s3mock.S3Mock; +import junit.extensions.PA; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.nio.charset.Charset; +import java.time.LocalDateTime; +import java.util.HashMap; +import java.util.Map; +import java.util.Properties; +import java.util.UUID; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; + +public class DatabusTierStorageProducerWithoutS3Test { + + private static final String AWS_SECRET_KEY = "secretKey"; + private static final String AWS_ACCESS_KEY = "accessKey"; + private static final String AWS_REGION = "us-east-1"; + private static final String BUCKET_NAME = "databus-poc-test"; + private static S3Mock api; + private static AmazonS3Client client; + private static S3TierStorage tierStorage; + + @BeforeClass + public static void beforeClass() { + // Start Kafka cluster + ClusterHelper + .getInstance() + .addBroker(9092) + .zookeeperPort(2181) + .start(); + + api = new S3Mock.Builder().withPort(8001).withInMemoryBackend().build(); + // api.start is missing on purpose to keep Tier Storage down + AwsClientBuilder.EndpointConfiguration endpoint = + new AwsClientBuilder + .EndpointConfiguration("http://localhost:8001", "us-east-1"); + + client = (AmazonS3Client) AmazonS3ClientBuilder + .standard() + .withPathStyleAccessEnabled(true) + .withEndpointConfiguration(endpoint) + .withCredentials(new AWSStaticCredentialsProvider(new AnonymousAWSCredentials())) + .build(); + + tierStorage = new S3TierStorage(AWS_REGION, new ClientConfiguration(), + AWS_ACCESS_KEY, AWS_SECRET_KEY); + 
PA.setValue(tierStorage, "s3Client", client); + + } + @AfterClass + public static void afterClass() { + ClusterHelper.getInstance().stop(); + } + + @Test + public void shouldFailBecauseTierStorageIsUnreachable() { + final String topicName = UUID.randomUUID().toString(); + + Producer producer = null; + + try { + producer = getProducer(); + + // Prepare a record + final String message = "Hello World at " + LocalDateTime.now(); + final byte[] payload = message.getBytes(Charset.defaultCharset()); + final String key = UUID.randomUUID().toString(); + final ProducerRecord producerRecord = getProducerRecord(topicName, payload, key); + + // Send the record + CountDownLatch latch = new CountDownLatch(1); + producer.send(producerRecord, (metadata, exception) -> { + try { + if (exception != null) { + Assert.fail(exception.getMessage()); + } + } finally { + latch.countDown(); + } + }); + latch.await(10000, TimeUnit.MILLISECONDS); + Assert.fail(); + } catch (DatabusClientRuntimeException e) { + Assert.assertTrue(true); + } catch (Exception e) { + Assert.fail(); + } finally { + if (producer != null) { + producer.close(); + } + } + } + + + @Test + public void shouldFailBecauseTierStorageIsUnreachable1() { + final String topicName = UUID.randomUUID().toString(); + + Producer producer = null; + + try { + producer = getProducer(); + + // Prepare a record + final String message = "Hello World at " + LocalDateTime.now(); + final byte[] payload = message.getBytes(Charset.defaultCharset()); + final String key = UUID.randomUUID().toString(); + final ProducerRecord producerRecord = getProducerRecord(topicName, payload, key); + + // Send the record + producer.send(producerRecord); + Assert.fail(); + } catch (DatabusClientRuntimeException e) { + Assert.assertTrue(true); + } catch (Exception e) { + Assert.fail(); + } finally { + if (producer != null) { + producer.close(); + } + } + } + + public Consumer getConsumer() { + final Properties consumerProps = new Properties(); + 
consumerProps.put(ConsumerConfiguration.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); + consumerProps.put(ConsumerConfiguration.GROUP_ID_CONFIG, UUID.randomUUID().toString()); + consumerProps.put(ConsumerConfiguration.ENABLE_AUTO_COMMIT_CONFIG, "true"); + consumerProps.put(ConsumerConfiguration.SESSION_TIMEOUT_MS_CONFIG, "30000"); + consumerProps.put(ConsumerConfiguration.CLIENT_ID_CONFIG, "consumer-id-sample"); + consumerProps.put(ConsumerConfiguration.AUTO_OFFSET_RESET_CONFIG, "earliest"); + return new DatabusConsumer(consumerProps, new ByteArrayDeserializer(), tierStorage ); + } + + public Consumer getConsumerWOTierStorage() { + final Properties consumerProps = new Properties(); + consumerProps.put(ConsumerConfiguration.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); + consumerProps.put(ConsumerConfiguration.GROUP_ID_CONFIG, UUID.randomUUID().toString()); + consumerProps.put(ConsumerConfiguration.ENABLE_AUTO_COMMIT_CONFIG, "true"); + consumerProps.put(ConsumerConfiguration.SESSION_TIMEOUT_MS_CONFIG, "30000"); + consumerProps.put(ConsumerConfiguration.CLIENT_ID_CONFIG, "consumer-id-sample"); + consumerProps.put(ConsumerConfiguration.AUTO_OFFSET_RESET_CONFIG, "earliest"); + return new DatabusConsumer(consumerProps, new ByteArrayDeserializer()); + } + public Producer getProducer() { + final Map config = new HashMap(); + config.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); + config.put(ProducerConfig.CLIENT_ID_CONFIG, "producer-id-sample"); + config.put(ProducerConfig.LINGER_MS_CONFIG, "100"); + config.put(ProducerConfig.BATCH_SIZE_CONFIG, "150000"); + config.put(ProducerConfig.TRANSACTIONAL_ID_CONFIG, UUID.randomUUID().toString()); + config.put(ProducerConfig.ENABLE_IDEMPOTENCE_CONFIG, true); + return new DatabusTierStorageProducer(config, new ByteArraySerializer(), tierStorage); + } + + public ProducerRecord getProducerRecord(final String topic, final byte[] payload, String key) { + TierStorageMetadata tStorageMetadata = + new 
TierStorageMetadata(BUCKET_NAME, topic + key); + RoutingData routingData = new RoutingData(topic, key, null, tStorageMetadata); + Headers headers = new Headers(); + MessagePayload messagePayload = new MessagePayload<>(payload); + return new ProducerRecord<>(routingData, headers, messagePayload); + } + + + +} \ No newline at end of file From 9577c78bc72bb9c8c7f202ae4a2dc27e7493d1e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20Villalba=C2=A0=20=C2=A0=20=C2=A0=20=C2=A0?= Date: Thu, 23 Apr 2020 12:05:10 -0300 Subject: [PATCH 17/20] Update dependency versions, fix example --- build.gradle | 3 ++- sample/src/sample/BasicConsumerProducerExample.java | 13 +++++++------ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/build.gradle b/build.gradle index b24797e..6ac3b6a 100644 --- a/build.gradle +++ b/build.gradle @@ -19,7 +19,7 @@ plugins { id "com.github.johnrengelman.shadow" version "4.0.3" id "kr.motd.sphinx" version "2.3.1" id "jacoco" - id "org.owasp.dependencycheck" version "5.2.1" + id "org.owasp.dependencycheck" version "5.3.2" } group 'com.opendxl' @@ -83,6 +83,7 @@ dependencies { } testImplementation 'org.scala-lang:scala-reflect:2.12.11' testImplementation('io.netty:netty-all:4.1.43.Final') { + force = true } testImplementation 'commons-io:commons-io:2.6' diff --git a/sample/src/sample/BasicConsumerProducerExample.java b/sample/src/sample/BasicConsumerProducerExample.java index d6bb204..3ab16a9 100644 --- a/sample/src/sample/BasicConsumerProducerExample.java +++ b/sample/src/sample/BasicConsumerProducerExample.java @@ -4,6 +4,7 @@ package sample; +import broker.ClusterHelper; import com.opendxl.databus.common.RecordMetadata; import com.opendxl.databus.common.internal.builder.TopicNameBuilder; import com.opendxl.databus.consumer.*; @@ -46,11 +47,11 @@ public class BasicConsumerProducerExample { public BasicConsumerProducerExample() { // Start Kafka cluster -// ClusterHelper -// .getInstance() -// .addBroker(9092) -// .zookeeperPort(2181) -// 
.start(); + ClusterHelper + .getInstance() + .addBroker(9092) + .zookeeperPort(2181) + .start(); // Prepare a Producer this.producer = getProducer(); @@ -196,7 +197,7 @@ synchronized private void stopExample(final ExecutorService executor) { try { closed.set(true); consumer.wakeup(); -// ClusterHelper.getInstance().stop(); + ClusterHelper.getInstance().stop(); executor.shutdown(); executor.awaitTermination(5, TimeUnit.SECONDS); } catch (InterruptedException e) { From a46bbbdd8c5b81a4a6dfe6009e1af69d19e66090 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20Villalba=C2=A0=20=C2=A0=20=C2=A0=20=C2=A0?= Date: Thu, 23 Apr 2020 19:48:10 -0300 Subject: [PATCH 18/20] Add sphinx doc for Tiered Storage example --- docs/Basic-S3-tiered-storage-example.rst | 314 ++++++++++++++++++ docs/index.rst | 1 + .../producer/DatabusTierStorageProducer.java | 18 +- 3 files changed, 324 insertions(+), 9 deletions(-) create mode 100644 docs/Basic-S3-tiered-storage-example.rst diff --git a/docs/Basic-S3-tiered-storage-example.rst b/docs/Basic-S3-tiered-storage-example.rst new file mode 100644 index 0000000..fedc23a --- /dev/null +++ b/docs/Basic-S3-tiered-storage-example.rst @@ -0,0 +1,314 @@ +S3 Tiered Storage Example +--------------------------- + +This sample demonstrates how to produce messages to a Kafka topic and an S3 bucket. At the same time it shows how a consumer +is able to read messages from Kafka and S3 in a seamless fashion. + +Benefits of the Tiered Storage Feature + +- Reduces costs because it stores the message payload in an S3 bucket and uses a Kafka message part as control and index +- It stores in the Kafka topic and S3 bucket atomically. +- Exposes a new extended Producer type to differentiate from the regular one. +- Consumer is backward compatible. +- Consumer is able to read raw Kafka, Databus or Kafka + S3 messages. +- Previous consumer (2.4.1 and below) won't break when reading a Tiered Storage message. + + +Code highlights are shown below: + +Sample Code +~~~~~~~~~~~ + +.. 
code:: java + + package sample; + + import broker.ClusterHelper; + import com.amazonaws.ClientConfiguration; + import com.opendxl.databus.common.RecordMetadata; + import com.opendxl.databus.common.internal.builder.TopicNameBuilder; + import com.opendxl.databus.consumer.Consumer; + import com.opendxl.databus.consumer.ConsumerConfiguration; + import com.opendxl.databus.consumer.ConsumerRecord; + import com.opendxl.databus.consumer.ConsumerRecords; + import com.opendxl.databus.consumer.DatabusConsumer; + import com.opendxl.databus.entities.Headers; + import com.opendxl.databus.entities.MessagePayload; + import com.opendxl.databus.entities.RoutingData; + import com.opendxl.databus.entities.S3TierStorage; + import com.opendxl.databus.entities.TierStorage; + import com.opendxl.databus.entities.TierStorageMetadata; + import com.opendxl.databus.producer.Callback; + import com.opendxl.databus.producer.DatabusTierStorageProducer; + import com.opendxl.databus.producer.Producer; + import com.opendxl.databus.producer.ProducerConfig; + import com.opendxl.databus.producer.ProducerRecord; + import com.opendxl.databus.serialization.ByteArrayDeserializer; + import com.opendxl.databus.serialization.ByteArraySerializer; + import org.slf4j.Logger; + import org.slf4j.LoggerFactory; + + import java.nio.charset.Charset; + import java.time.LocalDateTime; + import java.util.Collections; + import java.util.HashMap; + import java.util.Map; + import java.util.Properties; + import java.util.UUID; + import java.util.concurrent.ExecutorService; + import java.util.concurrent.Executors; + import java.util.concurrent.TimeUnit; + import java.util.concurrent.atomic.AtomicBoolean; + + + public class BasicS3TierStorageConsumerProducerExample { + + private static final String AWS_REGION = "add-aws-region-name-here"; + private static final String S3_ACCESS_KEY = "add-your-access-key-here"; + private static final String S3_SECRET_KEY = "add-your-secret-key-here"; + private final Producer producer; + 
private final ExecutorService executor; + private final TierStorage tierStorage; + private Consumer consumer; + private String producerTopic = "topic1"; + private String consumerTopic = "topic1"; + + private static final long PRODUCER_TIME_CADENCE_MS = 1000L; + private static final long CONSUMER_TIME_CADENCE_MS = 1000L; + private final AtomicBoolean closed = new AtomicBoolean(false); + + private static Logger LOG = LoggerFactory.getLogger(BasicS3TierStorageConsumerProducerExample.class); + + public BasicS3TierStorageConsumerProducerExample() { + + // Start Kafka cluster + ClusterHelper.getInstance().addBroker(9092).zookeeperPort(2181).start(); + + // Prepare a S3 Tiered Storage + ClientConfiguration awsClientConfiguration = new ClientConfiguration(); + this.tierStorage = new S3TierStorage(AWS_REGION, awsClientConfiguration, S3_ACCESS_KEY, S3_SECRET_KEY); + + // Prepare a Producer + this.producer = getProducer(); + + // Prepare a Consumer + this.consumer = getConsumer(); + + // Subscribe to topic + this.consumer.subscribe(Collections.singletonList(consumerTopic)); + + this.executor = Executors.newFixedThreadPool(2); + + } + + public Producer getProducer() { + final Map config = new HashMap(); + config.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); + config.put(ProducerConfig.CLIENT_ID_CONFIG, "producer-id-sample"); + config.put(ProducerConfig.LINGER_MS_CONFIG, "100"); + config.put(ProducerConfig.BATCH_SIZE_CONFIG, "150000"); + config.put(ProducerConfig.TRANSACTIONAL_ID_CONFIG, UUID.randomUUID().toString()); + return new DatabusTierStorageProducer(config, new ByteArraySerializer(), tierStorage); + + } + + public Consumer getConsumer() { + final Properties consumerProps = new Properties(); + consumerProps.put(ConsumerConfiguration.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); + consumerProps.put(ConsumerConfiguration.GROUP_ID_CONFIG, "cg1"); + consumerProps.put(ConsumerConfiguration.ENABLE_AUTO_COMMIT_CONFIG, "true"); + 
consumerProps.put(ConsumerConfiguration.SESSION_TIMEOUT_MS_CONFIG, "30000"); + consumerProps.put(ConsumerConfiguration.CLIENT_ID_CONFIG, "consumer-id-sample"); + return new DatabusConsumer(consumerProps, new ByteArrayDeserializer(), tierStorage); + } + + private Runnable getProducerTask() { + return () -> { + LOG.info("Producer started"); + while (!closed.get()) { + + // Prepare a record + final String message = "Hello World at "+ LocalDateTime.now(); + + // user should provide the encoding + final byte[] payload = message.getBytes(Charset.defaultCharset()); + final ProducerRecord producerRecord = getProducerRecord(producerTopic, payload); + + // Send the record + // Get headers as String + final StringBuilder headers = new StringBuilder().append("["); + producerRecord.getHeaders().getAll().forEach((k, v) -> headers.append("[" + k + ":" + v + "]")); + headers.append("]"); + + producer.send(producerRecord, new MyCallback(producerRecord.getRoutingData().getShardingKey())); + LOG.info("[PRODUCER -> KAFKA][SENDING MSG] ID " + producerRecord.getRoutingData().getShardingKey() + + " TOPIC:" + TopicNameBuilder.getTopicName(producerTopic, null) + + " HEADERS:" + headers + + " PAYLOAD:" + message); + + justWait(PRODUCER_TIME_CADENCE_MS); + } + producer.flush(); + producer.close(); + LOG.info("Producer closed"); + + }; + } + + private Runnable getConsumerTask() { + return () -> { + try { + LOG.info("Consumer started"); + while (!closed.get()) { + + // Polling the databus + final ConsumerRecords records = consumer.poll(CONSUMER_TIME_CADENCE_MS); + + // Iterate records + for (ConsumerRecord record : records) { + + // Get headers as String + final StringBuilder headers = new StringBuilder().append("["); + record.getHeaders().getAll().forEach((k, v) -> headers.append("[" + k + ":" + v + "]")); + headers.append("]"); + + LOG.info("[CONSUMER <- KAFKA][MSG RCEIVED] ID " + record.getKey() + + " TOPIC:" + record.getComposedTopic() + + " KEY:" + record.getKey() + + " PARTITION:" + 
record.getPartition() + + " OFFSET:" + record.getOffset() + + " TIMESTAMP:" + record.getTimestamp() + + " HEADERS:" + headers + + " PAYLOAD:" + new String(record.getMessagePayload().getPayload())); + } + consumer.commitAsync(); + } + } catch (Exception e) { + LOG.error(e.getMessage()); + } finally { + consumer.unsubscribe(); + try { + consumer.close(); + } catch (Exception e) { + LOG.error(e.getMessage()); + } + LOG.info("Consumer closed"); + + } + + }; + } + + public ProducerRecord getProducerRecord(final String topic, final byte[] payload) { + String key = String.valueOf(System.currentTimeMillis()); + TierStorageMetadata tStorageMetadata = new TierStorageMetadata("databus-poc-test", topic + key); + RoutingData routingData = new RoutingData(topic, key, null, tStorageMetadata); + Headers headers = new Headers(); + headers.put("k","v"); + MessagePayload messagePayload = new MessagePayload<>(payload); + return new ProducerRecord<>(routingData, headers, messagePayload); + } + + private void justWait(long time) { + try { + Thread.sleep(time); + } catch (InterruptedException e) { + e.printStackTrace(); + } + } + + private static class MyCallback implements Callback { + + private String shardingKey; + + public MyCallback(String shardingKey) { + + this.shardingKey = shardingKey; + } + + public void onCompletion(RecordMetadata metadata, Exception exception) { + if (exception != null) { + LOG.warn("Error sending a record " + exception.getMessage()); + return; + } + LOG.info("[PRODUCER <- KAFKA][OK MSG SENT] ID " + shardingKey + + " TOPIC:" + metadata.topic() + + " PARTITION:" + metadata.partition() + + " OFFSET:" + metadata.offset()); + } + } + + synchronized private void stopExample(final ExecutorService executor) { + try { + closed.set(true); + consumer.wakeup(); + ClusterHelper.getInstance().stop(); + executor.shutdown(); + executor.awaitTermination(5, TimeUnit.SECONDS); + } catch (InterruptedException e) { + } finally { + executor.shutdownNow(); + } + } + + public void 
startExample() throws InterruptedException { + + Runnable consumerTask = getConsumerTask(); + Runnable producerTask = getProducerTask(); + + executor.submit(consumerTask); + executor.submit(producerTask); + + Runtime.getRuntime().addShutdownHook( + new Thread( + new Runnable() { + public void run() { + stopExample(executor); + LOG.info("Example finished"); + } + })); + + } + + + public static void main(String[] args) throws InterruptedException { + LOG.info("Ctrl-C to finish"); + new BasicS3TierStorageConsumerProducerExample().startExample(); + } + + } + + +The first step is to create a ``DatabusTierStorageProducer`` by passing a ``S3TierStorage`` instance + +.. code:: java + + // Prepares a S3 Tiered Storage + this.tierStorage = new S3TierStorage(AWS_REGION, awsClientConfiguration, S3_ACCESS_KEY, S3_SECRET_KEY); + ... + public Producer getProducer() { + ... + // Creates a Tiered Storage Producer + return new DatabusTierStorageProducer(config, new ByteArraySerializer(), tierStorage); + } + +Then a ``ProducerRecord`` message should be created by using ``TierStorageMetadata`` instance. The S3 bucket and +the S3 object name must be specified. + +.. code:: java + + public ProducerRecord getProducerRecord(final String topic, final byte[] payload) { + String key = String.valueOf(System.currentTimeMillis()); + TierStorageMetadata tStorageMetadata = new TierStorageMetadata("databus-poc-test", topic + key); + RoutingData routingData = new RoutingData(topic, key, null, tStorageMetadata); + ... + MessagePayload messagePayload = new MessagePayload<>(payload); + return new ProducerRecord<>(routingData, headers, messagePayload); + } + +Finally it sends the message to Kafka and S3 + +.. 
code:: java + + producer.send(producerRecord, ...); + diff --git a/docs/index.rst b/docs/index.rst index 0152eee..1911fe7 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -76,6 +76,7 @@ Basic Consumer-metrics-sample.rst Transactions-producer-consumer-example.rst Basic-push-consumer-example.rst + Basic-S3-tiered-storage-example.rst Bugs and Feedback ----------------- diff --git a/src/main/java/com/opendxl/databus/producer/DatabusTierStorageProducer.java b/src/main/java/com/opendxl/databus/producer/DatabusTierStorageProducer.java index adccea8..ca3cc97 100644 --- a/src/main/java/com/opendxl/databus/producer/DatabusTierStorageProducer.java +++ b/src/main/java/com/opendxl/databus/producer/DatabusTierStorageProducer.java @@ -166,7 +166,7 @@ public void send(final ProducerRecord

producerRecord) { messagePayloadAdapter.adapt(producerRecord.payload(), producerRecord.getHeaders()); final byte[] databusMessageSerialized = getKafkaValueSerializer().serialize("", databusMessage); - // Remove the producerRecord payload to be written in kafka. + // Remove the producerRecord payload to be written in kafka and keeps Headers. final ProducerRecord

adaptedProducerRecord = new ProducerRecord<>(producerRecord.getRoutingData(), producerRecord.getHeaders(), new MessagePayload<>(null)); @@ -248,9 +248,9 @@ public void send(ProducerRecord

producerRecord, final Callback callback) { // Serialize the producerRecord payload to be stored with TieredStorage when callback being invoked by Kafka final DatabusMessage databusMessage = messagePayloadAdapter.adapt(producerRecord.payload(), producerRecord.getHeaders()); - final byte[] kafkaValueSerializer = getKafkaValueSerializer().serialize("", databusMessage); + final byte[] kafkaValueSerialized = getKafkaValueSerializer().serialize("", databusMessage); - // Remove the producerRecord payload to be written in kafka. + // Remove the producerRecord payload to be written in kafka and keeps Headers. final ProducerRecord

adaptedProducerRecord = new ProducerRecord<>(producerRecord.getRoutingData(), producerRecord.getHeaders(), new MessagePayload<>(null)); @@ -263,7 +263,7 @@ public void send(ProducerRecord

producerRecord, final Callback callback) { CountDownLatch latch = new CountDownLatch(1); final CallbackAdapterTierStorage callbackAdapterTierStorage; callbackAdapterTierStorage = new CallbackAdapterTierStorage(callback, - kafkaValueSerializer, + kafkaValueSerialized, latch, tierStorageMetadata); @@ -338,7 +338,7 @@ private class CallbackAdapterTierStorage implements org.apache.kafka.clients.pro /** * the kafka value serializer */ - private final byte[] kafkaValueSerializer; + private final byte[] kafkaValueSerialized; /** * An object to signal when callback has finished @@ -357,15 +357,15 @@ private class CallbackAdapterTierStorage implements org.apache.kafka.clients.pro /** * @param userCallback user callback - * @param kafkaValueSerializer kafka serializer + * @param kafkaValueSerialized kafka serializer * @param latch a object to signal when callback */ CallbackAdapterTierStorage(final Callback userCallback, - final byte[] kafkaValueSerializer, + final byte[] kafkaValueSerialized, final CountDownLatch latch, final TierStorageMetadata tierStorageMetadata) { this.userCallback = userCallback; - this.kafkaValueSerializer = kafkaValueSerializer; + this.kafkaValueSerialized = kafkaValueSerialized; this.latch = latch; this.tierStorageMetadata = tierStorageMetadata; } @@ -391,7 +391,7 @@ public void onCompletion(final org.apache.kafka.clients.producer.RecordMetadata tierStorage.put(tierStorageMetadata.getBucketName(), tierStorageMetadata.getObjectName(), - kafkaValueSerializer); + kafkaValueSerialized); response(recordMetadata, exception); } catch (DatabusClientRuntimeException databusException) { LOG.error("Send cannot be performed. The record was not produced. 
ERROR:" From 787ff34e2f9684bc5e83b21eca7213ce1642b80f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20Villalba=C2=A0=20=C2=A0=20=C2=A0=20=C2=A0?= Date: Mon, 6 Jul 2020 22:53:42 -0300 Subject: [PATCH 19/20] Fix DatabusPushConsumer when a rebalance scenario occurs --- build.gradle | 4 +- .../databus/consumer/DatabusPushConsumer.java | 74 +++++++++++++++++++ 2 files changed, 76 insertions(+), 2 deletions(-) diff --git a/build.gradle b/build.gradle index 6ac3b6a..508ea64 100644 --- a/build.gradle +++ b/build.gradle @@ -6,7 +6,7 @@ buildscript { mavenCentral() } dependencies { - classpath 'org.owasp:dependency-check-gradle:5.2.1' + classpath 'org.owasp:dependency-check-gradle:5.3.2.1' } } @@ -19,7 +19,7 @@ plugins { id "com.github.johnrengelman.shadow" version "4.0.3" id "kr.motd.sphinx" version "2.3.1" id "jacoco" - id "org.owasp.dependencycheck" version "5.3.2" + id "org.owasp.dependencycheck" version "5.3.2.1" } group 'com.opendxl' diff --git a/src/main/java/com/opendxl/databus/consumer/DatabusPushConsumer.java b/src/main/java/com/opendxl/databus/consumer/DatabusPushConsumer.java index 64183c1..fc20a16 100644 --- a/src/main/java/com/opendxl/databus/consumer/DatabusPushConsumer.java +++ b/src/main/java/com/opendxl/databus/consumer/DatabusPushConsumer.java @@ -15,8 +15,11 @@ import java.io.Closeable; import java.time.Duration; +import java.util.Collection; import java.util.HashMap; +import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Properties; import java.util.Set; import java.util.concurrent.CancellationException; @@ -87,6 +90,11 @@ public final class DatabusPushConsumer

extends DatabusConsumer

implements */ private CountDownLatch countDownLatch = new CountDownLatch(1); + /** + * An boolean to signal if pause operation has to be refreshed + */ + private AtomicBoolean refreshPause = new AtomicBoolean(false); + /** * Constructor * @@ -204,6 +212,65 @@ public DatabusPushConsumer(final Properties properties, super(properties, messageDeserializer, credential, tierStorage); this.consumerListener = consumerListener; } + + /** + * {@inheritDoc} + */ + @Override + public void subscribe(final Map> groupTopics) { + super.subscribe(groupTopics, new PushConsumerRebalanceListener(null)); + } + + /** + * {@inheritDoc} + */ + @Override + public void subscribe(final Map> groupTopics, + final ConsumerRebalanceListener consumerRebalanceListener) { + super.subscribe(groupTopics, new PushConsumerRebalanceListener(consumerRebalanceListener)); + } + + /** + * {@inheritDoc} + */ + @Override + public void subscribe(final List topics, + final ConsumerRebalanceListener consumerRebalanceListener) { + super.subscribe(topics, new PushConsumerRebalanceListener(consumerRebalanceListener)); + } + + /** + * {@inheritDoc} + */ + @Override + public void subscribe(final List topics) { + super.subscribe(topics, new PushConsumerRebalanceListener(null)); + } + + private class PushConsumerRebalanceListener implements ConsumerRebalanceListener { + + private final ConsumerRebalanceListener customerListener; + + PushConsumerRebalanceListener(final ConsumerRebalanceListener customerListener) { + this.customerListener = Optional.ofNullable(customerListener).orElse(new NoOpConsumerRebalanceListener()); + + } + + @Override + public void onPartitionsRevoked(final Collection partitions) { + customerListener.onPartitionsRevoked(partitions); + + } + + @Override + public void onPartitionsAssigned(final Collection partitions) { + refreshPause.set(true); + customerListener.onPartitionsAssigned(partitions); + + } + } + + /** * {@inheritDoc} */ @@ -367,6 +434,13 @@ private void push(final 
DatabusPushConsumerFuture databusPushConsumerFuture, LOG.info("Consumer " + super.getClientId() + " is resumed"); } catch (TimeoutException e) { + // refreshPause == true means a rebalance was performed and partitions might be reassigned. + // Then, in order to avoid reading messages and just sends the heartbeat when poll(), + // a pause() method has to be invoked with the updated partitions assignment. + if (refreshPause.get()) { + refreshPause.set(false); + pause(assignment()); + } // TimeoutException means that listener is still working. // So, a poll is performed to heartbeat Databus super.poll(Duration.ofMillis(0)); From a4a6c96db7b74e78cf6ae77ecd194327695feb19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20Villalba=C2=A0=20=C2=A0=20=C2=A0=20=C2=A0?= Date: Mon, 6 Jul 2020 23:01:45 -0300 Subject: [PATCH 20/20] Change methods order --- .../databus/consumer/DatabusPushConsumer.java | 46 +++++++++---------- 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/src/main/java/com/opendxl/databus/consumer/DatabusPushConsumer.java b/src/main/java/com/opendxl/databus/consumer/DatabusPushConsumer.java index fc20a16..4296ea9 100644 --- a/src/main/java/com/opendxl/databus/consumer/DatabusPushConsumer.java +++ b/src/main/java/com/opendxl/databus/consumer/DatabusPushConsumer.java @@ -247,30 +247,6 @@ public void subscribe(final List topics) { super.subscribe(topics, new PushConsumerRebalanceListener(null)); } - private class PushConsumerRebalanceListener implements ConsumerRebalanceListener { - - private final ConsumerRebalanceListener customerListener; - - PushConsumerRebalanceListener(final ConsumerRebalanceListener customerListener) { - this.customerListener = Optional.ofNullable(customerListener).orElse(new NoOpConsumerRebalanceListener()); - - } - - @Override - public void onPartitionsRevoked(final Collection partitions) { - customerListener.onPartitionsRevoked(partitions); - - } - - @Override - public void onPartitionsAssigned(final Collection partitions) { 
- refreshPause.set(true); - customerListener.onPartitionsAssigned(partitions); - - } - } - - /** * {@inheritDoc} */ @@ -575,6 +551,28 @@ public void close() { } } + private class PushConsumerRebalanceListener implements ConsumerRebalanceListener { + + private final ConsumerRebalanceListener customerListener; + + PushConsumerRebalanceListener(final ConsumerRebalanceListener customerListener) { + this.customerListener = Optional.ofNullable(customerListener).orElse(new NoOpConsumerRebalanceListener()); + + } + + @Override + public void onPartitionsRevoked(final Collection partitions) { + customerListener.onPartitionsRevoked(partitions); + + } + + @Override + public void onPartitionsAssigned(final Collection partitions) { + refreshPause.set(true); + customerListener.onPartitionsAssigned(partitions); + + } + } }