/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.pulsar.io.kinesis;
import static com.google.common.util.concurrent.Futures.addCallback;
import static com.google.common.util.concurrent.MoreExecutors.directExecutor;
import static org.apache.commons.lang3.StringUtils.isNotBlank;
import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.services.kinesis.producer.KinesisProducer;
import com.amazonaws.services.kinesis.producer.KinesisProducerConfiguration;
import com.amazonaws.services.kinesis.producer.KinesisProducerConfiguration.ThreadingModel;
import com.amazonaws.services.kinesis.producer.UserRecordFailedException;
import com.amazonaws.services.kinesis.producer.UserRecordResult;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.util.concurrent.FutureCallback;
import com.google.common.util.concurrent.ListenableFuture;
import io.netty.util.Recycler;
import io.netty.util.Recycler.Handle;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.Map;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicIntegerFieldUpdater;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.builder.ReflectionToStringBuilder;
import org.apache.commons.lang3.builder.ToStringStyle;
import org.apache.pulsar.client.api.schema.GenericObject;
import org.apache.pulsar.functions.api.Record;
import org.apache.pulsar.io.aws.AbstractAwsConnector;
import org.apache.pulsar.io.aws.AwsCredentialProviderPlugin;
import org.apache.pulsar.io.core.Sink;
import org.apache.pulsar.io.core.SinkContext;
import org.apache.pulsar.io.core.annotations.Connector;
import org.apache.pulsar.io.core.annotations.IOType;
import org.apache.pulsar.io.kinesis.KinesisSinkConfig.MessageFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* A Kinesis sink which can be configured by {@link KinesisSinkConfig}.
*
 * {@link KinesisSinkConfig} accepts the following properties (an illustrative example configuration appears at the end of this comment):
 * 1. awsEndpoint: Kinesis endpoint URL; endpoints are listed at https://docs.aws.amazon.com/general/latest/gr/rande.html
 * 2. awsRegion: appropriate AWS region, e.g. us-west-1, us-west-2
 * 3. awsKinesisStreamName: Kinesis stream name
 * 4. awsCredentialPluginName: fully-qualified class name of an {@link AwsCredentialProviderPlugin} implementation.
 * - It is a factory class which creates the {@link AWSCredentialsProvider} used by {@link KinesisProducer}.
 * - If it is empty, {@link KinesisSink} creates a default {@link AWSCredentialsProvider}
 * which accepts a JSON map of credentials in awsCredentialPluginParam,
 * e.g. awsCredentialPluginParam = {"accessKey":"my-access-key","secretKey":"my-secret-key"}
 * 5. awsCredentialPluginParam: JSON parameters used to initialize the {@link AwsCredentialProviderPlugin}
 * 6. messageFormat: enum:["ONLY_RAW_PAYLOAD","FULL_MESSAGE_IN_JSON","FULL_MESSAGE_IN_FB","FULL_MESSAGE_IN_JSON_EXPAND_VALUE"]
 * a. ONLY_RAW_PAYLOAD: publishes the raw payload to the stream
 * b. FULL_MESSAGE_IN_JSON: publishes the full message (encryptionCtx + properties + payload) in JSON format
 * json-schema:
* {"type":"object","properties":{"encryptionCtx":{"type":"object","properties":{"metadata":{"type":"object","additionalProperties":{"type":"string"}},"uncompressedMessageSize":{"type":"integer"},"keysMetadataMap":{"type":"object","additionalProperties":{"type":"object","additionalProperties":{"type":"string"}}},"keysMapBase64":{"type":"object","additionalProperties":{"type":"string"}},"encParamBase64":{"type":"string"},"compressionType":{"type":"string","enum":["NONE","LZ4","ZLIB"]},"batchSize":{"type":"integer"},"algorithm":{"type":"string"}}},"payloadBase64":{"type":"string"},"properties":{"type":"object","additionalProperties":{"type":"string"}}}}
* Example:
* {"payloadBase64":"cGF5bG9hZA==","properties":{"prop1":"value"},"encryptionCtx":{"keysMapBase64":{"key1":"dGVzdDE=","key2":"dGVzdDI="},"keysMetadataMap":{"key1":{"ckms":"cmks-1","version":"v1"},"key2":{"ckms":"cmks-2","version":"v2"}},"metadata":{"ckms":"cmks-1","version":"v1"},"encParamBase64":"cGFyYW0=","algorithm":"algo","compressionType":"LZ4","uncompressedMessageSize":10,"batchSize":10}}
 * c. FULL_MESSAGE_IN_FB: publishes the full message (encryptionCtx + properties + payload) in FlatBuffers format
 * d. FULL_MESSAGE_IN_JSON_EXPAND_VALUE: publishes the full message (topic + key + value + properties + event time) in JSON format, using the schema to expand the value into JSON.
*
*
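 * A minimal example sink configuration (an illustrative sketch only; the keys mirror the options listed
 * above, and "retainOrdering" is inferred from the isRetainOrdering() accessor used by this sink):
 * {
 *   "awsEndpoint": "https://kinesis.us-west-2.amazonaws.com",
 *   "awsRegion": "us-west-2",
 *   "awsKinesisStreamName": "my-kinesis-stream",
 *   "awsCredentialPluginParam": "{\"accessKey\":\"my-access-key\",\"secretKey\":\"my-secret-key\"}",
 *   "messageFormat": "ONLY_RAW_PAYLOAD",
 *   "retainOrdering": true
 * }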
*/
@Connector(
name = "kinesis",
type = IOType.SINK,
help = "A sink connector that copies messages from Pulsar to Kinesis",
configClass = KinesisSinkConfig.class
)
public class KinesisSink extends AbstractAwsConnector implements Sink<GenericObject> {
private static final Logger LOG = LoggerFactory.getLogger(KinesisSink.class);
private KinesisProducer kinesisProducer;
private KinesisSinkConfig kinesisSinkConfig;
private String streamName;
private static final String defaultPartitionedKey = "default";
private static final int maxPartitionedKeyLength = 256;
private SinkContext sinkContext;
private ScheduledExecutorService scheduledExecutor;
private ObjectMapper objectMapper;
    // Publish-failure flag: set by the KPL callback thread and checked in write() to preserve ordering.
private static final int FALSE = 0;
private static final int TRUE = 1;
private volatile int previousPublishFailed = FALSE;
    private static final AtomicIntegerFieldUpdater<KinesisSink> IS_PUBLISH_FAILED =
            AtomicIntegerFieldUpdater.newUpdater(KinesisSink.class, "previousPublishFailed");
public static final String METRICS_TOTAL_INCOMING = "_kinesis_total_incoming_";
public static final String METRICS_TOTAL_INCOMING_BYTES = "_kinesis_total_incoming_bytes_";
public static final String METRICS_TOTAL_SUCCESS = "_kinesis_total_success_";
public static final String METRICS_TOTAL_FAILURE = "_kinesis_total_failure_";
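    // Enqueues the record into the KPL and registers the callback on a direct executor, so the
    // success/failure handling in ProducerSendCallback runs on the KPL's own callback thread.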
private void sendUserRecord(ProducerSendCallback producerSendCallback) {
        ListenableFuture<UserRecordResult> addRecordResult = kinesisProducer.addUserRecord(this.streamName,
producerSendCallback.partitionedKey, producerSendCallback.data);
addCallback(addRecordResult, producerSendCallback, directExecutor());
}
@Override
    public void write(Record<GenericObject> record) throws Exception {
// kpl-thread captures publish-failure. fail the publish on main pulsar-io-thread to maintain the ordering
if (kinesisSinkConfig.isRetainOrdering() && previousPublishFailed == TRUE) {
LOG.warn("Skip acking message to retain ordering with previous failed message {}-{}", this.streamName,
record.getRecordSequence());
throw new IllegalStateException("kinesis queue has publish failure");
}
String partitionedKey = record.getKey().orElse(record.getTopicName().orElse(defaultPartitionedKey));
partitionedKey = partitionedKey.length() > maxPartitionedKeyLength
? partitionedKey.substring(0, maxPartitionedKeyLength - 1)
: partitionedKey; // partitionedKey Length must be at least one, and at most 256
ByteBuffer data = createKinesisMessage(kinesisSinkConfig.getMessageFormat(), record);
int size = data.remaining();
sendUserRecord(ProducerSendCallback.create(this, record, System.nanoTime(), partitionedKey, data));
if (sinkContext != null) {
sinkContext.recordMetric(METRICS_TOTAL_INCOMING, 1);
sinkContext.recordMetric(METRICS_TOTAL_INCOMING_BYTES, data.array().length);
}
if (LOG.isDebugEnabled()) {
LOG.debug("Published message to kinesis stream {} with size {}", streamName, size);
}
}
@Override
public void close() {
if (kinesisProducer != null) {
kinesisProducer.flush();
kinesisProducer.destroy();
}
LOG.info("Kinesis sink stopped.");
}
@Override
    public void open(Map<String, Object> config, SinkContext sinkContext) {
scheduledExecutor = Executors.newSingleThreadScheduledExecutor();
kinesisSinkConfig = KinesisSinkConfig.load(config, sinkContext);
this.sinkContext = sinkContext;
KinesisProducerConfiguration kinesisConfig = new KinesisProducerConfiguration();
if (isNotBlank(kinesisSinkConfig.getAwsEndpoint())) {
kinesisConfig.setKinesisEndpoint(kinesisSinkConfig.getAwsEndpoint());
}
if (isNotBlank(kinesisSinkConfig.getCloudwatchEndpoint())) {
kinesisConfig.setCloudwatchEndpoint(kinesisSinkConfig.getCloudwatchEndpoint());
}
if (kinesisSinkConfig.getAwsEndpointPort() != null) {
kinesisConfig.setKinesisPort(kinesisSinkConfig.getAwsEndpointPort());
}
if (kinesisSinkConfig.getAwsStsEndpoint() != null) {
kinesisConfig.setStsEndpoint(kinesisSinkConfig.getAwsStsEndpoint());
}
if (kinesisSinkConfig.getAwsStsPort() != null) {
kinesisConfig.setStsPort(kinesisSinkConfig.getAwsStsPort());
}
kinesisConfig.setRegion(kinesisSinkConfig.getAwsRegion());
kinesisConfig.setThreadingModel(ThreadingModel.POOLED);
kinesisConfig.setThreadPoolSize(4);
kinesisConfig.setCollectionMaxCount(1);
if (kinesisSinkConfig.getSkipCertificateValidation() != null
&& kinesisSinkConfig.getSkipCertificateValidation()) {
kinesisConfig.setVerifyCertificate(false);
}
AWSCredentialsProvider credentialsProvider = createCredentialProvider(
kinesisSinkConfig.getAwsCredentialPluginName(),
kinesisSinkConfig.getAwsCredentialPluginParam())
.getCredentialProvider();
kinesisConfig.setCredentialsProvider(credentialsProvider);
kinesisConfig.setNativeExecutable(StringUtils.trimToEmpty(kinesisSinkConfig.getNativeExecutable()));
this.streamName = kinesisSinkConfig.getAwsKinesisStreamName();
this.kinesisProducer = new KinesisProducer(kinesisConfig);
this.objectMapper = new ObjectMapper();
if (kinesisSinkConfig.isJsonIncludeNonNulls()) {
objectMapper.setSerializationInclusion(JsonInclude.Include.NON_NULL);
}
IS_PUBLISH_FAILED.set(this, FALSE);
LOG.info("Kinesis sink started. {}",
ReflectionToStringBuilder.toString(kinesisConfig, ToStringStyle.SHORT_PREFIX_STYLE));
}
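    /**
     * Callback invoked by the KPL when an enqueued user record either succeeds or fails. Instances are pooled
     * via Netty's {@link Recycler} to avoid per-record allocations and are recycled after success or after a
     * terminal (non-retried) failure.
     */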
    private static final class ProducerSendCallback implements FutureCallback<UserRecordResult> {
        private Record<GenericObject> resultContext;
private long startTime = 0;
        private final Handle<ProducerSendCallback> recyclerHandle;
private KinesisSink kinesisSink;
private Backoff backoff;
private String partitionedKey;
private ByteBuffer data;
        private ProducerSendCallback(Handle<ProducerSendCallback> recyclerHandle) {
this.recyclerHandle = recyclerHandle;
}
        static ProducerSendCallback create(KinesisSink kinesisSink, Record<GenericObject> resultContext, long startTime,
String partitionedKey, ByteBuffer data) {
ProducerSendCallback sendCallback = RECYCLER.get();
sendCallback.resultContext = resultContext;
sendCallback.kinesisSink = kinesisSink;
sendCallback.startTime = startTime;
sendCallback.partitionedKey = partitionedKey;
sendCallback.data = data;
if (kinesisSink.kinesisSinkConfig.isRetainOrdering() && sendCallback.backoff == null) {
sendCallback.backoff = new Backoff(kinesisSink.kinesisSinkConfig.getRetryInitialDelayInMillis(),
TimeUnit.MILLISECONDS, kinesisSink.kinesisSinkConfig.getRetryMaxDelayInMillis(),
TimeUnit.MILLISECONDS, 0, TimeUnit.SECONDS);
}
return sendCallback;
}
private void recycle() {
resultContext = null;
kinesisSink = null;
startTime = 0;
if (backoff != null) {
backoff.reset();
}
partitionedKey = null;
data = null;
recyclerHandle.recycle(this);
}
        private static final Recycler<ProducerSendCallback> RECYCLER = new Recycler<ProducerSendCallback>() {
            @Override
            protected ProducerSendCallback newObject(Handle<ProducerSendCallback> handle) {
return new ProducerSendCallback(handle);
}
};
@Override
public void onSuccess(UserRecordResult result) {
if (LOG.isDebugEnabled()) {
LOG.debug("Successfully published message for {}-{} with latency {}",
kinesisSink.streamName, result.getShardId(),
TimeUnit.NANOSECONDS.toMillis((System.nanoTime() - startTime)));
}
if (kinesisSink.sinkContext != null) {
kinesisSink.sinkContext.recordMetric(METRICS_TOTAL_SUCCESS, 1);
}
kinesisSink.previousPublishFailed = FALSE;
this.resultContext.ack();
recycle();
}
@Override
public void onFailure(Throwable exception) {
if (exception instanceof UserRecordFailedException) {
// If the exception is UserRecordFailedException, we need to extract it to see real error messages.
UserRecordFailedException failedException = (UserRecordFailedException) exception;
StringBuffer stringBuffer = new StringBuffer();
failedException.getResult().getAttempts().forEach(attempt ->
stringBuffer.append(String.format("errorMessage:%s, errorCode:%s, delay:%d, duration:%d;",
attempt.getErrorMessage(), attempt.getErrorCode(), attempt.getDelay(), attempt.getDuration())));
LOG.error("[{}] Failed to published message for replicator of {}-{}: Attempts:{}",
kinesisSink.streamName, resultContext.getPartitionId(),
resultContext.getRecordSequence(), stringBuffer);
} else {
if (StringUtils.isEmpty(exception.getMessage())) {
LOG.error("[{}] Failed to published message for replicator of {}-{}", kinesisSink.streamName,
resultContext.getPartitionId(), resultContext.getRecordSequence(), exception);
} else {
LOG.error("[{}] Failed to published message for replicator of {}-{}, {} ", kinesisSink.streamName,
resultContext.getPartitionId(), resultContext.getRecordSequence(), exception.getMessage());
}
}
kinesisSink.previousPublishFailed = TRUE;
if (kinesisSink.sinkContext != null) {
kinesisSink.sinkContext.recordMetric(METRICS_TOTAL_FAILURE, 1);
}
if (backoff != null) {
long nextDelay = backoff.next();
LOG.info("[{}] Retry to publish message for replicator of {}-{} after {} ms.", kinesisSink.streamName,
resultContext.getPartitionId(), resultContext.getRecordSequence(), nextDelay);
kinesisSink.scheduledExecutor.schedule(() -> kinesisSink.sendUserRecord(this),
                        nextDelay, TimeUnit.MILLISECONDS);
} else {
recycle();
}
}
}
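    // Serializes the Pulsar record into the payload format selected by messageFormat; the default branch
    // falls back to the raw message payload (ONLY_RAW_PAYLOAD).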
    public ByteBuffer createKinesisMessage(MessageFormat msgFormat, Record<GenericObject> record)
throws JsonProcessingException {
switch (msgFormat) {
case FULL_MESSAGE_IN_JSON:
return ByteBuffer.wrap(Utils.serializeRecordToJson(record).getBytes(StandardCharsets.UTF_8));
case FULL_MESSAGE_IN_FB:
return Utils.serializeRecordToFlatBuffer(record);
case FULL_MESSAGE_IN_JSON_EXPAND_VALUE:
return ByteBuffer.wrap(
Utils.serializeRecordToJsonExpandingValue(objectMapper, record,
kinesisSinkConfig.isJsonFlatten())
.getBytes(StandardCharsets.UTF_8));
default:
// send raw-message
return ByteBuffer.wrap(Utils.getMessage(record).getData());
}
}
}