Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price only 1 $
You can buy this project and download/modify it how often you want.
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.pulsar.functions.sink;
import com.google.common.annotations.VisibleForTesting;
import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.CompletableFuture;
import java.util.function.Function;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.apache.pulsar.client.api.MessageId;
import org.apache.pulsar.client.api.Producer;
import org.apache.pulsar.client.api.PulsarClient;
import org.apache.pulsar.client.api.PulsarClientException;
import org.apache.pulsar.client.api.Schema;
import org.apache.pulsar.client.api.TypedMessageBuilder;
import org.apache.pulsar.client.api.schema.GenericRecord;
import org.apache.pulsar.client.api.schema.KeyValueSchema;
import org.apache.pulsar.client.impl.schema.AutoConsumeSchema;
import org.apache.pulsar.common.functions.ConsumerConfig;
import org.apache.pulsar.common.functions.FunctionConfig;
import org.apache.pulsar.common.schema.KeyValueEncodingType;
import org.apache.pulsar.common.schema.SchemaType;
import org.apache.pulsar.common.util.Reflections;
import org.apache.pulsar.functions.api.Record;
import org.apache.pulsar.functions.instance.AbstractSinkRecord;
import org.apache.pulsar.functions.instance.ProducerBuilderFactory;
import org.apache.pulsar.functions.instance.ProducerCache;
import org.apache.pulsar.functions.instance.stats.ComponentStatsManager;
import org.apache.pulsar.functions.source.PulsarRecord;
import org.apache.pulsar.functions.source.TopicSchema;
import org.apache.pulsar.io.core.Sink;
import org.apache.pulsar.io.core.SinkContext;
@Slf4j
public class PulsarSink implements Sink {
private final PulsarClient client;
private final PulsarSinkConfig pulsarSinkConfig;
private final Map properties;
private final ClassLoader functionClassLoader;
private ComponentStatsManager stats;
private final ProducerCache producerCache;
@VisibleForTesting
PulsarSinkProcessor pulsarSinkProcessor;
private final TopicSchema topicSchema;
private Schema schema;
private ProducerBuilderFactory producerBuilderFactory;
private interface PulsarSinkProcessor {
TypedMessageBuilder newMessage(AbstractSinkRecord record);
void sendOutputMessage(TypedMessageBuilder msg, AbstractSinkRecord record);
void close() throws Exception;
}
abstract class PulsarSinkProcessorBase implements PulsarSinkProcessor {
protected Producer getProducer(String destinationTopic, Schema schema) {
return getProducer(destinationTopic, schema, null, null);
}
protected Producer getProducer(String topicName, Schema schema, String producerName, String partitionId) {
return producerCache.getOrCreateProducer(ProducerCache.CacheArea.SINK_RECORD_CACHE, topicName, partitionId,
() -> {
Producer producer = createProducer(topicName, schema, producerName);
log.info(
"Initialized producer with name '{}' on topic '{}' with schema {} partitionId {} "
+ "-> {}",
producerName, topicName, schema, partitionId, producer);
return producer;
});
}
@Override
public void close() throws Exception {
// no op
}
public Function getPublishErrorHandler(AbstractSinkRecord record, boolean failSource) {
return throwable -> {
Record srcRecord = record.getSourceRecord();
if (failSource) {
srcRecord.fail();
}
String topic = record.getDestinationTopic().orElse(pulsarSinkConfig.getTopic());
String errorMsg;
if (srcRecord instanceof PulsarRecord) {
errorMsg = String.format("Failed to publish to topic [%s] with error [%s] with src message id [%s]",
topic, throwable.getMessage(), ((PulsarRecord) srcRecord).getMessageId());
} else {
errorMsg = String.format("Failed to publish to topic [%s] with error [%s]", topic,
throwable.getMessage());
if (record.getRecordSequence().isPresent()) {
errorMsg = String.format(errorMsg + " with src sequence id [%s]",
record.getRecordSequence().get());
}
}
log.error(errorMsg);
stats.incrSinkExceptions(new Exception(errorMsg));
return null;
};
}
}
@VisibleForTesting
class PulsarSinkAtMostOnceProcessor extends PulsarSinkProcessorBase {
public PulsarSinkAtMostOnceProcessor() {
if (!(schema instanceof AutoConsumeSchema)) {
// initialize default topic
getProducer(pulsarSinkConfig.getTopic(), schema);
} else {
if (log.isDebugEnabled()) {
log.debug("The Pulsar producer is not initialized until the first record is"
+ " published for `AUTO_CONSUME` schema.");
}
}
}
@Override
public TypedMessageBuilder newMessage(AbstractSinkRecord record) {
Schema schemaToWrite = record.getSchema();
if (!record.shouldSetSchema()) {
// we are receiving data directly from another Pulsar topic
// and the Function return type is not a Record
// we must use the destination topic schema
schemaToWrite = schema;
}
if (schemaToWrite != null) {
return getProducer(record
.getDestinationTopic()
.orElse(pulsarSinkConfig.getTopic()), schemaToWrite)
.newMessage(schemaToWrite);
} else {
return getProducer(record
.getDestinationTopic()
.orElse(pulsarSinkConfig.getTopic()), null)
.newMessage();
}
}
@Override
public void sendOutputMessage(TypedMessageBuilder msg, AbstractSinkRecord record) {
msg.sendAsync().thenAccept(messageId -> {
//no op
}).exceptionally(getPublishErrorHandler(record, false));
}
}
@VisibleForTesting
class PulsarSinkAtLeastOnceProcessor extends PulsarSinkAtMostOnceProcessor {
@Override
public void sendOutputMessage(TypedMessageBuilder msg, AbstractSinkRecord record) {
msg.sendAsync()
.thenAccept(messageId -> record.ack())
.exceptionally(getPublishErrorHandler(record, true));
}
}
@VisibleForTesting
class PulsarSinkManualProcessor extends PulsarSinkAtMostOnceProcessor {
@Override
public void sendOutputMessage(TypedMessageBuilder msg, AbstractSinkRecord record) {
super.sendOutputMessage(msg, record);
}
}
@VisibleForTesting
class PulsarSinkEffectivelyOnceProcessor extends PulsarSinkProcessorBase {
@Override
public TypedMessageBuilder newMessage(AbstractSinkRecord record) {
if (!record.getPartitionId().isPresent()) {
throw new RuntimeException(
"PartitionId needs to be specified for every record while in Effectively-once mode");
}
Schema schemaToWrite = record.getSchema();
if (!record.shouldSetSchema()) {
// we are receiving data directly from another Pulsar topic
// and the Function return type is not a Record
// we must use the destination topic schema
schemaToWrite = schema;
}
String topicName = record.getDestinationTopic().orElse(pulsarSinkConfig.getTopic());
String partitionId = record.getPartitionId().get();
String producerName = partitionId;
Producer producer = getProducer(topicName, schemaToWrite, producerName, partitionId);
if (schemaToWrite != null) {
return producer.newMessage(schemaToWrite);
} else {
return producer.newMessage();
}
}
@Override
public void sendOutputMessage(TypedMessageBuilder msg, AbstractSinkRecord record) {
if (!record.getRecordSequence().isPresent()) {
throw new RuntimeException(
"RecordSequence needs to be specified for every record while in Effectively-once mode");
}
// assign sequence id to output message for idempotent producing
msg.sequenceId(record.getRecordSequence().get());
CompletableFuture future = msg.sendAsync();
future.thenAccept(messageId -> record.ack()).exceptionally(getPublishErrorHandler(record, true));
}
}
public PulsarSink(PulsarClient client, PulsarSinkConfig pulsarSinkConfig, Map properties,
ComponentStatsManager stats, ClassLoader functionClassLoader, ProducerCache producerCache) {
this.client = client;
this.pulsarSinkConfig = pulsarSinkConfig;
this.topicSchema = new TopicSchema(client, functionClassLoader);
this.properties = properties;
this.stats = stats;
this.functionClassLoader = functionClassLoader;
this.producerCache = producerCache;
}
@Override
public void open(Map config, SinkContext sinkContext) throws Exception {
log.info("Opening pulsar sink with config: {}", pulsarSinkConfig);
schema = initializeSchema();
if (schema == null) {
log.info("Since output type is null, not creating any real sink");
return;
}
producerBuilderFactory =
new ProducerBuilderFactory(client, pulsarSinkConfig.getProducerConfig(), functionClassLoader, null);
FunctionConfig.ProcessingGuarantees processingGuarantees = this.pulsarSinkConfig.getProcessingGuarantees();
switch (processingGuarantees) {
case ATMOST_ONCE:
this.pulsarSinkProcessor = new PulsarSinkAtMostOnceProcessor();
break;
case ATLEAST_ONCE:
this.pulsarSinkProcessor = new PulsarSinkAtLeastOnceProcessor();
break;
case EFFECTIVELY_ONCE:
this.pulsarSinkProcessor = new PulsarSinkEffectivelyOnceProcessor();
break;
case MANUAL:
this.pulsarSinkProcessor = new PulsarSinkManualProcessor();
break;
}
}
@Override
public void write(Record record) {
AbstractSinkRecord sinkRecord = (AbstractSinkRecord) record;
TypedMessageBuilder msg = pulsarSinkProcessor.newMessage(sinkRecord);
if (record.getKey().isPresent() && !(record.getSchema() instanceof KeyValueSchema
&& ((KeyValueSchema) record.getSchema()).getKeyValueEncodingType() == KeyValueEncodingType.SEPARATED)) {
msg.key(record.getKey().get());
}
msg.value(record.getValue());
if (!record.getProperties().isEmpty()
&& (sinkRecord.shouldAlwaysSetMessageProperties() || pulsarSinkConfig.isForwardSourceMessageProperty())) {
msg.properties(record.getProperties());
}
if (sinkRecord.getSourceRecord() instanceof PulsarRecord) {
PulsarRecord pulsarRecord = (PulsarRecord) sinkRecord.getSourceRecord();
// forward user properties to sink-topic
msg.property("__pfn_input_topic__", pulsarRecord.getTopicName().get())
.property("__pfn_input_msg_id__",
new String(Base64.getEncoder().encode(pulsarRecord.getMessageId().toByteArray()),
StandardCharsets.UTF_8));
} else {
// It is coming from some source
Optional eventTime = sinkRecord.getSourceRecord().getEventTime();
eventTime.ifPresent(msg::eventTime);
}
pulsarSinkProcessor.sendOutputMessage(msg, sinkRecord);
}
@Override
public void close() throws Exception {
if (this.pulsarSinkProcessor != null) {
this.pulsarSinkProcessor.close();
}
}
Producer createProducer(String topicName, Schema schema, String producerName) {
Schema schemaToUse = schema != null ? schema : this.schema;
try {
log.info("Initializing producer {} on topic {} with schema {}", producerName, topicName, schemaToUse);
return producerBuilderFactory.createProducerBuilder(topicName, schemaToUse, producerName)
.properties(properties)
.create();
} catch (PulsarClientException e) {
throw new RuntimeException("Failed to create Producer for topic " + topicName
+ " producerName " + producerName + " schema " + schemaToUse, e);
}
}
@SuppressWarnings("unchecked")
@VisibleForTesting
Schema initializeSchema() throws ClassNotFoundException {
if (StringUtils.isEmpty(this.pulsarSinkConfig.getTypeClassName())) {
return (Schema) Schema.BYTES;
}
Class typeArg = Reflections.loadClass(this.pulsarSinkConfig.getTypeClassName(), functionClassLoader);
if (Void.class.equals(typeArg)) {
// return type is 'void', so there's no schema to check
return null;
}
ConsumerConfig consumerConfig = new ConsumerConfig();
consumerConfig.setSchemaProperties(pulsarSinkConfig.getSchemaProperties());
if (!StringUtils.isEmpty(pulsarSinkConfig.getSchemaType())) {
if (GenericRecord.class.isAssignableFrom(typeArg)) {
consumerConfig.setSchemaType(SchemaType.AUTO_CONSUME.toString());
SchemaType configuredSchemaType = SchemaType.valueOf(pulsarSinkConfig.getSchemaType());
if (SchemaType.AUTO_CONSUME != configuredSchemaType) {
log.info("The configured schema type {} is not able to write GenericRecords."
+ " So overwrite the schema type to be {}", configuredSchemaType, SchemaType.AUTO_CONSUME);
}
} else {
consumerConfig.setSchemaType(pulsarSinkConfig.getSchemaType());
}
return (Schema) topicSchema.getSchema(pulsarSinkConfig.getTopic(), typeArg,
consumerConfig, false);
} else {
consumerConfig.setSchemaType(pulsarSinkConfig.getSerdeClassName());
return (Schema) topicSchema.getSchema(pulsarSinkConfig.getTopic(), typeArg,
consumerConfig, false, functionClassLoader);
}
}
}