All downloads are free. Search and download functionality uses the official Maven repository.

com.antgroup.geaflow.dsl.connector.pulsar.PulsarTableSource Maven / Gradle / Ivy

The newest version!
package com.antgroup.geaflow.dsl.connector.pulsar;

import com.antgroup.geaflow.api.context.RuntimeContext;
import com.antgroup.geaflow.common.config.Configuration;
import com.antgroup.geaflow.common.config.keys.DSLConfigKeys;
import com.antgroup.geaflow.common.utils.DateTimeUtil;
import com.antgroup.geaflow.dsl.common.exception.GeaFlowDSLException;
import com.antgroup.geaflow.dsl.common.types.TableSchema;
import com.antgroup.geaflow.dsl.common.util.Windows;
import com.antgroup.geaflow.dsl.connector.api.FetchData;
import com.antgroup.geaflow.dsl.connector.api.Offset;
import com.antgroup.geaflow.dsl.connector.api.Partition;
import com.antgroup.geaflow.dsl.connector.api.TableSource;
import com.antgroup.geaflow.dsl.connector.api.serde.DeserializerFactory;
import com.antgroup.geaflow.dsl.connector.api.serde.TableDeserializer;
import com.antgroup.geaflow.dsl.connector.api.util.ConnectorConstants;
import com.antgroup.geaflow.dsl.connector.pulsar.utils.PulsarConstants;

import java.io.IOException;
import java.util.*;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;

import org.apache.pulsar.client.api.*;
import org.apache.pulsar.common.naming.TopicName;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;


public class PulsarTableSource implements TableSource {

    private static final Logger LOGGER = LoggerFactory.getLogger(PulsarTableSource.class);
    private Configuration tableConf;
    private String servers;
    private String topic;
    private SubscriptionType subscribeType;
    private int receiverQueueSize;
    private int negativeAckRedeliveryDelay;
    private TimeUnit timeUnit;
    private String subscriptionName;
    private SubscriptionInitialPosition subscriptionInitialPosition;
    private long windowSize;

    private transient PulsarClient pulsarClient;

    private transient Map> consumers;

    @Override
    public void init(Configuration conf, TableSchema tableSchema) {
        this.tableConf = conf;
        String port = conf.getString(PulsarConfigKeys.GEAFLOW_DSL_PULSAR_PORT);
        String[] serversAddress = conf.getString(PulsarConfigKeys.GEAFLOW_DSL_PULSAR_SERVERS).split(",");
        servers = "pulsar://" + String.join(":" + port + ",", serversAddress);
        topic = conf.getString(PulsarConfigKeys.GEAFLOW_DSL_PULSAR_TOPIC);
        subscriptionName = conf.getString(PulsarConfigKeys.GEAFLOW_DSL_PULSAR_SUBSCRIBE_NAME);

        String position = conf.getString(PulsarConfigKeys.GEAFLOW_DSL_PULSAR_SUBSCRIBE_INITIAL_POSITION);
        if (position.equals("earliest")) {
            this.subscriptionInitialPosition = SubscriptionInitialPosition.Earliest;
        } else if (position.equals("latest")) {
            this.subscriptionInitialPosition = SubscriptionInitialPosition.Latest;
        } else {
            throw new GeaFlowDSLException("Invalid subscription initial position: {}", position);
        }
        subscribeType = PulsarConstants.PULSAR_SUBSCRIBE_TYPE;
        negativeAckRedeliveryDelay = PulsarConstants.PULSAR_NEGATIVE_ACK_REDELIVERY;
        timeUnit = PulsarConstants.PULSAR_NEGATIVE_ACK_REDELIVERY_UNIT;
        receiverQueueSize = PulsarConstants.PULSAR_RECEIVER_QUEUE_SIZE;

        this.windowSize = conf.getLong(DSLConfigKeys.GEAFLOW_DSL_WINDOW_SIZE);
        if (this.windowSize == Windows.SIZE_OF_ALL_WINDOW) {
            throw new GeaFlowDSLException("Pulsar cannot support all window");
        } else if (windowSize <= 0) {
            throw new GeaFlowDSLException("Invalid window size: {}", windowSize);
        }
    }

    private void createPulsarClient() {
        try {
            this.pulsarClient = PulsarClient.builder().serviceUrl(this.servers).build();
        } catch (PulsarClientException e) {
            throw new GeaFlowDSLException(" fail to create pulsar client, exception is {}", e);
        }
    }

    private Consumer createPulsarConsumer(String partitionName) {
        Consumer consumer;
        try {
            consumer = pulsarClient.newConsumer(Schema.STRING)
                    .topic(partitionName)
                    .subscriptionName(subscriptionName)
                    .subscriptionType(subscribeType)
                    .subscriptionInitialPosition(subscriptionInitialPosition)
                    .negativeAckRedeliveryDelay(negativeAckRedeliveryDelay, timeUnit)
                    .batchReceivePolicy(new BatchReceivePolicy.Builder().maxNumMessages((int) windowSize).build())
                    .receiverQueueSize(receiverQueueSize)
                    .subscribe();
        } catch (PulsarClientException e) {
            throw new GeaFlowDSLException("fail to create pulsar consumer, topic name is {}", partitionName);
        }
        return consumer;
    }

    @Override
    public void open(RuntimeContext context) {
        createPulsarClient();
        consumers = new HashMap<>();
        LOGGER.info("pulsar client created successfully");
    }

    @Override
    public List listPartitions() {
        List partitionNameList;
        try {
            partitionNameList = pulsarClient.getPartitionsForTopic(topic).get();
        } catch (InterruptedException | ExecutionException e) {
            throw new GeaFlowDSLException("get partitions for topic fail, the topic is {}", topic);
        }
        if (partitionNameList == null) {
            throw new GeaFlowDSLException("Obtain an empty partition list through pulsarClient, the topic name is",
                    topic);
        }
        return  partitionNameList.stream().map(PulsarPartition::new)
                .collect(Collectors.toList());
    }

    @Override
    public  TableDeserializer getDeserializer(Configuration conf) {
        return DeserializerFactory.loadTextDeserializer();
    }

    @Override
    public  FetchData fetch(Partition partition, Optional startOffset,
                                  long newWindowSize) throws IOException {

        if (newWindowSize == Windows.SIZE_OF_ALL_WINDOW) {
            throw new GeaFlowDSLException("Pulsar cannot support all window");
        } else if (newWindowSize <= 0) {
            throw new GeaFlowDSLException("Invalid window size: {}", newWindowSize);
        }
        windowSize = newWindowSize;
        String partitionName = partition.getName();
        Consumer consumer = consumers.get(partitionName);
        if (consumer == null) {
            consumer = createPulsarConsumer(partitionName);
            consumers.put(partitionName, consumer);
        }
        assert consumer != null;
        PulsarOffset pulsarOffset;

        boolean isTimeStamp = false;
        if (startOffset.isPresent()) {
            pulsarOffset = (PulsarOffset) startOffset.get();
            if (pulsarOffset.isTimestamp()) {
                consumer.seek(pulsarOffset.getOffset());
                isTimeStamp = true;
            } else {
                consumer.seek(pulsarOffset.getMessageId());
            }
        }

        List dataList = new ArrayList<>();
        Messages messages = consumer.batchReceive();
        Iterator> iterator = messages.iterator();
        long timeOffset = 0L;
        MessageId lastMessageId = MessageId.earliest;
        while (iterator.hasNext()) {
            Message message = iterator.next();
            dataList.add(message.getValue());
            timeOffset = message.getPublishTime();
            lastMessageId = message.getMessageId();
            LOGGER.info("receive message: " + message.getValue());
            LOGGER.info("object address is: " + this.hashCode());

        }

        PulsarOffset newOffset;
        if (isTimeStamp) {
            newOffset = new PulsarOffset(timeOffset);
        } else {
            newOffset = new PulsarOffset(lastMessageId);
        }
        TopicName.get(topic).getPartitionedTopicName();

        return (FetchData) FetchData.createStreamFetch(dataList, newOffset, false);
    }

    @Override
    public void close() {
        try {
            pulsarClient.close();
        } catch (PulsarClientException e) {
            throw new GeaFlowDSLException("fail to close pulsar client, the exception is {}", e);
        }
        LOGGER.info("close pulsar client");

    }

    public static class PulsarPartition implements Partition {

        private final String topicWithPartition;
        
        public PulsarPartition(String topicWithPartition) {
            this.topicWithPartition = topicWithPartition;
        }

        @Override
        public String getName() {
            return topicWithPartition;
        }

        @Override
        public int hashCode() {
            return Objects.hash(topicWithPartition);
        }

        @Override
        public boolean equals(Object o) {
            if (this == o) {
                return true;
            }
            if (!(o instanceof PulsarPartition)) {
                return false;
            }
            PulsarPartition that = (PulsarPartition) o;
            return Objects.equals(topicWithPartition, that.topicWithPartition);
        }
    }

    public static class PulsarOffset implements Offset {
        private final MessageId messageId;
        private final long timeStamp;

        public PulsarOffset(MessageId messageId) {
            this.messageId = messageId;
            timeStamp = 0L;
        }

        public PulsarOffset(long timeStamp) {
            this.messageId = null;
            this.timeStamp = timeStamp;
        }

        @Override
        public String humanReadable() {
            return DateTimeUtil.fromUnixTime(timeStamp, ConnectorConstants.START_TIME_FORMAT);
        }

        @Override
        public long getOffset() {
            return timeStamp;
        }

        @Override
        public boolean isTimestamp() {
            return messageId == null;
        }

        public MessageId getMessageId() {
            return messageId;
        }
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy