io.cdap.plugin.kafka.connector.KafkaConnector

/*
 * Copyright © 2021 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 *
 */

package io.cdap.plugin.kafka.connector;

import io.cdap.cdap.api.annotation.Category;
import io.cdap.cdap.api.annotation.Description;
import io.cdap.cdap.api.annotation.Name;
import io.cdap.cdap.api.annotation.Plugin;
import io.cdap.cdap.api.data.format.StructuredRecord;
import io.cdap.cdap.api.data.schema.Schema;
import io.cdap.cdap.etl.api.batch.BatchSink;
import io.cdap.cdap.etl.api.batch.BatchSource;
import io.cdap.cdap.etl.api.connector.BrowseDetail;
import io.cdap.cdap.etl.api.connector.BrowseEntity;
import io.cdap.cdap.etl.api.connector.BrowseRequest;
import io.cdap.cdap.etl.api.connector.Connector;
import io.cdap.cdap.etl.api.connector.ConnectorContext;
import io.cdap.cdap.etl.api.connector.ConnectorSpec;
import io.cdap.cdap.etl.api.connector.ConnectorSpecRequest;
import io.cdap.cdap.etl.api.connector.DirectConnector;
import io.cdap.cdap.etl.api.connector.PluginSpec;
import io.cdap.cdap.etl.api.connector.SampleRequest;
import io.cdap.cdap.etl.api.validation.ValidationException;
import io.cdap.plugin.common.ConfigUtil;
import io.cdap.plugin.common.Constants;
import io.cdap.plugin.common.ReferenceNames;
import io.cdap.plugin.kafka.batch.source.KafkaBatchConfig;
import io.cdap.plugin.kafka.batch.source.KafkaBatchSource;
import io.cdap.plugin.kafka.sink.KafkaBatchSink;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.StringDeserializer;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.UUID;

/**
 * Kafka connector that browses topics, samples messages, and generates specs for the Kafka batch
 * source and sink plugins.
 */
@Plugin(type = Connector.PLUGIN_TYPE)
@Name(KafkaConnector.NAME)
@Category("Messaging Systems")
@Description("Connection to access data in Kafka topics.")
public class KafkaConnector implements DirectConnector {
  public static final String NAME = "Kafka";
  static final String TOPIC_TYPE = "topic";
  private static final long TIME_OUT_MS = 15000L;
  private static final String MESSAGE_FIELD = "message";
  private static final Schema DEFAULT_SCHEMA =
    Schema.recordOf("kafka", Schema.Field.of(MESSAGE_FIELD, Schema.of(Schema.Type.STRING)));
  private final KafkaConnectorConfig config;

  public KafkaConnector(KafkaConnectorConfig config) {
    this.config = config;
  }

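  /**
   * Samples messages from the topic given in the request path. Each message value is wrapped in a record
   * with a single string 'message' field, up to the requested limit.
   */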
  @Override
  public List<StructuredRecord> sample(ConnectorContext connectorContext,
                                       SampleRequest sampleRequest) {
    String topic = cleanse(sampleRequest.getPath());
    if (topic.isEmpty()) {
      throw new IllegalArgumentException("Topic is not provided in the path");
    }

    int limit = sampleRequest.getLimit();
    try (KafkaConsumer<String, String> consumer = getKafkaConsumer()) {
      consumer.subscribe(Collections.singleton(topic));
      List<StructuredRecord> structuredRecords = new ArrayList<>();
      ConsumerRecords<String, String> records = consumer.poll(TIME_OUT_MS);
      for (ConsumerRecord<String, String> record : records) {
        if (structuredRecords.size() >= limit) {
          break;
        }

        structuredRecords.add(StructuredRecord.builder(DEFAULT_SCHEMA).set(MESSAGE_FIELD, record.value()).build());
      }
      return structuredRecords;
    }
  }

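  /**
   * Tests the connection by listing topics from the configured brokers.
   */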
  @Override
  public void test(ConnectorContext connectorContext) throws ValidationException {
    try (KafkaConsumer<String, String> consumer = getKafkaConsumer()) {
      consumer.listTopics();
    }
  }

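  /**
   * Browses the connection. The root path lists all topics up to the requested limit; a path naming a
   * topic returns just that topic if it exists.
   */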
  @Override
  public BrowseDetail browse(ConnectorContext connectorContext, BrowseRequest request) {
    String path = cleanse(request.getPath());
    int limit = request.getLimit() == null || request.getLimit() <= 0 ? Integer.MAX_VALUE : request.getLimit();
    BrowseDetail.Builder builder = BrowseDetail.builder();
    try (KafkaConsumer<String, String> consumer = getKafkaConsumer()) {
      Set<String> topics = consumer.listTopics().keySet();
      // not the root path, so it refers to a topic; check whether it exists and return it
      if (!path.isEmpty()) {
        if (!topics.contains(path)) {
          return builder.build();
        }
        return builder.setTotalCount(1).addEntity(BrowseEntity.builder(path, path, TOPIC_TYPE)
                                                    .canSample(true).build()).build();
      }

      // root path: list topics up to the limit
      topics.stream().limit(limit)
        .forEach(topic -> builder.addEntity(BrowseEntity.builder(topic, topic, TOPIC_TYPE)
                                              .canSample(true).build()));
      builder.setTotalCount(topics.size());
      return builder.build();
    }
  }

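  /**
   * Generates the spec for the related Kafka batch source and sink plugins, carrying over the connection,
   * the text format, and the topic (with a cleansed reference name) when a topic path is given.
   */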
  @Override
  public ConnectorSpec generateSpec(ConnectorContext connectorContext, ConnectorSpecRequest request) {
    Map<String, String> properties = new HashMap<>();
    properties.put(ConfigUtil.NAME_USE_CONNECTION, "true");
    properties.put(ConfigUtil.NAME_CONNECTION, request.getConnectionWithMacro());
    properties.put(KafkaBatchConfig.FORMAT, "text");
    String topic = cleanse(request.getPath());
    if (!topic.isEmpty()) {
      properties.put(KafkaBatchConfig.TOPIC, topic);
      properties.put(Constants.Reference.REFERENCE_NAME, ReferenceNames.cleanseReferenceName(topic));
    }
    return ConnectorSpec.builder().setSchema(DEFAULT_SCHEMA)
      .addRelatedPlugin(new PluginSpec(KafkaBatchSource.NAME, BatchSource.PLUGIN_TYPE, properties))
      .addRelatedPlugin(new PluginSpec(KafkaBatchSink.NAME, BatchSink.PLUGIN_TYPE, properties))
      .build();
  }

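  /**
   * Creates a consumer with string deserializers and random client and group ids, reading from the
   * earliest offset and excluding internal topics.
   */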
  private KafkaConsumer<String, String> getKafkaConsumer() {
    Properties props = new Properties();
    props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, config.getKafkaBrokers());
    props.put(ConsumerConfig.CLIENT_ID_CONFIG, UUID.randomUUID().toString());
    props.put(ConsumerConfig.GROUP_ID_CONFIG, UUID.randomUUID().toString());
    props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
    props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
    props.put(ConsumerConfig.EXCLUDE_INTERNAL_TOPICS_CONFIG, "true");
    props.put(ConsumerConfig.REQUEST_TIMEOUT_MS_CONFIG, String.valueOf(TIME_OUT_MS));

    // Kafka first uses the thread context classloader to load the deserializers, which may not contain them,
    // so temporarily set the plugin classloader as the context classloader.
    ClassLoader cl = Thread.currentThread().getContextClassLoader();
    Thread.currentThread().setContextClassLoader(getClass().getClassLoader());
    try {
      return new KafkaConsumer<>(props);
    } finally {
      Thread.currentThread().setContextClassLoader(cl);
    }
  }

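  /**
   * Removes leading and trailing slashes from the path and validates that it contains at most one
   * segment (the topic name).
   */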
  private String cleanse(String path) {
    String result = path;
    // remove leading and trailing "/"
    result = result.replaceAll("^/+", "").replaceAll("/+$", "");
    if (result.contains("/")) {
      throw new IllegalArgumentException(String.format("Path %s is invalid, it should only be at root level or " +
                                                         "contain just the topic", path));
    }
    return result;
  }
}