co.cask.cdap.kafka.flow.KafkaConsumerFlowlet

/*
 * Copyright © 2014-2015 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package co.cask.cdap.kafka.flow;

import co.cask.cdap.api.annotation.Tick;
import co.cask.cdap.api.dataset.lib.KeyValueTable;
import co.cask.cdap.api.flow.flowlet.AbstractFlowlet;
import co.cask.cdap.api.flow.flowlet.FailurePolicy;
import co.cask.cdap.api.flow.flowlet.FailureReason;
import co.cask.cdap.api.flow.flowlet.Flowlet;
import co.cask.cdap.api.flow.flowlet.FlowletContext;
import co.cask.cdap.api.flow.flowlet.InputContext;
import com.google.common.base.Charsets;
import com.google.common.base.Function;
import com.google.common.base.Predicate;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterables;
import com.google.common.collect.Maps;
import com.google.common.reflect.TypeToken;
import com.google.common.util.concurrent.Service;
import org.apache.twill.kafka.client.TopicPartition;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.lang.reflect.GenericArrayType;
import java.lang.reflect.ParameterizedType;
import java.lang.reflect.Type;
import java.nio.ByteBuffer;
import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.TimeUnit;

/**
 * Abstract base class for implementing a flowlet that consumes data from a Kafka cluster. This class serves as the
 * generic base to help in implementing a flowlet that can poll from a specific Kafka version. Users should extend
 * one of the sub-classes of this class.
 *
 * @param <KEY> Type of message key
 * @param <PAYLOAD> Type of message value
 * @param <OFFSET> Type of offset object
 */
public abstract class KafkaConsumerFlowlet<KEY, PAYLOAD, OFFSET> extends AbstractFlowlet {

  private static final Logger LOG = LoggerFactory.getLogger(KafkaConsumerFlowlet.class);
  protected static final int SO_TIMEOUT = 5 * 1000;           // 5 seconds.

  private final Function<KafkaConsumerInfo<OFFSET>, OFFSET> consumerToOffset =
    new Function<KafkaConsumerInfo<OFFSET>, OFFSET>() {
      @Override
      public OFFSET apply(KafkaConsumerInfo<OFFSET> input) {
        return input.getReadOffset();
      }
    };
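
  // The consumerToOffset function above is applied via Maps.transformValues() in pollMessages()
  // to produce the map of current read offsets that is passed to saveReadOffsets().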

  private Function<ByteBuffer, KEY> keyDecoder;
  private Function<ByteBuffer, PAYLOAD> payloadDecoder;
  private KafkaConfig kafkaConfig;
  private Map<TopicPartition, KafkaConsumerInfo<OFFSET>> consumerInfos;
  private Map<TopicPartition, KafkaConsumerInfo<OFFSET>> changedConsumerInfos;
  private int instances;

  /**
   * Initializes this {@link Flowlet}. A child class must call this method explicitly when overriding it.
   */
  @Override
  public void initialize(FlowletContext context) throws Exception {
    super.initialize(context);

    instances = context.getInstanceCount();

    // Tries to detect key and payload decoder based on the class parameterized type.
    Type superType = TypeToken.of(getClass()).getSupertype(KafkaConsumerFlowlet.class).getType();

    // Tries to detect Key and Payload type for creating decoder for them
    if (superType instanceof ParameterizedType) {
      // Extract Key and Payload types
      Type[] typeArgs = ((ParameterizedType) superType).getActualTypeArguments();

      // The type arguments of KafkaConsumerFlowlet are KEY, PAYLOAD and OFFSET; the first two
      // determine the key and payload decoders
      keyDecoder = createKeyDecoder(typeArgs[0]);
      payloadDecoder = createPayloadDecoder(typeArgs[1]);
    }
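
    // For example, a concrete subclass declared as
    //   class MyFlowlet extends KafkaConsumerFlowlet<byte[], String, Long> { ... }
    // resolves KEY to byte[] and PAYLOAD to String here, so both are handled by the built-in
    // decoders without overriding decodeKey(ByteBuffer) or decodePayload(ByteBuffer).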

    // Configure Kafka
    DefaultKafkaConfigurer kafkaConfigurer = new DefaultKafkaConfigurer();
    configureKafka(kafkaConfigurer);

    if (kafkaConfigurer.getZookeeper() == null && kafkaConfigurer.getBrokers() == null) {
      throw new IllegalStateException("Kafka not configured. Must provide either zookeeper or broker list.");
    }

    kafkaConfig = new KafkaConfig(kafkaConfigurer.getZookeeper(), kafkaConfigurer.getBrokers());
    consumerInfos = createConsumerInfos(kafkaConfigurer.getTopicPartitions());
    changedConsumerInfos = consumerInfos;
  }

  /**
   * A {@link Tick} method that is triggered periodically by the Flow system to poll messages from Kafka.
   * The default poll delay is 100 milliseconds. This method can be overridden to provide a different delay value.
   * <p/>
   * E.g. to override with a 1 second delay instead:
   *
   * <pre>
   * {@literal @}Override
   * {@literal @}Tick(delay = 1, unit = TimeUnit.SECONDS)
   * public void pollMessages() {
   *   super.pollMessages();
   * }
   * </pre>
   */
  @Tick(delay = 100, unit = TimeUnit.MILLISECONDS)
  public void pollMessages() throws Exception {
    // Detect and handle instance count change
    if (instances != getContext().getInstanceCount()) {
      DefaultKafkaConfigurer kafkaConfigurer = new DefaultKafkaConfigurer();
      handleInstancesChanged(kafkaConfigurer);
      changedConsumerInfos = Maps.newHashMap(consumerInfos);
      updateConsumerInfos(kafkaConfigurer.getTopicPartitions(), changedConsumerInfos);
      return;
    }

    boolean infosUpdated = false;
    // Poll for messages from Kafka
    for (KafkaConsumerInfo<OFFSET> info : consumerInfos.values()) {
      Iterator<KafkaMessage<OFFSET>> iterator = readMessages(info);
      while (iterator.hasNext()) {
        KafkaMessage<OFFSET> message = iterator.next();
        processMessage(message);

        // Update the read offset
        info.setReadOffset(message.getNextOffset());
      }
      if (info.hasPendingChanges()) {
        infosUpdated = true;
      }
    }

    // Save the new offsets if at least one message was processed or an offset otherwise changed.
    if (infosUpdated) {
      saveReadOffsets(Maps.transformValues(consumerInfos, consumerToOffset));
    }
  }

  @Override
  public void onSuccess(Object input, InputContext inputContext) {
    super.onSuccess(input, inputContext);

    // Input object is null for @Tick method
    if (input != null) {
      return;
    }
    for (KafkaConsumerInfo<OFFSET> info : consumerInfos.values()) {
      info.commitReadOffset();
    }
    if (getContext().getInstanceCount() != instances) {
      instances = getContext().getInstanceCount();
      consumerInfos = ImmutableMap.copyOf(changedConsumerInfos);
    }
  }

  @Override
  public FailurePolicy onFailure(Object input, InputContext inputContext, FailureReason reason) {
    if (input == null) {
      for (KafkaConsumerInfo<OFFSET> info : consumerInfos.values()) {
        info.rollbackReadOffset();
      }
    }
    return FailurePolicy.RETRY;
  }

  /**
   * Override to return a {@link KeyValueTable} for storing consumer offsets.
   */
  protected KeyValueTable getOffsetStore() {
    return null;
  }

  /**
   * Configures the Kafka consumer. This method will be called during the {@link #initialize(FlowletContext)} phase,
   * hence it has access to the {@link FlowletContext} through the {@link #getContext()} method.
   *
   * @param configurer for configuring consuming from Kafka
   */
  protected abstract void configureKafka(KafkaConfigurer configurer);

  /**
   * Reads messages from Kafka.
   *
   * @param consumerInfo Contains information about where to fetch messages from
   * @return An {@link Iterator} containing a sequence of messages read from Kafka. The first message must
   *         have an offset no earlier than the {@link KafkaConsumerInfo#getReadOffset()} given in the parameter.
   */
  protected abstract Iterator<KafkaMessage<OFFSET>> readMessages(KafkaConsumerInfo<OFFSET> consumerInfo);

  /**
   * Returns the read offset to start with for the given {@link TopicPartition}.
   */
  protected abstract OFFSET getBeginOffset(TopicPartition topicPartition);

  /**
   * Persists the read offsets for all topic-partitions that this Flowlet consumes from Kafka.
   */
  protected abstract void saveReadOffsets(Map<TopicPartition, OFFSET> offsets);

  /**
   * Override to handle changes in the number of flowlet instances. A sub-class may rebalance the
   * topic partitions it consumes.
   *
   * @param configurer for configuring consuming from Kafka
   */
  protected void handleInstancesChanged(KafkaConsumerConfigurer configurer) {
    // No-op
  }

  /**
   * Returns the Kafka configuration.
   */
  protected final KafkaConfig getKafkaConfig() {
    return kafkaConfig;
  }

  /**
   * Override this method if interested in the raw Kafka message.
   *
   * @param message The message fetched from Kafka.
   */
  protected void processMessage(KafkaMessage<OFFSET> message) throws Exception {
    processMessage(decodeKey(message.getKey()), decodePayload(message.getPayload()));
  }

  /**
   * Override this method if interested in both the key and payload of a message read from Kafka.
   *
   * @param key Key decoded from the message
   * @param payload Payload decoded from the message
   */
  protected void processMessage(KEY key, PAYLOAD payload) throws Exception {
    processMessage(payload);
  }

  /**
   * Override this method if only interested in the payload of a message read from Kafka.
   *
   * @param payload Payload decoded from the message
   */
  protected void processMessage(PAYLOAD payload) throws Exception {
    // No-op by default.
  }

  /**
   * Override this method to provide custom decoding of a message key.
   *
   * @param buffer The bytes representing the key in the Kafka message
   * @return The decoded key
   */
  protected KEY decodeKey(ByteBuffer buffer) {
    return (keyDecoder != null) ? keyDecoder.apply(buffer) : null;
  }

  /**
   * Override this method to provide custom decoding of a message payload.
   *
   * @param buffer The bytes representing the payload in the Kafka message
   * @return The decoded payload
   */
  protected PAYLOAD decodePayload(ByteBuffer buffer) {
    return (payloadDecoder != null) ? payloadDecoder.apply(buffer) : null;
  }

  /**
   * Stops a {@link Service} and waits for its completion. Any exception raised during the stop is logged.
   */
  protected final void stopService(Service service) {
    try {
      service.stopAndWait();
    } catch (Throwable t) {
      LOG.error("Failed when stopping service {}", service, t);
    }
  }

  /**
   * Returns the key to be used when persisting offsets into a {@link KeyValueTable}.
   */
  protected String getStoreKey(TopicPartition topicPartition) {
    return topicPartition.getTopic() + ":" + topicPartition.getPartition();
  }

  /**
   * Creates a decoder for the key type.
   *
   * @param type type to decode to
   */
  private Function<ByteBuffer, KEY> createKeyDecoder(Type type) {
    return createDecoder(type, "No decoder for decoding message key");
  }

  /**
   * Creates a decoder for the payload type.
   *
   * @param type type to decode to
   */
  private Function<ByteBuffer, PAYLOAD> createPayloadDecoder(Type type) {
    return createDecoder(type, "No decoder for decoding message payload");
  }

  /**
   * Creates a decoder for decoding a {@link ByteBuffer} into a known type. It supports:
   * <ul>
   *   <li>String (assuming UTF-8)</li>
   *   <li>byte[]</li>
   *   <li>ByteBuffer</li>
   * </ul>
   *
   * @param type type to decode to
   * @param failureDecodeMessage message for the exception if decoding of the given type is not supported
   * @param <T> Type of the decoded object
   * @return A {@link Function} that decodes a {@link ByteBuffer} into the given type, or a failure decoder
   *         created through {@link #createFailureDecoder(String)} if the type is not supported
   */
  @SuppressWarnings("unchecked")
  private <T> Function<ByteBuffer, T> createDecoder(Type type, String failureDecodeMessage) {
    if (String.class.equals(type)) {
      return (Function<ByteBuffer, T>) createStringDecoder();
    }
    if (ByteBuffer.class.equals(type)) {
      return (Function<ByteBuffer, T>) createByteBufferDecoder();
    }
    if (byte[].class.equals(type)
        || (type instanceof GenericArrayType
            && byte.class.equals(((GenericArrayType) type).getGenericComponentType()))) {
      return (Function<ByteBuffer, T>) createBytesDecoder();
    }
    return createFailureDecoder(failureDecodeMessage);
  }

  /**
   * Creates a decoder that converts the input {@link ByteBuffer} into a UTF-8 String. The input
   * {@link ByteBuffer} will not be consumed after the call.
   */
  private Function<ByteBuffer, String> createStringDecoder() {
    return new Function<ByteBuffer, String>() {
      @Override
      public String apply(ByteBuffer input) {
        input.mark();
        String result = Charsets.UTF_8.decode(input).toString();
        input.reset();
        return result;
      }
    };
  }

  /**
   * Creates a decoder that returns the input {@link ByteBuffer} unchanged.
   */
  private Function<ByteBuffer, ByteBuffer> createByteBufferDecoder() {
    return new Function<ByteBuffer, ByteBuffer>() {
      @Override
      public ByteBuffer apply(ByteBuffer input) {
        return input;
      }
    };
  }

  /**
   * Creates a decoder that reads the {@link ByteBuffer} content and returns it as {@code byte[]}. The
   * input {@link ByteBuffer} will not be consumed after the call.
   */
  private Function<ByteBuffer, byte[]> createBytesDecoder() {
    return new Function<ByteBuffer, byte[]>() {
      @Override
      public byte[] apply(ByteBuffer input) {
        byte[] bytes = new byte[input.remaining()];
        input.mark();
        input.get(bytes);
        input.reset();
        return bytes;
      }
    };
  }

  /**
   * Creates a decoder that always fails to decode, by raising an {@link IllegalStateException}.
   */
  private <T> Function<ByteBuffer, T> createFailureDecoder(final String failureMessage) {
    return new Function<ByteBuffer, T>() {
      @Override
      public T apply(ByteBuffer input) {
        throw new IllegalStateException(failureMessage);
      }
    };
  }

  private Map<TopicPartition, KafkaConsumerInfo<OFFSET>> createConsumerInfos(Map<TopicPartition, Integer> config) {
    ImmutableMap.Builder<TopicPartition, KafkaConsumerInfo<OFFSET>> consumers = ImmutableMap.builder();
    for (Map.Entry<TopicPartition, Integer> entry : config.entrySet()) {
      consumers.put(entry.getKey(),
                    new KafkaConsumerInfo<>(entry.getKey(), entry.getValue(), getBeginOffset(entry.getKey())));
    }
    return consumers.build();
  }

  private void updateConsumerInfos(final Map<TopicPartition, Integer> config,
                                   Map<TopicPartition, KafkaConsumerInfo<OFFSET>> consumerInfos) {
    // Remove consumer infos that are no longer needed.
    Iterables.removeIf(consumerInfos.entrySet(),
                       new Predicate<Map.Entry<TopicPartition, KafkaConsumerInfo<OFFSET>>>() {
      @Override
      public boolean apply(Map.Entry<TopicPartition, KafkaConsumerInfo<OFFSET>> input) {
        return !config.containsKey(input.getKey());
      }
    });

    // Add new topic partitions
    for (Map.Entry<TopicPartition, Integer> entry : config.entrySet()) {
      TopicPartition topicPartition = entry.getKey();
      int fetchSize = entry.getValue();

      KafkaConsumerInfo<OFFSET> info = consumerInfos.get(topicPartition);
      if (info != null) {
        // If the consumer info already exists, just update the fetch size if it changed
        if (info.getFetchSize() != fetchSize) {
          consumerInfos.put(topicPartition,
                            new KafkaConsumerInfo<>(topicPartition, fetchSize, info.getReadOffset()));
        }
      } else {
        // Otherwise, create a new consumer info
        consumerInfos.put(topicPartition,
                          new KafkaConsumerInfo<>(topicPartition, fetchSize, getBeginOffset(entry.getKey())));
      }
    }
  }
}
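
A minimal usage sketch (not part of the file above): a concrete consumer flowlet built on this class,
persisting offsets in the KeyValueTable returned by getOffsetStore(). It extends the companion compat
module's Kafka08ConsumerFlowlet, which fixes OFFSET to Long and implements readMessages for Kafka 0.8;
the configurer calls setZooKeeper and subscribe are assumptions about that module's KafkaConfigurer
interface and should be checked against your version.

public class PurchaseConsumerFlowlet extends Kafka08ConsumerFlowlet<byte[], String> {

  private static final Logger LOG = LoggerFactory.getLogger(PurchaseConsumerFlowlet.class);

  // Dataset backing getOffsetStore(); offsets are stored under the key returned by
  // getStoreKey(topicPartition), i.e. "topic:partition"
  @UseDataSet("kafkaOffsets")
  private KeyValueTable offsetStore;

  @Override
  protected void configureKafka(KafkaConfigurer configurer) {
    configurer.setZooKeeper("localhost:2181");  // assumed configurer method
    configurer.subscribe("purchases");          // assumed configurer method
  }

  @Override
  protected KeyValueTable getOffsetStore() {
    return offsetStore;
  }

  @Override
  protected void processMessage(String payload) throws Exception {
    // PAYLOAD resolves to String, so the built-in UTF-8 decoder runs before this call
    LOG.info("Received: {}", payload);
  }
}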