/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.samza.system.eventhub.producer;

import java.nio.charset.Charset;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

import org.apache.commons.lang3.Validate;
import org.apache.samza.SamzaException;
import org.apache.samza.metrics.Counter;
import org.apache.samza.metrics.MetricsRegistry;
import org.apache.samza.system.OutgoingMessageEnvelope;
import org.apache.samza.system.eventhub.EventHubClientManager;
import org.apache.samza.system.eventhub.EventHubClientManagerFactory;
import org.apache.samza.system.eventhub.EventHubConfig;
import org.apache.samza.system.eventhub.Interceptor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.annotations.VisibleForTesting;
import com.microsoft.azure.eventhubs.EventData;
import com.microsoft.azure.eventhubs.EventHubClient;
import com.microsoft.azure.eventhubs.EventHubException;
import com.microsoft.azure.eventhubs.PartitionSender;
import com.microsoft.azure.eventhubs.impl.ClientConstants;
import com.microsoft.azure.eventhubs.impl.EventDataImpl;


/**
 * EventHub system producer that can be used in Samza jobs to send events to Azure Event Hubs.
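 * <p>
 * A minimal usage sketch. The lifecycle calls below mirror this class's API, but the config, factory,
 * registry, and envelope wiring shown is an illustrative assumption; in a Samza job the framework
 * normally performs this setup:
 * <pre>{@code
 *   EventHubSystemProducer producer = new EventHubSystemProducer(
 *       eventHubConfig, "eventhubs", new EventHubClientManagerFactory(), interceptors, metricsRegistry);
 *   producer.register("my-source");              // must happen before start()
 *   producer.start();
 *   producer.sendAsync("my-source", envelope);   // returns a CompletableFuture<Void>
 *   producer.flush("my-source");
 *   producer.stop();
 * }</pre>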
 */
public class EventHubSystemProducer extends AsyncSystemProducer {
  private static final Logger LOG = LoggerFactory.getLogger(EventHubSystemProducer.class.getName());
  private static final long DEFAULT_SHUTDOWN_TIMEOUT_MILLIS = Duration.ofMinutes(1L).toMillis();

  public static final String PRODUCE_TIMESTAMP = "produce-timestamp";
  public static final String KEY = "key";

  // Metrics recording
  private static final String EVENT_SKIP_RATE = "eventSkipRate";
  private static final String EVENT_WRITE_RATE = "eventWriteRate";
  private static final String EVENT_BYTE_WRITE_RATE = "eventByteWriteRate";

  private static final Object AGGREGATE_METRICS_LOCK = new Object();

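  /**
   * How outgoing events are routed to Event Hub partitions (see {@code sendToEventHub}):
   * ROUND_ROBIN lets the Event Hub client distribute events across partitions,
   * EVENT_HUB_HASHING hashes the envelope's partition key on the service side, and
   * PARTITION_KEY_AS_PARTITION interprets the integer partition key (modulo the partition count)
   * as the destination partition.
   */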
  public enum PartitioningMethod {
    ROUND_ROBIN, EVENT_HUB_HASHING, PARTITION_KEY_AS_PARTITION
  }

  /**
   * Per-stream metrics, keyed by stream id.
   */
  private final HashMap<String, Counter> eventSkipRate = new HashMap<>();
  private final HashMap<String, Counter> eventWriteRate = new HashMap<>();
  private final HashMap<String, Counter> eventByteWriteRate = new HashMap<>();

  /**
   * Aggregated metrics.
   */
  private static Counter aggEventSkipRate = null;
  private static Counter aggEventWriteRate = null;
  private static Counter aggEventByteWriteRate = null;

  private final EventHubConfig config;
  private final PartitioningMethod partitioningMethod;
  private final String systemName;
  private final int maxMessageSize;

  private volatile boolean isStarted = false;

  // We implement lazy initialization for producer as a workaround for
  // slow shutdown issue.
  private boolean isInitialized = false;

  /**
   * Per-partition Event Hub clients. Partitions of the same stream may share the same client,
   * depending on the PerPartitionConnection config. See {@link EventHubConfig}
   */
  @VisibleForTesting
  final Map<String, Map<Integer, EventHubClientManager>> perPartitionEventHubClients = new HashMap<>();

  private final Map<String, EventHubClientManager> perStreamEventHubClientManagers = new HashMap<>();

  /**
   * PartitionSender for each partition in the stream.
   */
  private final Map<String, Map<Integer, PartitionSender>> streamPartitionSenders = new HashMap<>();

  /**
   * Per stream message interceptors
   */
  private final Map<String, Interceptor> interceptors;

  private final EventHubClientManagerFactory eventHubClientManagerFactory;

  public EventHubSystemProducer(EventHubConfig config, String systemName,
      EventHubClientManagerFactory eventHubClientManagerFactory, Map<String, Interceptor> interceptors,
      MetricsRegistry registry) {
    super(systemName, config, registry);
    LOG.info("Creating EventHub Producer for system {}", systemName);
    this.config = config;
    this.systemName = systemName;
    this.partitioningMethod = config.getPartitioningMethod(systemName);
    this.interceptors = interceptors;
    this.maxMessageSize = config.getSkipMessagesLargerThan(systemName);
    this.eventHubClientManagerFactory = eventHubClientManagerFactory;
  }

  private void init() {
    LOG.info("Initializing EventHubSystemProducer");
    // Fetches the stream ids
    List<String> streamIds = config.getStreams(systemName);
    // Create and initiate connections to Event Hubs
    // even if PerPartitionConnection == true, we still need a stream level event hub for initial metadata (fetching
    // partition count)
    for (String streamId : streamIds) {
      EventHubClientManager ehClient =
          eventHubClientManagerFactory.getEventHubClientManager(systemName, streamId, config);
      perStreamEventHubClientManagers.put(streamId, ehClient);
      ehClient.init();
    }

    // Create partition senders if required
    if (PartitioningMethod.PARTITION_KEY_AS_PARTITION.equals(partitioningMethod)) {
      // Create all partition senders
      perStreamEventHubClientManagers.forEach((streamId, samzaEventHubClient) -> {
          EventHubClient ehClient = samzaEventHubClient.getEventHubClient();

          try {
            Map<Integer, PartitionSender> partitionSenders = new HashMap<>();
            long timeoutMs = config.getRuntimeInfoWaitTimeMS(systemName);
            Integer numPartitions =
                ehClient.getRuntimeInformation().get(timeoutMs, TimeUnit.MILLISECONDS).getPartitionCount();

            for (int i = 0; i < numPartitions; i++) {
              String partitionId = String.valueOf(i);
              EventHubClientManager perPartitionClientManager =
                  createOrGetEventHubClientManagerForPartition(streamId, i);
              PartitionSender partitionSender =
                  perPartitionClientManager.getEventHubClient().createPartitionSenderSync(partitionId);
              partitionSenders.put(i, partitionSender);
            }

            streamPartitionSenders.put(streamId, partitionSenders);
          } catch (InterruptedException | ExecutionException | TimeoutException e) {
            String msg = "Failed to fetch number of Event Hub partitions for partition sender creation";
            throw new SamzaException(msg, e);
          } catch (EventHubException | IllegalArgumentException e) {
            String msg = "Creation of partition sender failed with exception";
            throw new SamzaException(msg, e);
          }
        });
    }
    isInitialized = true;
    LOG.info("EventHubSystemProducer initialized.");
  }

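  /**
   * Registration is validation-only for this producer: sources are not tracked, but registering
   * after {@link #start()} has been called is rejected.
   */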
  @Override
  public synchronized void register(String source) {
    LOG.info("Registering source {}", source);
    if (isStarted) {
      String msg = "Cannot register once the producer is started.";
      throw new SamzaException(msg);
    }
  }

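  /**
   * Returns the {@link EventHubClientManager} to use for the given stream partition. With
   * PerPartitionConnection enabled, a dedicated client is created (and cached) per partition;
   * otherwise the stream-level client is reused for every partition of the stream.
   */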
  private EventHubClientManager createOrGetEventHubClientManagerForPartition(String streamId, int partitionId) {
    Map<Integer, EventHubClientManager> perStreamMap =
        perPartitionEventHubClients.computeIfAbsent(streamId, key -> new HashMap<>());
    EventHubClientManager eventHubClientManager;
    if (config.getPerPartitionConnection(systemName)) {
      // will create one EventHub client per partition
      if (perStreamMap.containsKey(partitionId)) {
        LOG.warn(String.format("Trying to create new EventHubClientManager for partition=%d. But one already exists",
            partitionId));
        eventHubClientManager = perStreamMap.get(partitionId);
      } else {
        LOG.info(
            String.format("Creating EventHub client manager for streamId=%s, partitionId=%d: ", streamId, partitionId));
        eventHubClientManager = eventHubClientManagerFactory.getEventHubClientManager(systemName, streamId, config);
        eventHubClientManager.init();
        perStreamMap.put(partitionId, eventHubClientManager);
      }
    } else {
      // will share one EventHub client per stream
      eventHubClientManager = perStreamEventHubClientManagers.get(streamId);
      perStreamMap.put(partitionId, eventHubClientManager);
    }
    Validate.notNull(eventHubClientManager,
        String.format("Fail to create or get EventHubClientManager for streamId=%s, partitionId=%d", streamId,
            partitionId));
    return eventHubClientManager;
  }

  @Override
  public synchronized void start() {
    super.start();
    LOG.info("Starting system producer.");

    // Initiate metrics
    streamIds.forEach((streamId) -> {
        eventSkipRate.put(streamId, metricsRegistry.newCounter(streamId, EVENT_SKIP_RATE));
        eventWriteRate.put(streamId, metricsRegistry.newCounter(streamId, EVENT_WRITE_RATE));
        eventByteWriteRate.put(streamId, metricsRegistry.newCounter(streamId, EVENT_BYTE_WRITE_RATE));
      });

    // Locking to ensure that these aggregated metrics will be created only once across multiple system producers.
    synchronized (AGGREGATE_METRICS_LOCK) {
      if (aggEventWriteRate == null) {
        aggEventSkipRate = metricsRegistry.newCounter(AGGREGATE, EVENT_SKIP_RATE);
        aggEventWriteRate = metricsRegistry.newCounter(AGGREGATE, EVENT_WRITE_RATE);
        aggEventByteWriteRate = metricsRegistry.newCounter(AGGREGATE, EVENT_BYTE_WRITE_RATE);
      }
    }

    isStarted = true;
  }

  @Override
  public synchronized void flush(String source) {
    super.flush(source);
  }

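  /**
   * Sends the envelope to Event Hubs asynchronously. The Event Hub clients are initialized lazily on the
   * first send; messages larger than the configured maximum are skipped (counted, not failed), and routing
   * is delegated to {@code sendToEventHub} according to the partitioning method.
   */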
  @Override
  public synchronized CompletableFuture<Void> sendAsync(String source, OutgoingMessageEnvelope envelope) {
    LOG.debug(String.format("Trying to send %s", envelope));
    if (!isStarted) {
      throw new SamzaException("Trying to call send before the producer is started.");
    }
    if (!isInitialized) {
      // lazy initialization on the first send
      init();
    }

    String streamId = config.getStreamId(envelope.getSystemStream().getStream());

    if (!perStreamEventHubClientManagers.containsKey(streamId)) {
      String msg = String.format("Trying to send event to a destination {%s} that is not registered.", streamId);
      throw new SamzaException(msg);
    }

    EventData eventData = createEventData(streamId, envelope);
    // SAMZA-1654: waiting for the client library to expose the API to calculate the exact size of the AMQP message
    // https://github.com/Azure/azure-event-hubs-java/issues/305
    int eventDataLength = eventData.getBytes() == null ? 0 : eventData.getBytes().length;

    // A maxMessageSize of zero or less means there is no message size restriction.
    if (this.maxMessageSize > 0 && eventDataLength > this.maxMessageSize) {
      LOG.info("Received a message with size {} > maxMessageSize configured {(}), Skipping it", eventDataLength,
          this.maxMessageSize);
      eventSkipRate.get(streamId).inc();
      aggEventSkipRate.inc();
      return CompletableFuture.completedFuture(null);
    }

    eventWriteRate.get(streamId).inc();
    aggEventWriteRate.inc();
    eventByteWriteRate.get(streamId).inc(eventDataLength);
    aggEventByteWriteRate.inc(eventDataLength);
    EventHubClientManager ehClient = perStreamEventHubClientManagers.get(streamId);

    // Async send call
    return sendToEventHub(streamId, eventData, getEnvelopePartitionId(envelope), ehClient.getEventHubClient());
  }

  private CompletableFuture<Void> sendToEventHub(String streamId, EventData eventData, Object partitionKey,
      EventHubClient eventHubClient) {
    if (PartitioningMethod.ROUND_ROBIN.equals(partitioningMethod)) {
      return eventHubClient.send(eventData);
    } else if (PartitioningMethod.EVENT_HUB_HASHING.equals(partitioningMethod)) {
      if (partitionKey == null) {
        throw new SamzaException("Partition key cannot be null for EventHub hashing");
      }
      return eventHubClient.send(eventData, convertPartitionKeyToString(partitionKey));
    } else if (PartitioningMethod.PARTITION_KEY_AS_PARTITION.equals(partitioningMethod)) {
      if (!(partitionKey instanceof Integer)) {
        String msg = "Partition key should be of type Integer";
        throw new SamzaException(msg);
      }

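      // Map the integer key onto the available partitions by modulo,
      // e.g. key 7 with 4 partition senders goes to partition 3.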
      Integer numPartition = streamPartitionSenders.get(streamId).size();
      Integer destinationPartition = (Integer) partitionKey % numPartition;

      PartitionSender sender = streamPartitionSenders.get(streamId).get(destinationPartition);
      return sender.send(eventData);
    } else {
      throw new SamzaException("Unknown partitioning method " + partitioningMethod);
    }
  }

  protected Object getEnvelopePartitionId(OutgoingMessageEnvelope envelope) {
    return envelope.getPartitionKey() == null ? envelope.getKey() : envelope.getPartitionKey();
  }

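  /**
   * Converts a String, Integer, or byte[] partition key to a String and truncates it to
   * {@link ClientConstants#MAX_PARTITION_KEY_LENGTH} if it is longer than the service allows.
   */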
  private String convertPartitionKeyToString(Object partitionKey) {
    String partitionKeyStr;
    if (partitionKey instanceof String) {
      partitionKeyStr = (String) partitionKey;
    } else if (partitionKey instanceof Integer) {
      partitionKeyStr = String.valueOf(partitionKey);
    } else if (partitionKey instanceof byte[]) {
      partitionKeyStr = new String((byte[]) partitionKey, Charset.defaultCharset());
    } else {
      throw new SamzaException("Unsupported key type: " + partitionKey.getClass().toString());
    }
    if (partitionKeyStr != null && partitionKeyStr.length() > ClientConstants.MAX_PARTITION_KEY_LENGTH) {
      LOG.debug("Length of partition key: {} exceeds limit: {}. Truncating.", partitionKeyStr.length(),
          ClientConstants.MAX_PARTITION_KEY_LENGTH);
      partitionKeyStr = partitionKeyStr.substring(0, ClientConstants.MAX_PARTITION_KEY_LENGTH);
    }
    return partitionKeyStr;
  }

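  /**
   * Builds the outgoing {@link EventData}: applies the per-stream interceptor (if any) to the message bytes,
   * stamps the produce timestamp as an application property, and optionally copies the key into the event
   * properties when configured to do so.
   */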
  protected EventData createEventData(String streamId, OutgoingMessageEnvelope envelope) {
    Optional<Interceptor> interceptor = Optional.ofNullable(interceptors.getOrDefault(streamId, null));
    byte[] eventValue = (byte[]) envelope.getMessage();
    if (interceptor.isPresent()) {
      eventValue = interceptor.get().intercept(eventValue);
    }

    EventData eventData = new EventDataImpl(eventValue);

    eventData.getProperties().put(PRODUCE_TIMESTAMP, Long.toString(System.currentTimeMillis()));

    if (config.getSendKeyInEventProperties(systemName)) {
      String keyValue = "";
      if (envelope.getKey() != null) {
        keyValue = (envelope.getKey() instanceof byte[]) ? new String((byte[]) envelope.getKey())
            : envelope.getKey().toString();
      }
      eventData.getProperties().put(KEY, keyValue);
    }
    return eventData;
  }

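  /**
   * Shuts down the producer: closes every partition sender (bounded by the shutdown timeout), then closes
   * the stream-level clients and, when per-partition connections are enabled, the per-partition clients.
   */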
  @Override
  public synchronized void stop() {
    LOG.info("Stopping producer.");
    streamPartitionSenders.values().forEach((streamPartitionSender) -> {
        List<CompletableFuture<Void>> futures = new ArrayList<>();
        streamPartitionSender.forEach((key, value) -> futures.add(value.close()));
        CompletableFuture<Void> future = CompletableFuture.allOf(futures.toArray(new CompletableFuture[futures.size()]));
        try {
          future.get(DEFAULT_SHUTDOWN_TIMEOUT_MILLIS, TimeUnit.MILLISECONDS);
        } catch (ExecutionException | InterruptedException | TimeoutException e) {
          LOG.error("Closing the partition sender failed ", e);
        }
      });
    perStreamEventHubClientManagers.values()
        .parallelStream()
        .forEach(ehClient -> ehClient.close(DEFAULT_SHUTDOWN_TIMEOUT_MILLIS));
    perStreamEventHubClientManagers.clear();
    if (config.getPerPartitionConnection(systemName)) {
      perPartitionEventHubClients.values()
          .stream()
          .flatMap(map -> map.values().stream())
          .forEach(ehClient -> ehClient.close(DEFAULT_SHUTDOWN_TIMEOUT_MILLIS));
      perPartitionEventHubClients.clear();
    }
    isStarted = false;
    isInitialized = false;
    LOG.info("EventHubSystemProducer stopped.");
  }

  /**
   * Gets pending futures. Used by the test.
   *
   * @return the pending futures
   */
  Collection<CompletableFuture<Void>> getPendingFutures() {
    return pendingFutures;
  }
}



