// All downloads are free. Search and download functionality uses the official Maven repository.

// com.shapestone.event.stream.consumer.KinesisSparkJConsumer (Maven / Gradle / Ivy)

// The newest version!
package com.shapestone.event.stream.consumer;

import com.amazonaws.auth.AWSCredentials;
import com.amazonaws.auth.profile.ProfileCredentialsProvider;
import com.amazonaws.regions.Region;
import com.amazonaws.services.kinesis.AmazonKinesisAsyncClient;
import org.apache.spark.SparkConf;
import org.apache.spark.streaming.Duration;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;

import javax.ws.rs.core.UriBuilder;

import java.util.List;
import java.util.regex.Pattern;

import static com.amazonaws.regions.RegionUtils.getRegionsForService;
import static com.amazonaws.services.kinesis.AmazonKinesis.ENDPOINT_PREFIX;
import static com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream.LATEST;
import static com.shapestone.event.stream.consumer.KinesisJHelper.readShards;
import static java.util.stream.Collectors.toList;
import static org.apache.spark.api.java.StorageLevels.MEMORY_AND_DISK_2;
import static org.apache.spark.streaming.kinesis.KinesisUtils.createStream;

/**
 * Name: Michael Williams
 * Date: 3/5/17.
 */
public class KinesisSparkJConsumer {

  private String streamName;
  private int batchIntervalInMillis;
  private String endPointUrl;
  private AmazonKinesisAsyncClient client;

  private static final Pattern WORD_SEPARATOR = Pattern.compile(" ");

  public KinesisSparkJConsumer(String streamName, int batchIntervalInMillis, String endPointUrl) {
    this.streamName = streamName;
    this.batchIntervalInMillis = batchIntervalInMillis;
    this.client = new AmazonKinesisAsyncClient(new ProfileCredentialsProvider());
    this.endPointUrl = endPointUrl;
  }

  public KinesisSparkJConsumer(String streamName, int batchIntervalInMillis, AWSCredentials awsCredentials) {
    this.streamName = streamName;
    this.batchIntervalInMillis = batchIntervalInMillis;
    this.client = new AmazonKinesisAsyncClient(awsCredentials);
  }

  public void readStream() {
    final String uri = UriBuilder.fromUri(endPointUrl).build().getHost();;

    final String regionName = getRegionsForService(ENDPOINT_PREFIX)
      .stream()
      .filter(region -> region.getAvailableEndpoints().stream().filter(s -> s.equals(uri)).count() > 0)
      .map(Region::getName)
      .findFirst()
      .orElseGet(() -> {
        throw new IllegalArgumentException("Could not resolve region for endpoint:" + endPointUrl);
      });

    final SparkConf sparkConfig = new SparkConf().setMaster("local").setAppName("KinesisWordCountASL");
    //final Duration batchDuration = new Duration(2000);
    final Duration batchDuration = new Duration(batchIntervalInMillis);
    final JavaStreamingContext jsc = new JavaStreamingContext(sparkConfig, batchDuration);

    //noinspection CodeBlock2Expr
    final List> dStreams = readShards(streamName, client).stream().map(shard -> {
      return createStream(jsc, streamName, streamName, endPointUrl, regionName, LATEST, batchDuration, MEMORY_AND_DISK_2);
    }).collect(toList());

    JavaDStream unionStreams;
    if (dStreams.isEmpty()) {
      unionStreams = jsc.union(dStreams.get(0), dStreams.subList(1, dStreams.size()));
    } else {
      unionStreams = dStreams.get(0);
    }


    System.out.println("KinesisSparkJConsumer.readStream");
    //JavaDStream words = unionStreams.


  }

}




// © 2015 - 2025 Weber Informatics LLC | Privacy Policy