// com.shapestone.event.stream.consumer.KinesisSparkJConsumer (Maven / Gradle / Ivy — newest version)
package com.shapestone.event.stream.consumer;
import com.amazonaws.auth.AWSCredentials;
import com.amazonaws.auth.profile.ProfileCredentialsProvider;
import com.amazonaws.regions.Region;
import com.amazonaws.services.kinesis.AmazonKinesisAsyncClient;
import org.apache.spark.SparkConf;
import org.apache.spark.streaming.Duration;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import javax.ws.rs.core.UriBuilder;
import java.util.List;
import java.util.regex.Pattern;
import static com.amazonaws.regions.RegionUtils.getRegionsForService;
import static com.amazonaws.services.kinesis.AmazonKinesis.ENDPOINT_PREFIX;
import static com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream.LATEST;
import static com.shapestone.event.stream.consumer.KinesisJHelper.readShards;
import static java.util.stream.Collectors.toList;
import static org.apache.spark.api.java.StorageLevels.MEMORY_AND_DISK_2;
import static org.apache.spark.streaming.kinesis.KinesisUtils.createStream;
/**
* Name: Michael Williams
* Date: 3/5/17.
*/
public class KinesisSparkJConsumer {
private String streamName;
private int batchIntervalInMillis;
private String endPointUrl;
private AmazonKinesisAsyncClient client;
private static final Pattern WORD_SEPARATOR = Pattern.compile(" ");
public KinesisSparkJConsumer(String streamName, int batchIntervalInMillis, String endPointUrl) {
this.streamName = streamName;
this.batchIntervalInMillis = batchIntervalInMillis;
this.client = new AmazonKinesisAsyncClient(new ProfileCredentialsProvider());
this.endPointUrl = endPointUrl;
}
public KinesisSparkJConsumer(String streamName, int batchIntervalInMillis, AWSCredentials awsCredentials) {
this.streamName = streamName;
this.batchIntervalInMillis = batchIntervalInMillis;
this.client = new AmazonKinesisAsyncClient(awsCredentials);
}
public void readStream() {
final String uri = UriBuilder.fromUri(endPointUrl).build().getHost();;
final String regionName = getRegionsForService(ENDPOINT_PREFIX)
.stream()
.filter(region -> region.getAvailableEndpoints().stream().filter(s -> s.equals(uri)).count() > 0)
.map(Region::getName)
.findFirst()
.orElseGet(() -> {
throw new IllegalArgumentException("Could not resolve region for endpoint:" + endPointUrl);
});
final SparkConf sparkConfig = new SparkConf().setMaster("local").setAppName("KinesisWordCountASL");
//final Duration batchDuration = new Duration(2000);
final Duration batchDuration = new Duration(batchIntervalInMillis);
final JavaStreamingContext jsc = new JavaStreamingContext(sparkConfig, batchDuration);
//noinspection CodeBlock2Expr
final List> dStreams = readShards(streamName, client).stream().map(shard -> {
return createStream(jsc, streamName, streamName, endPointUrl, regionName, LATEST, batchDuration, MEMORY_AND_DISK_2);
}).collect(toList());
JavaDStream unionStreams;
if (dStreams.isEmpty()) {
unionStreams = jsc.union(dStreams.get(0), dStreams.subList(1, dStreams.size()));
} else {
unionStreams = dStreams.get(0);
}
System.out.println("KinesisSparkJConsumer.readStream");
//JavaDStream words = unionStreams.
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy