
com.qubole.presto.kinesis.KinesisSplitManager Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of kinesis Show documentation
Show all versions of kinesis Show documentation
Presto - Kinesis Connector
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.qubole.presto.kinesis;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.inject.Named;
import com.amazonaws.services.kinesis.model.DescribeStreamRequest;
import com.amazonaws.services.kinesis.model.DescribeStreamResult;
import com.amazonaws.services.kinesis.model.ResourceNotFoundException;
import com.amazonaws.services.kinesis.model.Shard;
import com.facebook.presto.spi.ConnectorSession;
import com.facebook.presto.spi.ConnectorSplit;
import com.facebook.presto.spi.ConnectorSplitSource;
import com.facebook.presto.spi.ConnectorTableLayoutHandle;
import com.facebook.presto.spi.FixedSplitSource;
import com.facebook.presto.spi.connector.ConnectorSplitManager;
import com.facebook.presto.spi.connector.ConnectorTransactionHandle;
import com.google.common.collect.ImmutableList;
import com.google.inject.Inject;
/**
*
* Split data chunk from kinesis Stream to multiple small chunks for parallelization and distribution to multiple Presto workers.
* By default, each shard of Kinesis Stream forms one Kinesis Split
*
*/
public class KinesisSplitManager
implements ConnectorSplitManager
{
/** Max age of the shard cache (currently 24 hours). */
public static final long MAX_CACHE_AGE_MILLIS = 24 * 3600 * 1000;
private final String connectorId;
private final KinesisHandleResolver handleResolver;
private final KinesisClientProvider clientManager;
private Map streamMap = Collections.synchronizedMap(new HashMap());
/**
* Cache the result of a Kinesis describe stream call so we don't need to retrieve
* the shards for every single query.
*/
public static class InternalStreamDescription
{
private String streamName = "";
private List shards = new ArrayList<>();
private long createTimeStamp;
public InternalStreamDescription(String aName)
{
this.streamName = aName;
this.createTimeStamp = System.currentTimeMillis();
}
public long getCreateTimeStamp()
{
return this.createTimeStamp;
}
public String getStreamName()
{
return streamName;
}
public List getShards()
{
return shards;
}
public void addShard(Shard s)
{
this.shards.add(s);
}
public void addAllShards(List shards)
{
this.shards.addAll(shards);
}
}
@Inject
public KinesisSplitManager(@Named("connectorId") String connectorId,
KinesisHandleResolver handleResolver,
KinesisClientProvider clientManager)
{
this.connectorId = connectorId;
this.handleResolver = handleResolver;
this.clientManager = clientManager;
}
@Override
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transactionHandle, ConnectorSession session, ConnectorTableLayoutHandle layout)
{
KinesisTableLayoutHandle kinesislayout = handleResolver.convertLayout(layout);
KinesisTableHandle kinesisTableHandle = kinesislayout.getTable();
InternalStreamDescription desc = this.getStreamDescription(kinesisTableHandle.getStreamName());
ImmutableList.Builder builder = ImmutableList.builder();
for (Shard shard : desc.getShards()) {
KinesisSplit split = new KinesisSplit(connectorId,
kinesisTableHandle.getStreamName(),
kinesisTableHandle.getMessageDataFormat(),
shard.getShardId(),
shard.getSequenceNumberRange().getStartingSequenceNumber(),
shard.getSequenceNumberRange().getEndingSequenceNumber());
builder.add(split);
}
return new FixedSplitSource(connectorId, builder.build());
}
/**
* Internal method to retrieve the stream description and get the shards from AWS.
*
* Gets from the internal cache unless not yet created or too old.
*
* @param streamName
* @return
*/
protected InternalStreamDescription getStreamDescription(String streamName)
{
InternalStreamDescription desc = this.streamMap.get(streamName);
if (desc == null || System.currentTimeMillis() - desc.getCreateTimeStamp() >= MAX_CACHE_AGE_MILLIS) {
desc = new InternalStreamDescription(streamName);
DescribeStreamRequest describeStreamRequest = clientManager.getDescribeStreamRequest();
describeStreamRequest.setStreamName(streamName);
// Collect shards from Kinesis
String exclusiveStartShardId = null;
List shards = new ArrayList<>();
do {
describeStreamRequest.setExclusiveStartShardId(exclusiveStartShardId);
DescribeStreamResult describeStreamResult = clientManager.getClient().describeStream(describeStreamRequest);
String streamStatus = describeStreamResult.getStreamDescription().getStreamStatus();
if (!streamStatus.equals("ACTIVE") && !streamStatus.equals("UPDATING")) {
throw new ResourceNotFoundException("Stream not Active");
}
desc.addAllShards(describeStreamResult.getStreamDescription().getShards());
if (describeStreamResult.getStreamDescription().getHasMoreShards() && (shards.size() > 0)) {
exclusiveStartShardId = shards.get(shards.size() - 1).getShardId();
}
else {
exclusiveStartShardId = null;
}
} while (exclusiveStartShardId != null);
this.streamMap.put(streamName, desc);
}
return desc;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy