/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.prestosql.plugin.kafka;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Multimap;
import io.airlift.slice.Slice;
import io.prestosql.decoder.DecoderColumnHandle;
import io.prestosql.decoder.FieldValueProvider;
import io.prestosql.decoder.RowDecoder;
import io.prestosql.spi.block.Block;
import io.prestosql.spi.block.BlockBuilder;
import io.prestosql.spi.connector.ColumnHandle;
import io.prestosql.spi.connector.RecordCursor;
import io.prestosql.spi.connector.RecordSet;
import io.prestosql.spi.type.MapType;
import io.prestosql.spi.type.Type;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.header.Header;
import org.apache.kafka.common.header.Headers;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import static com.google.common.base.Preconditions.checkArgument;
import static io.prestosql.decoder.FieldValueProviders.booleanValueProvider;
import static io.prestosql.decoder.FieldValueProviders.bytesValueProvider;
import static io.prestosql.decoder.FieldValueProviders.longValueProvider;
import static io.prestosql.plugin.kafka.KafkaInternalFieldManager.HEADERS_FIELD;
import static io.prestosql.plugin.kafka.KafkaInternalFieldManager.KEY_CORRUPT_FIELD;
import static io.prestosql.plugin.kafka.KafkaInternalFieldManager.KEY_FIELD;
import static io.prestosql.plugin.kafka.KafkaInternalFieldManager.KEY_LENGTH_FIELD;
import static io.prestosql.plugin.kafka.KafkaInternalFieldManager.MESSAGE_CORRUPT_FIELD;
import static io.prestosql.plugin.kafka.KafkaInternalFieldManager.MESSAGE_FIELD;
import static io.prestosql.plugin.kafka.KafkaInternalFieldManager.MESSAGE_LENGTH_FIELD;
import static io.prestosql.plugin.kafka.KafkaInternalFieldManager.OFFSET_TIMESTAMP_FIELD;
import static io.prestosql.plugin.kafka.KafkaInternalFieldManager.PARTITION_ID_FIELD;
import static io.prestosql.plugin.kafka.KafkaInternalFieldManager.PARTITION_OFFSET_FIELD;
import static io.prestosql.spi.type.Timestamps.MICROSECONDS_PER_MILLISECOND;
import static io.prestosql.spi.type.TypeUtils.writeNativeValue;
import static java.lang.Math.max;
import static java.util.Collections.emptyIterator;
import static java.util.Objects.requireNonNull;
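/**
 * A {@link RecordSet} over one Kafka topic partition offset range. Rows expose the decoded
 * key and message fields together with the connector's internal fields (partition id,
 * offset, timestamp, headers, lengths, and corruption flags).
 */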
public class KafkaRecordSet
implements RecordSet
{
private static final byte[] EMPTY_BYTE_ARRAY = new byte[0];
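// Timeout, in milliseconds, for each KafkaConsumer#poll call while draining the split's range.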
private static final int CONSUMER_POLL_TIMEOUT = 100;
private final KafkaSplit split;
private final KafkaConsumerFactory consumerFactory;
private final RowDecoder keyDecoder;
private final RowDecoder messageDecoder;
private final List<DecoderColumnHandle> columnHandles;
private final List<Type> columnTypes;
KafkaRecordSet(
KafkaSplit split,
KafkaConsumerFactory consumerFactory,
List<DecoderColumnHandle> columnHandles,
RowDecoder keyDecoder,
RowDecoder messageDecoder)
{
this.split = requireNonNull(split, "split is null");
this.consumerFactory = requireNonNull(consumerFactory, "consumerFactory is null");
this.keyDecoder = requireNonNull(keyDecoder, "keyDecoder is null");
this.messageDecoder = requireNonNull(messageDecoder, "messageDecoder is null");
this.columnHandles = requireNonNull(columnHandles, "columnHandles is null");
ImmutableList.Builder<Type> typeBuilder = ImmutableList.builder();
for (DecoderColumnHandle handle : columnHandles) {
typeBuilder.add(handle.getType());
}
this.columnTypes = typeBuilder.build();
}
@Override
public List<Type> getColumnTypes()
{
return columnTypes;
}
@Override
public RecordCursor cursor()
{
return new KafkaRecordCursor();
}
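/**
 * Cursor that reads the split's offset range from a dedicated KafkaConsumer,
 * buffering one poll() batch of records at a time.
 */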
private class KafkaRecordCursor
implements RecordCursor
{
private final TopicPartition topicPartition;
private final KafkaConsumer<byte[], byte[]> kafkaConsumer;
private Iterator<ConsumerRecord<byte[], byte[]>> records = emptyIterator();
private long completedBytes;
private final FieldValueProvider[] currentRowValues = new FieldValueProvider[columnHandles.size()];
private KafkaRecordCursor()
{
topicPartition = new TopicPartition(split.getTopicName(), split.getPartitionId());
kafkaConsumer = consumerFactory.create();
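// Manually assign the split's single partition (no consumer group subscription)
// and seek to the first offset of the assigned range.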
kafkaConsumer.assign(ImmutableList.of(topicPartition));
kafkaConsumer.seek(topicPartition, split.getMessagesRange().getBegin());
}
@Override
public long getCompletedBytes()
{
return completedBytes;
}
@Override
public long getReadTimeNanos()
{
return 0;
}
@Override
public Type getType(int field)
{
checkArgument(field < columnHandles.size(), "Invalid field index");
return columnHandles.get(field).getType();
}
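// Drain the currently buffered poll() batch first; when it is exhausted, poll again
// unless the consumer has already reached the end of the split's offset range.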
@Override
public boolean advanceNextPosition()
{
if (!records.hasNext()) {
if (kafkaConsumer.position(topicPartition) >= split.getMessagesRange().getEnd()) {
return false;
}
records = kafkaConsumer.poll(CONSUMER_POLL_TIMEOUT).iterator();
return advanceNextPosition();
}
return nextRow(records.next());
}
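// Decodes one record into currentRowValues. Returns false when the record's offset
// is already past the end of the split's range.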
private boolean nextRow(ConsumerRecord<byte[], byte[]> message)
{
requireNonNull(message, "message is null");
if (message.offset() >= split.getMessagesRange().getEnd()) {
return false;
}
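// serializedKeySize()/serializedValueSize() return -1 for null keys/values, so clamp at zero.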
completedBytes += max(message.serializedKeySize(), 0) + max(message.serializedValueSize(), 0);
byte[] keyData = EMPTY_BYTE_ARRAY;
if (message.key() != null) {
keyData = message.key();
}
byte[] messageData = EMPTY_BYTE_ARRAY;
if (message.value() != null) {
messageData = message.value();
}
long timeStamp = message.timestamp();
Map<ColumnHandle, FieldValueProvider> currentRowValuesMap = new HashMap<>();
// NOTE: the original listing is truncated at this point. The remainder below is a
// reconstruction following the upstream prestosql Kafka connector sources; details such
// as the exact RowDecoder#decodeRow signature (some versions take a second, unused
// dataMap argument) may differ between versions.
Optional<Map<DecoderColumnHandle, FieldValueProvider>> decodedKey = keyDecoder.decodeRow(keyData);
Optional<Map<DecoderColumnHandle, FieldValueProvider>> decodedValue = messageDecoder.decodeRow(messageData);
for (DecoderColumnHandle columnHandle : columnHandles) {
    if (columnHandle.isInternal()) {
        switch (columnHandle.getName()) {
            case PARTITION_OFFSET_FIELD:
                currentRowValuesMap.put(columnHandle, longValueProvider(message.offset()));
                break;
            case MESSAGE_FIELD:
                currentRowValuesMap.put(columnHandle, bytesValueProvider(messageData));
                break;
            case MESSAGE_LENGTH_FIELD:
                currentRowValuesMap.put(columnHandle, longValueProvider(messageData.length));
                break;
            case KEY_FIELD:
                currentRowValuesMap.put(columnHandle, bytesValueProvider(keyData));
                break;
            case KEY_LENGTH_FIELD:
                currentRowValuesMap.put(columnHandle, longValueProvider(keyData.length));
                break;
            case OFFSET_TIMESTAMP_FIELD:
                // Kafka reports milliseconds; Presto timestamps use microsecond precision.
                currentRowValuesMap.put(columnHandle, longValueProvider(timeStamp * MICROSECONDS_PER_MILLISECOND));
                break;
            case KEY_CORRUPT_FIELD:
                currentRowValuesMap.put(columnHandle, booleanValueProvider(!decodedKey.isPresent()));
                break;
            case HEADERS_FIELD:
                currentRowValuesMap.put(columnHandle, headerMapValueProvider((MapType) columnHandle.getType(), message.headers()));
                break;
            case MESSAGE_CORRUPT_FIELD:
                currentRowValuesMap.put(columnHandle, booleanValueProvider(!decodedValue.isPresent()));
                break;
            case PARTITION_ID_FIELD:
                currentRowValuesMap.put(columnHandle, longValueProvider(message.partition()));
                break;
            default:
                throw new IllegalArgumentException("unknown internal field " + columnHandle.getName());
        }
    }
}
// Decoded key/message fields overlay the internal fields.
decodedKey.ifPresent(currentRowValuesMap::putAll);
decodedValue.ifPresent(currentRowValuesMap::putAll);
for (int i = 0; i < columnHandles.size(); i++) {
    ColumnHandle columnHandle = columnHandles.get(i);
    currentRowValues[i] = currentRowValuesMap.get(columnHandle);
}
return true;
}
@Override
public boolean getBoolean(int field)
{
    return getFieldValueProvider(field, boolean.class).getBoolean();
}
@Override
public long getLong(int field)
{
    return getFieldValueProvider(field, long.class).getLong();
}
@Override
public double getDouble(int field)
{
    return getFieldValueProvider(field, double.class).getDouble();
}
@Override
public Slice getSlice(int field)
{
    return getFieldValueProvider(field, Slice.class).getSlice();
}
@Override
public Object getObject(int field)
{
    return getFieldValueProvider(field, Block.class).getBlock();
}
@Override
public boolean isNull(int field)
{
    checkArgument(field < columnHandles.size(), "Invalid field index");
    return currentRowValues[field] == null || currentRowValues[field].isNull();
}
private FieldValueProvider getFieldValueProvider(int field, Class<?> expectedType)
{
    checkArgument(field < columnHandles.size(), "Invalid field index");
    checkFieldType(field, expectedType);
    return currentRowValues[field];
}
private void checkFieldType(int field, Class<?> expected)
{
    Class<?> actual = getType(field).getJavaType();
    checkArgument(actual == expected, "Expected field %s to be type %s but is %s", field, expected, actual);
}
@Override
public void close()
{
    kafkaConsumer.close();
}
}
public static FieldValueProvider headerMapValueProvider(MapType varcharMapType, Headers headers)
{
    Type keyType = varcharMapType.getTypeParameters().get(0);
    Type valueArrayType = varcharMapType.getTypeParameters().get(1);
    Type valueType = valueArrayType.getTypeParameters().get(0);
    BlockBuilder mapBlockBuilder = varcharMapType.createBlockBuilder(null, 1);
    BlockBuilder builder = mapBlockBuilder.beginBlockEntry();
    // Group the header values by key, since Kafka allows repeated header keys.
    Multimap<String, byte[]> headerMap = ArrayListMultimap.create();
    for (Header header : headers) {
        headerMap.put(header.key(), header.value());
    }
    for (String headerKey : headerMap.keySet()) {
        writeNativeValue(keyType, builder, headerKey);
        BlockBuilder arrayBuilder = builder.beginBlockEntry();
        for (byte[] value : headerMap.get(headerKey)) {
            writeNativeValue(valueType, arrayBuilder, value);
        }
        builder.closeEntry();
    }
    mapBlockBuilder.closeEntry();
    return new FieldValueProvider()
    {
        @Override
        public boolean isNull()
        {
            return false;
        }

        @Override
        public Block getBlock()
        {
            return varcharMapType.getObject(mapBlockBuilder, 0);
        }
    };
}
}