/**
* Copyright 2017-2021 O2 Czech Republic, a.s.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cz.o2.proxima.direct.kafka;
import static java.util.stream.Collectors.toMap;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import cz.o2.proxima.direct.commitlog.CommitLogReader;
import cz.o2.proxima.direct.commitlog.LogObserver;
import cz.o2.proxima.direct.commitlog.ObserveHandle;
import cz.o2.proxima.direct.commitlog.Offset;
import cz.o2.proxima.direct.core.Context;
import cz.o2.proxima.direct.kafka.ElementConsumers.BulkConsumer;
import cz.o2.proxima.direct.kafka.ElementConsumers.OnlineConsumer;
import cz.o2.proxima.direct.time.MinimalPartitionWatermarkEstimator;
import cz.o2.proxima.functional.BiConsumer;
import cz.o2.proxima.storage.AbstractStorage;
import cz.o2.proxima.storage.Partition;
import cz.o2.proxima.storage.StreamElement;
import cz.o2.proxima.storage.commitlog.Position;
import cz.o2.proxima.time.PartitionedWatermarkEstimator;
import cz.o2.proxima.time.WatermarkEstimator;
import cz.o2.proxima.time.WatermarkEstimatorFactory;
import cz.o2.proxima.time.WatermarkIdlePolicyFactory;
import cz.o2.proxima.time.Watermarks;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicReference;
import java.util.stream.Collectors;
import javax.annotation.Nullable;
import lombok.Getter;
import lombok.extern.slf4j.Slf4j;
import org.apache.kafka.clients.consumer.ConsumerRebalanceListener;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.common.PartitionInfo;
import org.apache.kafka.common.TopicPartition;
/** A {@link CommitLogReader} implementation for Kafka. */
@Slf4j
public class KafkaLogReader extends AbstractStorage implements CommitLogReader {
@Getter final KafkaAccessor accessor;
@Getter private final Context context;
private final long consumerPollInterval;
private final long maxBytesPerSec;
  private final Map<String, Object> cfg;
KafkaLogReader(KafkaAccessor accessor, Context context) {
super(accessor.getEntityDescriptor(), accessor.getUri());
this.accessor = accessor;
this.context = context;
this.consumerPollInterval = accessor.getConsumerPollInterval();
this.maxBytesPerSec = accessor.getMaxBytesPerSec();
this.cfg = accessor.getCfg();
log.debug("Created {} for accessor {}", getClass().getSimpleName(), accessor);
}
  /**
   * Subscribe observer by name to the commit log. Each observer maintains its own position in the
   * commit log, so that observers with different names do not interfere. If multiple observers
   * share the same name, the ingests are load-balanced between them (in an undefined manner).
   * This is a non-blocking call.
   *
   * @param name identifier of the consumer
   * @param position position in the commit log to start reading from
   * @param observer observer that will receive the data
   * @return handle to the observation
   */
@Override
public ObserveHandle observe(String name, Position position, LogObserver observer) {
return observeKafka(name, null, position, false, observer);
}
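  // Illustrative usage sketch (the consumer name and observer below are hypothetical):
  //   ObserveHandle handle = reader.observe("my-consumer", Position.NEWEST, observer);
  //   handle.waitUntilReady(); // blocks until the background consumer has processed its assignment
  //   ...
  //   handle.close();          // flips the shutdown flag and stops the poll loop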
@Override
public ObserveHandle observePartitions(
String name,
      @Nullable Collection<Partition> partitions,
Position position,
boolean stopAtCurrent,
LogObserver observer) {
return observeKafka(null, partitions, position, stopAtCurrent, observer);
}
@Override
public ObserveHandle observeBulk(
String name, Position position, boolean stopAtCurrent, LogObserver observer) {
return observeKafkaBulk(name, null, position, stopAtCurrent, observer);
}
@Override
public ObserveHandle observeBulkPartitions(
String name,
      Collection<Partition> partitions,
Position position,
boolean stopAtCurrent,
LogObserver observer) {
    // name is ignored, because when observing partitions the offsets
    // are not committed to kafka
return observeKafkaBulk(
null, createDefaultOffsets(partitions), position, stopAtCurrent, observer);
}
@Override
public ObserveHandle observeBulkOffsets(
      Collection<Offset> offsets, boolean stopAtCurrent, LogObserver observer) {
return observeKafkaBulk(null, offsets, Position.CURRENT, stopAtCurrent, observer);
}
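  // Offsets of this reader are externalizable (see hasExternalizableOffsets()), so a bulk
  // observation can later be resumed from previously obtained offsets, for example (sketch only,
  // persisting the offsets between runs is up to the caller):
  //   List<Offset> committed = handle.getCommittedOffsets();
  //   reader.observeBulkOffsets(committed, false, observer);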
@Override
  public List<Partition> getPartitions() {
if (accessor.isTopicRegex()) {
throw new UnsupportedOperationException(
String.format("Partitions of URI %s are unstable and should not be used.", getUri()));
}
    try (KafkaConsumer<Object, Object> consumer = createConsumer()) {
return consumer
.partitionsFor(accessor.getTopic())
.stream()
.map(pi -> new PartitionWithTopic(pi.topic(), pi.partition()))
.collect(Collectors.toList());
}
}
@VisibleForTesting
ObserveHandle observeKafka(
@Nullable String name,
      @Nullable Collection<Partition> partitions,
Position position,
boolean stopAtCurrent,
LogObserver observer) {
    Preconditions.checkArgument(
        name != null || partitions != null, "Either name or partitions have to be non null");
Preconditions.checkArgument(
!accessor.isTopicRegex() || partitions == null,
"Regex URI %s cannot observe specific partitions, because these cannot be made stable.",
getUri());
try {
return processConsumer(
name,
createDefaultOffsets(partitions),
position,
stopAtCurrent,
name != null,
observer,
context.getExecutorService());
} catch (InterruptedException ex) {
log.warn("Interrupted waiting for kafka observer to start", ex);
Thread.currentThread().interrupt();
throw new RuntimeException(ex);
}
}
private ObserveHandle observeKafkaBulk(
@Nullable String name,
      @Nullable Collection<Offset> offsets,
Position position,
boolean stopAtCurrent,
LogObserver observer) {
Preconditions.checkArgument(
name != null || offsets != null, "Either name or offsets have to be non null");
Preconditions.checkArgument(position != null, "Position cannot be null");
Preconditions.checkArgument(
!accessor.isTopicRegex() || offsets == null,
"Regex URI %s cannot observe specific offsets, because these cannot be made stable.",
getUri());
try {
return processConsumerBulk(
name,
offsets,
position,
stopAtCurrent,
name != null,
observer,
context.getExecutorService());
} catch (InterruptedException ex) {
log.warn("Interrupted waiting for kafka observer to start", ex);
Thread.currentThread().interrupt();
throw new RuntimeException(ex);
}
}
/**
* Process given consumer in online fashion.
*
* @param name name of the consumer
* @param offsets assigned offsets
* @param position where to read from
* @param stopAtCurrent termination flag
* @param commitToKafka should we commit to kafka
* @param observer the observer
* @param executor executor to use for async processing
* @return observe handle
*/
@VisibleForTesting
ObserveHandle processConsumer(
@Nullable String name,
      @Nullable Collection<Offset> offsets,
Position position,
boolean stopAtCurrent,
boolean commitToKafka,
LogObserver observer,
ExecutorService executor)
throws InterruptedException {
// offsets that should be committed to kafka
    Map<TopicPartition, OffsetAndMetadata> kafkaCommitMap;
kafkaCommitMap = Collections.synchronizedMap(new HashMap<>());
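    // Flow of commits in online mode: every polled record is registered with the offset committer
    // before it reaches the observer; once the observer confirms the element, offset + 1 is staged
    // into kafkaCommitMap and committed synchronously by flushCommits() in the poll loop.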
    final OffsetCommitter<TopicPartition> offsetCommitter = createOffsetCommitter();
    final BiConsumer<TopicPartition, ConsumerRecord<Object, Object>> preWrite =
(tp, r) -> {
final long offset = r.offset();
offsetCommitter.register(
tp,
offset,
1,
() -> {
OffsetAndMetadata mtd = new OffsetAndMetadata(offset + 1);
if (commitToKafka) {
kafkaCommitMap.put(tp, mtd);
}
});
};
    OnlineConsumer<Object, Object> onlineConsumer =
new OnlineConsumer<>(
observer,
offsetCommitter,
() -> {
synchronized (kafkaCommitMap) {
                Map<TopicPartition, OffsetAndMetadata> clone = new HashMap<>(kafkaCommitMap);
kafkaCommitMap.clear();
return clone;
}
});
    AtomicReference<ObserveHandle> handle = new AtomicReference<>();
submitConsumerWithObserver(
name, offsets, position, stopAtCurrent, preWrite, onlineConsumer, executor, handle);
return dynamicHandle(handle);
}
/**
* Process given consumer in bulk fashion.
*
* @param name name of the consumer
* @param offsets assigned offsets
* @param position where to read from
* @param stopAtCurrent termination flag
* @param commitToKafka should we commit to kafka
* @param observer the observer
* @param executor executor to use for async processing
* @return observe handle
*/
@VisibleForTesting
ObserveHandle processConsumerBulk(
@Nullable String name,
      @Nullable Collection<Offset> offsets,
Position position,
boolean stopAtCurrent,
boolean commitToKafka,
LogObserver observer,
ExecutorService executor)
throws InterruptedException {
// offsets that should be committed to kafka
    Map<TopicPartition, OffsetAndMetadata> kafkaCommitMap;
kafkaCommitMap = Collections.synchronizedMap(new HashMap<>());
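    // Flow of commits in bulk mode: the first callback stages offsets of a confirmed bulk into
    // kafkaCommitMap, the second hands a snapshot of the staged offsets to flushCommits() for
    // commitSync, and the last one discards anything staged without committing it.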
    final BulkConsumer<Object, Object> bulkConsumer =
new BulkConsumer<>(
observer,
(tp, o) -> {
if (commitToKafka) {
OffsetAndMetadata off = new OffsetAndMetadata(o);
kafkaCommitMap.put(tp, off);
}
},
() -> {
synchronized (kafkaCommitMap) {
                Map<TopicPartition, OffsetAndMetadata> clone = new HashMap<>(kafkaCommitMap);
kafkaCommitMap.clear();
return clone;
}
},
kafkaCommitMap::clear);
    AtomicReference<ObserveHandle> handle = new AtomicReference<>();
submitConsumerWithObserver(
name, offsets, position, stopAtCurrent, (tp, r) -> {}, bulkConsumer, executor, handle);
return dynamicHandle(handle);
}
private void submitConsumerWithObserver(
final @Nullable String name,
      final @Nullable Collection<Offset> offsets,
      final Position position,
      boolean stopAtCurrent,
      final BiConsumer<TopicPartition, ConsumerRecord<Object, Object>> preWrite,
      final ElementConsumer<Object, Object> consumer,
      final ExecutorService executor,
      final AtomicReference<ObserveHandle> handle)
throws InterruptedException {
final CountDownLatch latch = new CountDownLatch(1);
AtomicBoolean completed = new AtomicBoolean();
AtomicBoolean shutdown = new AtomicBoolean();
    List<TopicOffset> seekOffsets = Collections.synchronizedList(new ArrayList<>());
Preconditions.checkArgument(
!accessor.isTopicRegex() || !stopAtCurrent, "Cannot use stopAtCurrent with regex URI");
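    // The poll loop below runs asynchronously on the supplied executor: poll records, feed them to
    // the element consumer, advance watermarks, flush prepared offset commits, and terminate on
    // shutdown, completion (stopAtCurrent) or interrupt. The latch releases the caller once the
    // consumer has been created and its initial assignment processed.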
executor.submit(
() -> {
handle.set(createObserveHandle(shutdown, seekOffsets, consumer, latch));
          final AtomicReference<KafkaConsumer<Object, Object>> consumerRef;
          final AtomicReference<PartitionedWatermarkEstimator> watermarkEstimator =
              new AtomicReference<>(null);
          final Map<TopicPartition, Integer> emptyPollCount = new ConcurrentHashMap<>();
          final Map<TopicPartition, Integer> topicPartitionToId = new HashMap<>();
final Duration pollDuration = Duration.ofMillis(consumerPollInterval);
consumerRef = new AtomicReference<>();
consumer.onStart();
ConsumerRebalanceListener listener =
listener(
name,
consumerRef,
consumer,
emptyPollCount,
topicPartitionToId,
watermarkEstimator);
          final ElementSerializer<Object, Object> serializer = accessor.getSerializer();
          try (KafkaConsumer<Object, Object> kafka =
createConsumer(name, offsets, name != null ? listener : null, position)) {
consumerRef.set(kafka);
// we need to poll first to initialize kafka assignments and rebalance listener
            ConsumerRecords<Object, Object> poll;
            Map<TopicPartition, Long> endOffsets;
do {
poll = kafka.poll(pollDuration);
endOffsets = stopAtCurrent ? findNonEmptyEndOffsets(kafka) : null;
if (log.isDebugEnabled()) {
log.debug(
"End offsets of current assignment {}: {}", kafka.assignment(), endOffsets);
}
listener.onPartitionsAssigned(kafka.assignment());
} while (poll.isEmpty()
&& accessor.isTopicRegex()
&& kafka.assignment().isEmpty()
&& !shutdown.get()
&& !Thread.currentThread().isInterrupted());
latch.countDown();
            AtomicReference<Throwable> error = new AtomicReference<>();
do {
if (poll.isEmpty()) {
Optional.ofNullable(watermarkEstimator.get()).ifPresent(consumer::onIdle);
}
logConsumerWatermark(name, offsets, watermarkEstimator, poll.count());
poll =
seekToNewOffsetsIfNeeded(seekOffsets, consumer, watermarkEstimator, kafka, poll);
final long bytesPerPoll =
maxBytesPerSec < Long.MAX_VALUE
? Math.max(1L, maxBytesPerSec / (1000L * consumerPollInterval))
: Long.MAX_VALUE;
long bytesPolled = 0L;
// increase all partition's empty poll counter by 1
emptyPollCount.replaceAll((k, v) -> v + 1);
              for (ConsumerRecord<Object, Object> r : poll) {
bytesPolled += r.serializedKeySize() + r.serializedValueSize();
TopicPartition tp = new TopicPartition(r.topic(), r.partition());
emptyPollCount.put(tp, 0);
preWrite.accept(tp, r);
StreamElement ingest = serializer.read(r, getEntityDescriptor());
if (ingest != null) {
watermarkEstimator.get().update(tp.partition(), ingest);
}
boolean cont =
consumer.consumeWithConfirm(
ingest, tp, r.offset(), watermarkEstimator.get(), error::set);
if (!cont) {
log.info("Terminating consumption by request");
completed.set(true);
break;
}
if (stopAtCurrent) {
Long end = endOffsets.get(tp);
if (end != null && end - 1 <= r.offset()) {
log.debug("Reached end of partition {} at offset {}", tp, r.offset());
endOffsets.remove(tp);
}
}
}
increaseWatermarkOnEmptyPolls(emptyPollCount, topicPartitionToId, watermarkEstimator);
flushCommits(kafka, consumer);
rethrowErrorIfPresent(name, error);
terminateIfConsumed(stopAtCurrent, kafka, endOffsets, completed);
waitToReduceThroughput(bytesPolled, bytesPerPoll);
poll = kafka.poll(pollDuration);
} while (!shutdown.get()
&& !completed.get()
&& !Thread.currentThread().isInterrupted());
if (log.isDebugEnabled()) {
log.debug(
"Terminating poll loop for assignment {}: shutdown: {}, completed: {}, interrupted: {}",
kafka.assignment(),
shutdown.get(),
completed.get(),
Thread.currentThread().isInterrupted());
}
if (!Thread.currentThread().isInterrupted()) {
consumer.onCompleted();
} else {
consumer.onCancelled();
}
} catch (InterruptedException ex) {
log.info("Interrupted while polling kafka. Terminating consumption.", ex);
Thread.currentThread().interrupt();
consumer.onCancelled();
} catch (Throwable err) {
log.error("Error processing consumer {}", name, err);
if (consumer.onError(err)) {
try {
submitConsumerWithObserver(
name, offsets, position, stopAtCurrent, preWrite, consumer, executor, handle);
} catch (InterruptedException ex) {
log.warn("Interrupted while restarting observer");
Thread.currentThread().interrupt();
throw new RuntimeException(ex);
}
}
}
});
latch.await();
}
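  // Applies offsets requested via ObserveHandle#resetOffsets(): seeks the Kafka consumer to the
  // requested offsets, re-announces the resulting positions to the element consumer and discards
  // the records polled before the seek.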
  private ConsumerRecords<Object, Object> seekToNewOffsetsIfNeeded(
      final List<TopicOffset> seekOffsets,
      final ElementConsumer<Object, Object> consumer,
      final AtomicReference<PartitionedWatermarkEstimator> watermarkEstimator,
      final KafkaConsumer<Object, Object> kafka,
      final ConsumerRecords<Object, Object> poll) {
synchronized (seekOffsets) {
if (!seekOffsets.isEmpty()) {
@SuppressWarnings({"unchecked", "rawtypes"})
        List<Offset> toSeek = (List) seekOffsets;
Utils.seekToOffsets(toSeek, kafka);
consumer.onAssign(
kafka,
kafka
.assignment()
.stream()
.map(
tp ->
new TopicOffset(
new PartitionWithTopic(tp.topic(), tp.partition()),
kafka.position(tp),
watermarkEstimator.get().getWatermark()))
.collect(Collectors.toList()));
log.info("Seeked consumer to offsets {} as requested", seekOffsets);
seekOffsets.clear();
return ConsumerRecords.empty();
}
}
return poll;
}
private void logConsumerWatermark(
@Nullable String name,
      @Nullable Collection<Offset> offsets,
      AtomicReference<PartitionedWatermarkEstimator> watermarkEstimator,
int polledCount) {
if (log.isDebugEnabled()) {
log.debug(
"Current watermark of consumer name {} with offsets {} " + "on {} poll'd records is {}",
name,
offsets,
polledCount,
Optional.ofNullable(watermarkEstimator.get())
.map(PartitionedWatermarkEstimator::getWatermark)
.orElse(Watermarks.MIN_WATERMARK));
}
}
private void rethrowErrorIfPresent(
      @Nullable String consumerName, AtomicReference<Throwable> error) {
Throwable errorThrown = error.getAndSet(null);
if (errorThrown != null) {
log.warn("Error during processing {}", consumerName, errorThrown);
throw new RuntimeException(errorThrown);
}
}
private void terminateIfConsumed(
boolean stopAtCurrent,
      KafkaConsumer<?, ?> consumer,
      Map<TopicPartition, Long> endOffsets,
AtomicBoolean completed) {
if (stopAtCurrent && endOffsets.isEmpty()) {
log.info(
"Assignment {} reached end of current data. Terminating consumption.",
consumer.assignment());
completed.set(true);
}
}
private void waitToReduceThroughput(long bytesPolled, final long bytesPerPoll)
throws InterruptedException {
long sleepDuration = bytesPolled * consumerPollInterval / bytesPerPoll;
if (sleepDuration > 0) {
TimeUnit.MILLISECONDS.sleep(sleepDuration);
}
}
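  // Synchronously commit whatever offsets the element consumer has prepared since the last poll.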
private void flushCommits(
      final KafkaConsumer<Object, Object> kafka, ElementConsumer<?, ?> consumer) {
    Map<TopicPartition, OffsetAndMetadata> commitMapClone;
commitMapClone = consumer.prepareOffsetsForCommit();
if (!commitMapClone.isEmpty()) {
kafka.commitSync(commitMapClone);
}
}
private void increaseWatermarkOnEmptyPolls(
      Map<TopicPartition, Integer> emptyPollCount,
      Map<TopicPartition, Integer> topicPartitionToId,
      AtomicReference<PartitionedWatermarkEstimator> watermarkEstimator) {
    // we have to poll at least as many times as there are assigned partitions and still see an
    // empty poll on a given partition to be sure that it is actually empty
int numEmptyPolls = emptyPollCount.size();
emptyPollCount
.entrySet()
.stream()
.filter(e -> e.getValue() >= numEmptyPolls)
.forEach(e -> watermarkEstimator.get().idle(topicPartitionToId.get(e.getKey())));
}
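  // The returned handle is backed by the state of the consuming thread: close() only flips the
  // shutdown flag, resetOffsets() enqueues seek requests picked up by seekToNewOffsetsIfNeeded()
  // and waitUntilReady() blocks until the initial assignment has been processed.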
private ObserveHandle createObserveHandle(
AtomicBoolean shutdown,
      List<TopicOffset> seekOffsets,
      ElementConsumer<?, ?> consumer,
CountDownLatch latch) {
return new ObserveHandle() {
@Override
public void close() {
shutdown.set(true);
}
@SuppressWarnings("unchecked")
@Override
      public List<Offset> getCommittedOffsets() {
return (List) consumer.getCommittedOffsets();
}
@SuppressWarnings("unchecked")
@Override
      public void resetOffsets(List<Offset> offsets) {
seekOffsets.addAll((Collection) offsets);
}
@SuppressWarnings("unchecked")
@Override
      public List<Offset> getCurrentOffsets() {
return (List) consumer.getCurrentOffsets();
}
@Override
public void waitUntilReady() throws InterruptedException {
latch.await();
}
};
}
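  // End offsets of the current assignment with empty partitions (beginning == end) filtered out,
  // so that consumption with stopAtCurrent terminates once all remaining data has been read.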
  private Map<TopicPartition, Long> findNonEmptyEndOffsets(
      final KafkaConsumer<Object, Object> kafka) {
    Set<TopicPartition> assignment = kafka.assignment();
    Map<TopicPartition, Long> beginning = kafka.beginningOffsets(assignment);
return kafka
.endOffsets(assignment)
.entrySet()
.stream()
.filter(entry -> beginning.get(entry.getKey()) < entry.getValue())
.collect(toMap(Map.Entry::getKey, Map.Entry::getValue));
}
  private KafkaConsumer<Object, Object> createConsumer() {
return createConsumer(UUID.randomUUID().toString(), null, null, Position.NEWEST);
}
/** Create kafka consumer for the data. */
@VisibleForTesting
  KafkaConsumer<Object, Object> createConsumer(
      @Nullable String name,
      @Nullable Collection<Offset> offsets,
@Nullable ConsumerRebalanceListener listener,
Position position) {
Preconditions.checkArgument(
name != null || listener == null,
"Please use either named group (with listener) or offsets without listener");
    KafkaConsumerFactory<Object, Object> factory = accessor.createConsumerFactory();
    final KafkaConsumer<Object, Object> consumer;
if ("".equals(name)) {
throw new IllegalArgumentException("Consumer group cannot be empty string");
}
if (name != null) {
consumer = factory.create(name, listener);
} else if (offsets != null) {
      List<Partition> partitions =
offsets.stream().map(Offset::getPartition).collect(Collectors.toList());
consumer = factory.create(partitions);
} else {
throw new IllegalArgumentException("Need either name or offsets to observe");
}
if (!accessor.isTopicRegex()) {
validateTopic(consumer, accessor.getTopic());
}
if (position == Position.OLDEST) {
// seek all partitions to oldest data
if (offsets == null) {
boolean emptyPoll = true;
if (consumer.assignment().isEmpty()) {
// If we don't find assignment within timeout, poll results in IllegalStateException.
// https://cwiki.apache.org/confluence/display/KAFKA/KIP-266%3A+Fix+consumer+indefinite+blocking+behavior
emptyPoll =
consumer.poll(Duration.ofMillis(accessor.getAssignmentTimeoutMillis())).isEmpty();
}
        final Set<TopicPartition> assignment = consumer.assignment();
        final Map<TopicPartition, OffsetAndMetadata> committed = consumer.committed(assignment);
if (committed.values().stream().allMatch(Objects::isNull)) {
log.info("Seeking consumer name {} to beginning of partitions {}", name, assignment);
consumer.seekToBeginning(assignment);
} else {
if (!emptyPoll) {
log.info("Seeking consumer name {} to committed offsets {}", name, committed);
committed.forEach(consumer::seek);
}
}
} else {
        List<TopicPartition> tps =
offsets
.stream()
.map(TopicOffset.class::cast)
.map(p -> new TopicPartition(p.getPartition().getTopic(), p.getPartition().getId()))
.collect(Collectors.toList());
log.info("Seeking given partitions {} to the beginning", tps);
consumer.seekToBeginning(tps);
}
} else if (position == Position.CURRENT) {
Preconditions.checkArgument(
offsets != null, "Please use %s only with specified offsets", position);
log.info("Seeking to given offsets {}", offsets);
Utils.seekToOffsets(offsets, consumer);
} else {
log.info("Starting to process kafka partitions from newest data");
}
return consumer;
}
@VisibleForTesting
  void validateTopic(KafkaConsumer<?, ?> consumer, String topicToValidate) {
    List<PartitionInfo> partitions = consumer.partitionsFor(topicToValidate);
Preconditions.checkArgument(
partitions != null && !partitions.isEmpty(),
"Received null or empty partitions for topic [%s]. "
+ "Please check that the topic exists and has at least one partition.",
topicToValidate);
}
@Override
public boolean hasExternalizableOffsets() {
return true;
}
@Override
public Factory asFactory() {
final KafkaAccessor accessor = this.accessor;
final Context context = this.context;
return repo -> new KafkaLogReader(accessor, context);
}
  private static Collection<Offset> createDefaultOffsets(Collection<Partition> partitions) {
if (partitions != null) {
return partitions
.stream()
.map(p -> new TopicOffset((PartitionWithTopic) p, -1, Long.MIN_VALUE))
.collect(Collectors.toList());
}
return null;
}
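  // The actual ObserveHandle is created asynchronously by the consuming thread, so callers receive
  // a proxy that delegates each call to whatever handle is currently published in the reference.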
  private static ObserveHandle dynamicHandle(AtomicReference<ObserveHandle> proxy) {
return new ObserveHandle() {
@Override
public void close() {
proxy.get().close();
}
@Override
      public List<Offset> getCommittedOffsets() {
return proxy.get().getCommittedOffsets();
}
@Override
      public void resetOffsets(List<Offset> offsets) {
proxy.get().resetOffsets(offsets);
}
@Override
      public List<Offset> getCurrentOffsets() {
return proxy.get().getCurrentOffsets();
}
@Override
public void waitUntilReady() throws InterruptedException {
proxy.get().waitUntilReady();
}
};
}
  private OffsetCommitter<TopicPartition> createOffsetCommitter() {
return new OffsetCommitter<>(
accessor.getLogStaleCommitIntervalNs(), accessor.getAutoCommitIntervalNs());
}
  // create rebalance listener that resets per-partition state and watermark estimators on (re)assignment
private ConsumerRebalanceListener listener(
String name,
      AtomicReference<KafkaConsumer<Object, Object>> kafka,
      ElementConsumer<Object, Object> consumer,
      Map<TopicPartition, Integer> emptyPollCount,
      Map<TopicPartition, Integer> topicPartitionToId,
      AtomicReference<PartitionedWatermarkEstimator> watermarkEstimator) {
return new ConsumerRebalanceListener() {
@Override
      public void onPartitionsRevoked(Collection<TopicPartition> parts) {
// nop
}
@Override
      public void onPartitionsAssigned(Collection<TopicPartition> parts) {
log.debug("Consumer {} has assigned partitions {}", name, parts);
emptyPollCount.clear();
topicPartitionToId.clear();
AtomicInteger id = new AtomicInteger();
parts.forEach(
p -> {
topicPartitionToId.put(p, id.getAndIncrement());
emptyPollCount.put(p, 0);
});
if (parts.isEmpty()) {
watermarkEstimator.set(createWatermarkEstimatorForEmptyParts());
} else {
watermarkEstimator.set(
new MinimalPartitionWatermarkEstimator(
parts
.stream()
.collect(
toMap(topicPartitionToId::get, item -> createWatermarkEstimator()))));
}
Optional.ofNullable(kafka.get())
.ifPresent(
c ->
consumer.onAssign(
c,
name != null
? getCommittedTopicOffsets(parts, c)
: getCurrentTopicOffsets(parts, c)));
}
      List<TopicOffset> getCurrentTopicOffsets(
          Collection<TopicPartition> parts, KafkaConsumer<Object, Object> c) {
return parts
.stream()
.map(
tp ->
new TopicOffset(
new PartitionWithTopic(tp.topic(), tp.partition()),
c.position(tp),
watermarkEstimator.get().getWatermark()))
.collect(Collectors.toList());
}
      List<TopicOffset> getCommittedTopicOffsets(
          Collection<TopicPartition> parts, KafkaConsumer<Object, Object> c) {
        Map<TopicPartition, OffsetAndMetadata> committed =
            new HashMap<>(c.committed(new HashSet<>(parts)));
for (TopicPartition tp : parts) {
committed.putIfAbsent(tp, null);
}
return committed
.entrySet()
.stream()
.map(
entry -> {
final long offset = entry.getValue() == null ? 0L : entry.getValue().offset();
return new TopicOffset(
new PartitionWithTopic(entry.getKey().topic(), entry.getKey().partition()),
offset,
watermarkEstimator.get().getWatermark());
})
.collect(Collectors.toList());
}
private WatermarkEstimator createWatermarkEstimator() {
final WatermarkIdlePolicyFactory idlePolicyFactory =
accessor.getWatermarkConfiguration().getWatermarkIdlePolicyFactory();
final WatermarkEstimatorFactory estimatorFactory =
accessor.getWatermarkConfiguration().getWatermarkEstimatorFactory();
return estimatorFactory.create(cfg, idlePolicyFactory);
}
};
}
private static PartitionedWatermarkEstimator createWatermarkEstimatorForEmptyParts() {
return () -> Watermarks.MAX_WATERMARK;
}
}