/*
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Metamarkets licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package io.druid.indexing.kafka;
import com.fasterxml.jackson.annotation.JacksonInject;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.core.type.TypeReference;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Function;
import com.google.common.base.Joiner;
import com.google.common.base.Optional;
import com.google.common.base.Preconditions;
import com.google.common.base.Supplier;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.google.common.primitives.Ints;
import com.google.common.primitives.Longs;
import com.google.common.util.concurrent.FutureCallback;
import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.ListeningExecutorService;
import com.google.common.util.concurrent.MoreExecutors;
import io.druid.java.util.emitter.EmittingLogger;
import io.druid.data.input.Committer;
import io.druid.data.input.InputRow;
import io.druid.data.input.impl.InputRowParser;
import io.druid.discovery.DiscoveryDruidNode;
import io.druid.discovery.DruidNodeDiscoveryProvider;
import io.druid.discovery.LookupNodeService;
import io.druid.indexing.appenderator.ActionBasedSegmentAllocator;
import io.druid.indexing.appenderator.ActionBasedUsedSegmentChecker;
import io.druid.indexing.common.TaskStatus;
import io.druid.indexing.common.TaskToolbox;
import io.druid.indexing.common.actions.CheckPointDataSourceMetadataAction;
import io.druid.indexing.common.actions.ResetDataSourceMetadataAction;
import io.druid.indexing.common.actions.SegmentTransactionalInsertAction;
import io.druid.indexing.common.actions.TaskActionClient;
import io.druid.indexing.common.task.AbstractTask;
import io.druid.indexing.common.task.RealtimeIndexTask;
import io.druid.indexing.common.task.TaskResource;
import io.druid.indexing.common.task.Tasks;
import io.druid.indexing.kafka.supervisor.KafkaSupervisor;
import io.druid.java.util.common.DateTimes;
import io.druid.java.util.common.ISE;
import io.druid.java.util.common.Intervals;
import io.druid.java.util.common.StringUtils;
import io.druid.java.util.common.collect.Utils;
import io.druid.java.util.common.concurrent.Execs;
import io.druid.java.util.common.guava.Sequence;
import io.druid.java.util.common.parsers.ParseException;
import io.druid.query.DruidMetrics;
import io.druid.query.NoopQueryRunner;
import io.druid.query.Query;
import io.druid.query.QueryPlus;
import io.druid.query.QueryRunner;
import io.druid.segment.indexing.DataSchema;
import io.druid.segment.indexing.RealtimeIOConfig;
import io.druid.segment.realtime.FireDepartment;
import io.druid.segment.realtime.FireDepartmentMetrics;
import io.druid.segment.realtime.RealtimeMetricsMonitor;
import io.druid.segment.realtime.appenderator.Appenderator;
import io.druid.segment.realtime.appenderator.AppenderatorDriverAddResult;
import io.druid.segment.realtime.appenderator.Appenderators;
import io.druid.segment.realtime.appenderator.StreamAppenderatorDriver;
import io.druid.segment.realtime.appenderator.SegmentIdentifier;
import io.druid.segment.realtime.appenderator.SegmentsAndMetadata;
import io.druid.segment.realtime.appenderator.TransactionalSegmentPublisher;
import io.druid.segment.realtime.firehose.ChatHandler;
import io.druid.segment.realtime.firehose.ChatHandlerProvider;
import io.druid.server.security.Access;
import io.druid.server.security.Action;
import io.druid.server.security.AuthorizationUtils;
import io.druid.server.security.AuthorizerMapper;
import io.druid.server.security.ForbiddenException;
import io.druid.server.security.Resource;
import io.druid.server.security.ResourceAction;
import io.druid.server.security.ResourceType;
import io.druid.timeline.DataSegment;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.clients.consumer.OffsetOutOfRangeException;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.serialization.ByteArrayDeserializer;
import org.joda.time.DateTime;
import javax.annotation.Nullable;
import javax.servlet.http.HttpServletRequest;
import javax.ws.rs.Consumes;
import javax.ws.rs.DefaultValue;
import javax.ws.rs.GET;
import javax.ws.rs.POST;
import javax.ws.rs.Path;
import javax.ws.rs.Produces;
import javax.ws.rs.QueryParam;
import javax.ws.rs.core.Context;
import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.Response;
import java.io.File;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Random;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.Future;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicReference;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import java.util.stream.Collectors;
public class KafkaIndexTask extends AbstractTask implements ChatHandler
{
public static final long PAUSE_FOREVER = -1L;
public enum Status
{
NOT_STARTED,
STARTING,
READING,
PAUSED,
PUBLISHING
// Ideally this should be called FINISHING now, as the task does incremental publishes
// throughout its lifetime.
}
private static final EmittingLogger log = new EmittingLogger(KafkaIndexTask.class);
private static final String TYPE = "index_kafka";
private static final Random RANDOM = new Random();
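// Timeout for each Kafka consumer poll, in milliseconds.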
private static final long POLL_TIMEOUT = 100;
private static final long LOCK_ACQUIRE_TIMEOUT_SECONDS = 15;
private static final String METADATA_NEXT_PARTITIONS = "nextPartitions";
private static final String METADATA_PUBLISH_PARTITIONS = "publishPartitions";
private final DataSchema dataSchema;
private final InputRowParser<ByteBuffer> parser;
private final KafkaTuningConfig tuningConfig;
private final KafkaIOConfig ioConfig;
private final AuthorizerMapper authorizerMapper;
private final Optional<ChatHandlerProvider> chatHandlerProvider;
private final Map<Integer, Long> endOffsets = new ConcurrentHashMap<>();
private final Map<Integer, Long> nextOffsets = new ConcurrentHashMap<>();
private final Map<Integer, Long> maxEndOffsets = new HashMap<>();
private final Map<Integer, Long> lastPersistedOffsets = new ConcurrentHashMap<>();
private TaskToolbox toolbox;
private volatile Appenderator appenderator = null;
private volatile StreamAppenderatorDriver driver = null;
private volatile FireDepartmentMetrics fireDepartmentMetrics = null;
private volatile DateTime startTime;
private volatile Status status = Status.NOT_STARTED; // this is only ever set by the task runner thread (runThread)
private volatile Thread runThread = null;
private volatile File sequencesPersistFile = null;
private final AtomicBoolean stopRequested = new AtomicBoolean(false);
private final AtomicBoolean publishOnStop = new AtomicBoolean(false);
// The pause lock and associated conditions are to support coordination between the Jetty threads and the main
// ingestion loop. The goal is to provide callers of the API a guarantee that if pause() returns successfully
// the ingestion loop has been stopped at the returned offsets and will not ingest any more data until resumed. The
// fields are used as follows (every step requires acquiring [pauseLock]):
// Pausing:
// - In pause(), [pauseRequested] is set to true and then execution waits for [status] to change to PAUSED, with the
// condition checked when [hasPaused] is signalled.
// - In possiblyPause() called from the main loop, if [pauseRequested] is true, [status] is set to PAUSED,
// [hasPaused] is signalled, and execution pauses until [pauseRequested] becomes false, either by being set or by
// the [pauseMillis] timeout elapsing. [pauseRequested] is checked when [shouldResume] is signalled.
// Resuming:
// - In resume(), [pauseRequested] is set to false, [shouldResume] is signalled, and execution waits for [status] to
// change to something other than PAUSED, with the condition checked when [shouldResume] is signalled.
// - In possiblyPause(), when [shouldResume] is signalled, if [pauseRequested] has become false the pause loop ends,
// [status] is changed to STARTING and [shouldResume] is signalled.
private final Lock pauseLock = new ReentrantLock();
private final Condition hasPaused = pauseLock.newCondition();
private final Condition shouldResume = pauseLock.newCondition();
// [pollRetryLock] and [isAwaitingRetry] are used when the Kafka consumer returns an OffsetOutOfRangeException and we
// pause polling from Kafka for POLL_RETRY_MS before trying again. This allows us to signal the sleeping thread and
// resume the main run loop in the case of a pause or stop request from a Jetty thread.
private final Lock pollRetryLock = new ReentrantLock();
private final Condition isAwaitingRetry = pollRetryLock.newCondition();
// [statusLock] is used to synchronize the Jetty thread calling stopGracefully() with the main run thread. It prevents
// the main run thread from switching into a publishing state while the stopGracefully() thread thinks it's still in
// a pre-publishing state. This is important because stopGracefully() will try to use the [stopRequested] flag to stop
// the main thread where possible, but this flag is not honored once publishing has begun so in this case we must
// interrupt the thread. The lock ensures that if the run thread is about to transition into publishing state, it
// blocks until after stopGracefully() has set [stopRequested] and then does a final check on [stopRequested] before
// transitioning to publishing state.
private final Object statusLock = new Object();
private volatile boolean pauseRequested = false;
private volatile long pauseMillis = 0;
// This value can be tuned in some tests
private long pollRetryMs = 30000;
private final Set<String> publishingSequences = Sets.newConcurrentHashSet();
private final BlockingQueue<SequenceMetadata> publishQueue = new LinkedBlockingQueue<>();
private final List<ListenableFuture<SegmentsAndMetadata>> handOffWaitList = new CopyOnWriteArrayList<>(); // to prevent concurrency visibility issues
private final CountDownLatch waitForPublishes = new CountDownLatch(1);
private final AtomicReference<Throwable> throwableAtomicReference = new AtomicReference<>();
private final String topic;
private volatile CopyOnWriteArrayList<SequenceMetadata> sequences;
private ListeningExecutorService publishExecService;
private final boolean useLegacy;
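// Jackson creator used to deserialize the task spec. A null id causes a fresh task id to be generated from
// the datasource name and a random suffix (see makeTaskId).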
@JsonCreator
public KafkaIndexTask(
@JsonProperty("id") String id,
@JsonProperty("resource") TaskResource taskResource,
@JsonProperty("dataSchema") DataSchema dataSchema,
@JsonProperty("tuningConfig") KafkaTuningConfig tuningConfig,
@JsonProperty("ioConfig") KafkaIOConfig ioConfig,
@JsonProperty("context") Map context,
@JacksonInject ChatHandlerProvider chatHandlerProvider,
@JacksonInject AuthorizerMapper authorizerMapper
)
{
super(
id == null ? makeTaskId(dataSchema.getDataSource(), RANDOM.nextInt()) : id,
StringUtils.format("%s_%s", TYPE, dataSchema.getDataSource()),
taskResource,
dataSchema.getDataSource(),
context
);
this.dataSchema = Preconditions.checkNotNull(dataSchema, "dataSchema");
this.parser = Preconditions.checkNotNull((InputRowParser<ByteBuffer>) dataSchema.getParser(), "parser");
this.tuningConfig = Preconditions.checkNotNull(tuningConfig, "tuningConfig");
this.ioConfig = Preconditions.checkNotNull(ioConfig, "ioConfig");
this.chatHandlerProvider = Optional.fromNullable(chatHandlerProvider);
this.authorizerMapper = authorizerMapper;
this.endOffsets.putAll(ioConfig.getEndPartitions().getPartitionOffsetMap());
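// The hard maximum end offsets are unbounded (Long.MAX_VALUE) for every partition; they bound the last,
// still-open sequence until real end offsets are known.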
this.maxEndOffsets.putAll(endOffsets.entrySet()
.stream()
.collect(Collectors.toMap(
Map.Entry::getKey,
integerLongEntry -> Long.MAX_VALUE
)));
this.topic = ioConfig.getStartPartitions().getTopic();
this.sequences = new CopyOnWriteArrayList<>();
if (context != null && context.get(KafkaSupervisor.IS_INCREMENTAL_HANDOFF_SUPPORTED) != null
&& ((boolean) context.get(KafkaSupervisor.IS_INCREMENTAL_HANDOFF_SUPPORTED))) {
useLegacy = false;
} else {
useLegacy = true;
}
}
@VisibleForTesting
void setPollRetryMs(long retryMs)
{
this.pollRetryMs = retryMs;
}
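// Builds a task id of the form "index_kafka_<dataSource>_<suffix>", where the suffix encodes the random bits
// as eight lowercase letters ('a' through 'p', four bits per character).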
private static String makeTaskId(String dataSource, int randomBits)
{
final StringBuilder suffix = new StringBuilder(8);
for (int i = 0; i < Ints.BYTES * 2; ++i) {
suffix.append((char) ('a' + ((randomBits >>> (i * 4)) & 0x0F)));
}
return Joiner.on("_").join(TYPE, dataSource, suffix);
}
@Override
public int getPriority()
{
return getContextValue(Tasks.PRIORITY_KEY, Tasks.DEFAULT_REALTIME_TASK_PRIORITY);
}
@Override
public String getType()
{
return TYPE;
}
@Override
public boolean isReady(TaskActionClient taskActionClient) throws Exception
{
return true;
}
@JsonProperty
public DataSchema getDataSchema()
{
return dataSchema;
}
@JsonProperty
public KafkaTuningConfig getTuningConfig()
{
return tuningConfig;
}
@JsonProperty("ioConfig")
public KafkaIOConfig getIOConfig()
{
return ioConfig;
}
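// Starts a single-threaded executor that drains [publishQueue]: each completed sequence is published,
// removed from [sequences], and registered for segment handoff. A sentinel sequence (or any error) counts
// down [waitForPublishes] and stops the loop.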
private void createAndStartPublishExecutor()
{
publishExecService = MoreExecutors.listeningDecorator(Execs.singleThreaded("publish-driver"));
publishExecService.submit(
(Runnable) () -> {
while (true) {
try {
final SequenceMetadata sequenceMetadata = publishQueue.take();
Preconditions.checkNotNull(driver);
if (sequenceMetadata.isSentinel()) {
waitForPublishes.countDown();
break;
}
log.info("Publishing segments for sequence [%s]", sequenceMetadata);
final SegmentsAndMetadata result = driver.publish(
sequenceMetadata.getPublisher(toolbox, ioConfig.isUseTransaction()),
sequenceMetadata.getCommitterSupplier(topic, lastPersistedOffsets).get(),
ImmutableList.of(sequenceMetadata.getSequenceName())
).get();
if (result == null) {
throw new ISE(
"Transaction failure publishing segments for sequence [%s]",
sequenceMetadata
);
} else {
log.info(
"Published segments[%s] with metadata[%s].",
Joiner.on(", ").join(
result.getSegments().stream().map(DataSegment::getIdentifier).collect(Collectors.toList())
),
Preconditions.checkNotNull(result.getCommitMetadata(), "commitMetadata")
);
}
sequences.remove(sequenceMetadata);
publishingSequences.remove(sequenceMetadata.getSequenceName());
try {
persistSequences();
}
catch (IOException e) {
log.error(e, "Unable to persist state, dying");
Throwables.propagate(e);
}
final ListenableFuture<SegmentsAndMetadata> handOffFuture = driver.registerHandoff(result);
handOffWaitList.add(handOffFuture);
}
catch (Throwable t) {
if ((t instanceof InterruptedException || (t instanceof RejectedExecutionException
&& t.getCause() instanceof InterruptedException))) {
log.warn("Stopping publish thread as we are interrupted, probably we are shutting down");
} else {
log.makeAlert(t, "Error in publish thread, dying").emit();
throwableAtomicReference.set(t);
}
Futures.allAsList(handOffWaitList).cancel(true);
waitForPublishes.countDown();
break;
}
}
}
);
}
@Override
public TaskStatus run(final TaskToolbox toolbox) throws Exception
{
// For backwards compatibility; this should be removed in versions greater than 0.12.x.
if (useLegacy) {
return runLegacy(toolbox);
}
log.info("Starting up!");
startTime = DateTimes.nowUtc();
status = Status.STARTING;
this.toolbox = toolbox;
if (getContext() != null && getContext().get("checkpoints") != null) {
log.info("Got checkpoints [%s]", (String) getContext().get("checkpoints"));
final TreeMap<Integer, Map<Integer, Long>> checkpoints = toolbox.getObjectMapper().readValue(
(String) getContext().get("checkpoints"),
new TypeReference<TreeMap<Integer, Map<Integer, Long>>>()
{
}
);
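// Each pair of adjacent checkpoints defines the start and end offsets of an already-checkpointed sequence;
// the final checkpoint starts the currently open sequence, whose end offsets are unbounded.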
Iterator<Map.Entry<Integer, Map<Integer, Long>>> sequenceOffsets = checkpoints.entrySet().iterator();
Map.Entry<Integer, Map<Integer, Long>> previous = sequenceOffsets.next();
while (sequenceOffsets.hasNext()) {
Map.Entry<Integer, Map<Integer, Long>> current = sequenceOffsets.next();
sequences.add(new SequenceMetadata(
previous.getKey(),
StringUtils.format("%s_%s", ioConfig.getBaseSequenceName(), previous.getKey()),
previous.getValue(),
current.getValue(),
true
));
previous = current;
}
sequences.add(new SequenceMetadata(
previous.getKey(),
StringUtils.format("%s_%s", ioConfig.getBaseSequenceName(), previous.getKey()),
previous.getValue(),
maxEndOffsets,
false
));
} else {
sequences.add(new SequenceMetadata(
0,
StringUtils.format("%s_%s", ioConfig.getBaseSequenceName(), 0),
ioConfig.getStartPartitions().getPartitionOffsetMap(),
maxEndOffsets,
false
));
}
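// Sequence metadata is persisted to disk so a restarted task can restore its sequences and resume.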
sequencesPersistFile = new File(toolbox.getPersistDir(), "sequences.json");
restoreSequences();
log.info("Starting with sequences: %s", sequences);
if (chatHandlerProvider.isPresent()) {
log.info("Found chat handler of class[%s]", chatHandlerProvider.get().getClass().getName());
chatHandlerProvider.get().register(getId(), this, false);
} else {
log.warn("No chat handler detected");
}
runThread = Thread.currentThread();
// Set up FireDepartmentMetrics
final FireDepartment fireDepartmentForMetrics = new FireDepartment(
dataSchema,
new RealtimeIOConfig(null, null, null),
null
);
fireDepartmentMetrics = fireDepartmentForMetrics.getMetrics();
toolbox.getMonitorScheduler().addMonitor(
new RealtimeMetricsMonitor(
ImmutableList.of(fireDepartmentForMetrics),
ImmutableMap.of(DruidMetrics.TASK_ID, new String[]{getId()})
)
);
LookupNodeService lookupNodeService = getContextValue(RealtimeIndexTask.CTX_KEY_LOOKUP_TIER) == null ?
toolbox.getLookupNodeService() :
new LookupNodeService((String) getContextValue(RealtimeIndexTask.CTX_KEY_LOOKUP_TIER));
DiscoveryDruidNode discoveryDruidNode = new DiscoveryDruidNode(
toolbox.getDruidNode(),
DruidNodeDiscoveryProvider.NODE_TYPE_PEON,
ImmutableMap.of(
toolbox.getDataNodeService().getName(), toolbox.getDataNodeService(),
lookupNodeService.getName(), lookupNodeService
)
);
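// Announce this task as a data server and a discoverable node, then run the main ingestion loop with a
// dedicated Kafka consumer that is closed when this block exits.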
try (
final KafkaConsumer<byte[], byte[]> consumer = newConsumer()
) {
toolbox.getDataSegmentServerAnnouncer().announce();
toolbox.getDruidNodeAnnouncer().announce(discoveryDruidNode);
appenderator = newAppenderator(fireDepartmentMetrics, toolbox);
driver = newDriver(appenderator, toolbox, fireDepartmentMetrics);
createAndStartPublishExecutor();
final String topic = ioConfig.getStartPartitions().getTopic();
// Start up, set up initial offsets.
final Object restoredMetadata = driver.startJob();
if (restoredMetadata == null) {
// no persist has happened so far
// so either this is a brand new task or replacement of a failed task
Preconditions.checkState(sequences.get(0).startOffsets.entrySet().stream().allMatch(
partitionOffsetEntry -> Longs.compare(
partitionOffsetEntry.getValue(),
ioConfig.getStartPartitions()
.getPartitionOffsetMap()
.get(partitionOffsetEntry.getKey())
) >= 0
), "Sequence offsets are not compatible with start offsets of task");
nextOffsets.putAll(sequences.get(0).startOffsets);
} else {
final Map<String, Object> restoredMetadataMap = (Map) restoredMetadata;
final KafkaPartitions restoredNextPartitions = toolbox.getObjectMapper().convertValue(
restoredMetadataMap.get(METADATA_NEXT_PARTITIONS),
KafkaPartitions.class
);
nextOffsets.putAll(restoredNextPartitions.getPartitionOffsetMap());
// Sanity checks.
if (!restoredNextPartitions.getTopic().equals(ioConfig.getStartPartitions().getTopic())) {
throw new ISE(
"WTF?! Restored topic[%s] but expected topic[%s]",
restoredNextPartitions.getTopic(),
ioConfig.getStartPartitions().getTopic()
);
}
if (!nextOffsets.keySet().equals(ioConfig.getStartPartitions().getPartitionOffsetMap().keySet())) {
throw new ISE(
"WTF?! Restored partitions[%s] but expected partitions[%s]",
nextOffsets.keySet(),
ioConfig.getStartPartitions().getPartitionOffsetMap().keySet()
);
}
// The sequences size can be 0 only when all sequences have been published and the task stopped before it
// could finish, which is super rare.
if (sequences.size() == 0 || sequences.get(sequences.size() - 1).isCheckpointed()) {
this.endOffsets.putAll(sequences.size() == 0
? nextOffsets
: sequences.get(sequences.size() - 1).getEndOffsets());
log.info("End offsets changed to [%s]", endOffsets);
}
}
// Set up committer.
final Supplier<Committer> committerSupplier = () -> {
final Map<Integer, Long> snapshot = ImmutableMap.copyOf(nextOffsets);
lastPersistedOffsets.clear();
lastPersistedOffsets.putAll(snapshot);
return new Committer()
{
@Override
public Object getMetadata()
{
return ImmutableMap.of(
METADATA_NEXT_PARTITIONS, new KafkaPartitions(
ioConfig.getStartPartitions().getTopic(),
snapshot
)
);
}
@Override
public void run()
{
// Do nothing.
}
};
};
// restart publishing of sequences (if any)
maybePersistAndPublishSequences(committerSupplier);
Set<Integer> assignment = assignPartitionsAndSeekToNext(consumer, topic);
// Main loop.
// Could eventually support leader/follower mode (for keeping replicas more in sync)
boolean stillReading = !assignment.isEmpty();
status = Status.READING;
try {
while (stillReading) {
if (possiblyPause(assignment)) {
// The partition assignments may have changed while paused by a call to setEndOffsets() so reassign
// partitions upon resuming. This is safe even if the end offsets have not been modified.
assignment = assignPartitionsAndSeekToNext(consumer, topic);
if (assignment.isEmpty()) {
log.info("All partitions have been fully read");
publishOnStop.set(true);
stopRequested.set(true);
}
}
// if stop is requested, or the task's end offsets were set by a call to the setEndOffsets method with finish set to true
if (stopRequested.get() || (sequences.get(sequences.size() - 1).isCheckpointed()
&& !ioConfig.isPauseAfterRead())) {
status = Status.PUBLISHING;
}
if (stopRequested.get()) {
break;
}
checkAndMaybeThrowException();
if (!ioConfig.isPauseAfterRead()) {
maybePersistAndPublishSequences(committerSupplier);
}
// The retrying business is because the KafkaConsumer throws OffsetOutOfRangeException if the seeked-to
// offset is not present in the topic-partition. This can happen if we're asking a task to read from data
// that has not been written yet (which is totally legitimate). So let's wait for it to show up.
ConsumerRecords<byte[], byte[]> records = ConsumerRecords.empty();
try {
records = consumer.poll(POLL_TIMEOUT);
}
catch (OffsetOutOfRangeException e) {
log.warn("OffsetOutOfRangeException with message [%s]", e.getMessage());
possiblyResetOffsetsOrWait(e.offsetOutOfRangePartitions(), consumer, toolbox);
stillReading = ioConfig.isPauseAfterRead() || !assignment.isEmpty();
}
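// Tracks a sequence whose open segment grew past maxRowsPerSegment while handling this batch, so that a
// checkpoint can be requested for it afterwards.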
SequenceMetadata sequenceToCheckpoint = null;
for (ConsumerRecord<byte[], byte[]> record : records) {
if (log.isTraceEnabled()) {
log.trace(
"Got topic[%s] partition[%d] offset[%,d].",
record.topic(),
record.partition(),
record.offset()
);
}
if (record.offset() < endOffsets.get(record.partition())) {
if (record.offset() != nextOffsets.get(record.partition())) {
if (ioConfig.isSkipOffsetGaps()) {
log.warn(
"Skipped to offset[%,d] after offset[%,d] in partition[%d].",
record.offset(),
nextOffsets.get(record.partition()),
record.partition()
);
} else {
throw new ISE(
"WTF?! Got offset[%,d] after offset[%,d] in partition[%d].",
record.offset(),
nextOffsets.get(record.partition()),
record.partition()
);
}
}
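// Parse the record value into InputRows. A null value yields a single null row, which is counted as
// thrown away rather than failing the task.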
try {
final byte[] valueBytes = record.value();
final List<InputRow> rows = valueBytes == null
? Utils.nullableListOf((InputRow) null)
: parser.parseBatch(ByteBuffer.wrap(valueBytes));
boolean isPersistRequired = false;
for (InputRow row : rows) {
if (row != null && withinMinMaxRecordTime(row)) {
SequenceMetadata sequenceToUse = null;
for (SequenceMetadata sequence : sequences) {
if (sequence.canHandle(record)) {
sequenceToUse = sequence;
}
}
if (sequenceToUse == null) {
throw new ISE(
"WTH?! cannot find any valid sequence for record with partition [%d] and offset [%d]. Current sequences: %s",
record.partition(),
record.offset(),
sequences
);
}
final AppenderatorDriverAddResult addResult = driver.add(
row,
sequenceToUse.getSequenceName(),
committerSupplier,
// skip segment lineage check as there will always be one segment
// for combination of sequence and segment granularity.
// It is necessary to skip it as the task puts messages polled from all the
// assigned Kafka partitions into a single Druid segment, thus ordering of
// messages among replica tasks across assigned partitions is not guaranteed
// which may cause replica tasks to ask for segments with different interval
// in different order which might cause SegmentAllocateAction to fail.
true,
// do not allow incremental persists to happen until all the rows from this batch
// of rows are indexed
false
);
if (addResult.isOk()) {
// If the number of rows in the segment exceeds the threshold after adding a row,
// move the segment out from the active segments of BaseAppenderatorDriver to make a new segment.
if (addResult.getNumRowsInSegment() > tuningConfig.getMaxRowsPerSegment()) {
if (!sequenceToUse.isCheckpointed()) {
sequenceToCheckpoint = sequenceToUse;
}
}
isPersistRequired |= addResult.isPersistRequired();
} else {
// Failure to allocate segment puts determinism at risk, bail out to be safe.
// May want configurable behavior here at some point.
// If we allow continuing, then consider blacklisting the interval for a while to avoid constant checks.
throw new ISE("Could not allocate segment for row with timestamp[%s]", row.getTimestamp());
}
fireDepartmentMetrics.incrementProcessed();
} else {
fireDepartmentMetrics.incrementThrownAway();
}
}
if (isPersistRequired) {
Futures.addCallback(
driver.persistAsync(committerSupplier.get()),
new FutureCallback<Object>()