org.opensearch.migrations.replay.kafka.KafkaTrafficCaptureSource (source from the trafficReplayer artifact)
package org.opensearch.migrations.replay.kafka;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.time.Clock;
import java.time.Duration;
import java.time.Instant;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.Properties;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import com.google.protobuf.InvalidProtocolBufferException;
import org.apache.kafka.clients.consumer.Consumer;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.opensearch.migrations.replay.datatypes.ITrafficStreamKey;
import org.opensearch.migrations.replay.datatypes.PojoTrafficStreamAndKey;
import org.opensearch.migrations.replay.tracing.ChannelContextManager;
import org.opensearch.migrations.replay.tracing.ITrafficSourceContexts;
import org.opensearch.migrations.replay.tracing.ReplayContexts;
import org.opensearch.migrations.replay.tracing.RootReplayerContext;
import org.opensearch.migrations.replay.traffic.source.ISimpleTrafficCaptureSource;
import org.opensearch.migrations.replay.traffic.source.ITrafficStreamWithKey;
import org.opensearch.migrations.trafficcapture.protos.TrafficStream;
import io.netty.util.concurrent.DefaultThreadFactory;
import lombok.NonNull;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
/**
* Adapt a Kafka stream into a TrafficCaptureSource.
*
* Notice that there's a critical gap between how Kafka accepts commits and how the
* BlockingTrafficSource throttles calls to Kafka. The BlockingTrafficSource may
* block calls to readNextTrafficStreamChunk() until some time window elapses. This
* could be a very large window in cases where there were long gaps between recorded
* requests from the capturing proxy. For example, if a TrafficStream is read and if
* that stream is scheduled to be run one hour later, readNextTrafficStreamChunk()
* may not be called for almost an hour. By design, we're not calling Kafka to pull
* any more messages since we know that we don't have work to do for an hour. Shortly
* after the hour of waiting begins, Kafka will notice that this application is no
* longer calling poll and will kick the consumer out of the client group.
*
* See
* ...
*
* "Basically if you don't call poll at least as frequently as the configured max interval,
* then the client will proactively leave the group so that another consumer can take
* over its partitions. When this happens, you may see an offset commit failure (as
* indicated by a CommitFailedException thrown from a call to commitSync())."
*
* Since the Kafka client requires all calls to be made from the same thread, we can't
* simply run a background job to keep the client warm. We need the caller to touch
* this object periodically to keep the connection alive.
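*
* <p>A minimal sketch (hypothetical caller code, not part of this class) of how a driver
* that has decided to wait until {@code resumeAt} could keep the consumer in the group,
* assuming it already holds a {@code blockContext} for back-pressure tracing:
* <pre>{@code
* void waitUntil(Instant resumeAt) throws InterruptedException {
*     while (Instant.now().isBefore(resumeAt)) {
*         // Sleep until either work can resume or the consumer needs its next keep-alive
*         var wakeAt = kafkaSource.getNextRequiredTouch()
*             .filter(t -> t.isBefore(resumeAt))
*             .orElse(resumeAt);
*         Thread.sleep(Math.max(0, Duration.between(Instant.now(), wakeAt).toMillis()));
*         if (wakeAt.isBefore(resumeAt)) {
*             kafkaSource.touch(blockContext); // keeps the consumer polling from its own thread
*         }
*     }
* }
* }</pre>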
*/
@Slf4j
public class KafkaTrafficCaptureSource implements ISimpleTrafficCaptureSource {
public static final String MAX_POLL_INTERVAL_KEY = "max.poll.interval.ms";
// see
// https://stackoverflow.com/questions/39730126/difference-between-session-timeout-ms-and-max-poll-interval-ms-for-kafka-0-10
public static final String DEFAULT_POLL_INTERVAL_MS = "60000";
final TrackingKafkaConsumer trackingKafkaConsumer;
private final ExecutorService kafkaExecutor;
private final AtomicLong trafficStreamsRead;
private final KafkaBehavioralPolicy behavioralPolicy;
private final ChannelContextManager channelContextManager;
private final AtomicBoolean isClosed;
public KafkaTrafficCaptureSource(
@NonNull RootReplayerContext globalContext,
Consumer<String, byte[]> kafkaConsumer,
String topic,
Duration keepAliveInterval
) {
this(globalContext, kafkaConsumer, topic, keepAliveInterval, Clock.systemUTC(), new KafkaBehavioralPolicy());
}
public KafkaTrafficCaptureSource(
@NonNull RootReplayerContext globalContext,
Consumer<String, byte[]> kafkaConsumer,
@NonNull String topic,
Duration keepAliveInterval,
Clock clock,
@NonNull KafkaBehavioralPolicy behavioralPolicy
) {
this.channelContextManager = new ChannelContextManager(globalContext);
trackingKafkaConsumer = new TrackingKafkaConsumer(
globalContext,
kafkaConsumer,
topic,
keepAliveInterval,
clock,
this::onKeyFinishedCommitting
);
trafficStreamsRead = new AtomicLong();
this.behavioralPolicy = behavioralPolicy;
kafkaConsumer.subscribe(Collections.singleton(topic), trackingKafkaConsumer);
kafkaExecutor = Executors.newSingleThreadExecutor(new DefaultThreadFactory("kafkaConsumerThread"));
isClosed = new AtomicBoolean(false);
}
private void onKeyFinishedCommitting(ITrafficStreamKey trafficStreamKey) {
var looseParentScope = trafficStreamKey.getTrafficStreamsContext().getEnclosingScope();
if (!(looseParentScope instanceof ReplayContexts.KafkaRecordContext)) {
throw new IllegalArgumentException(
"Expected parent context of type "
+ ReplayContexts.KafkaRecordContext.class
+ " instead of "
+ looseParentScope
+ " (of type="
+ looseParentScope.getClass()
+ ")"
);
}
var kafkaCtx = (ReplayContexts.KafkaRecordContext) looseParentScope;
kafkaCtx.close();
channelContextManager.releaseContextFor(kafkaCtx.getImmediateEnclosingScope());
}
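/**
* Builds consumer properties via buildKafkaProperties, fills in the default
* max.poll.interval.ms when the caller hasn't supplied one, derives the keep-alive
* period from that interval, and wires everything into a new KafkaTrafficCaptureSource.
*
* <p>A minimal usage sketch; the brokers, topic, and group id below are placeholders:
* <pre>{@code
* var source = KafkaTrafficCaptureSource.buildKafkaSource(
*     rootReplayerContext,           // an existing RootReplayerContext
*     "broker1:9092,broker2:9092",   // hypothetical broker list
*     "capture-topic",               // hypothetical topic
*     "replayer-group",              // hypothetical consumer group id
*     false,                         // MSK IAM auth disabled
*     null,                          // no extra properties file
*     Clock.systemUTC(),
*     new KafkaBehavioralPolicy());
* }</pre>
*/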
public static KafkaTrafficCaptureSource buildKafkaSource(
@NonNull RootReplayerContext globalContext,
@NonNull String brokers,
@NonNull String topic,
@NonNull String groupId,
boolean enableMSKAuth,
String propertyFilePath,
@NonNull Clock clock,
@NonNull KafkaBehavioralPolicy behavioralPolicy
) throws IOException {
var kafkaProps = buildKafkaProperties(brokers, groupId, enableMSKAuth, propertyFilePath);
kafkaProps.putIfAbsent(MAX_POLL_INTERVAL_KEY, DEFAULT_POLL_INTERVAL_MS);
var pollPeriod = Duration.ofMillis(Long.valueOf((String) kafkaProps.get(MAX_POLL_INTERVAL_KEY)));
var keepAlivePeriod = getKeepAlivePeriodFromPollPeriod(pollPeriod);
return new KafkaTrafficCaptureSource(
globalContext,
new KafkaConsumer<>(kafkaProps),
topic,
keepAlivePeriod,
clock,
behavioralPolicy
);
}
/**
* We'll have to 'maintain' touches more frequently than the poll period, otherwise the
* consumer will fall out of the group, putting all the commits in-flight at risk. Notice
* that this doesn't have a bearing on heartbeats, which themselves are maintained through
* Kafka Consumer poll() calls. When those poll calls stop, so does the heartbeat, which
* is more sensitive, but managed via the 'session.timeout.ms' property.
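* For example, with the 60 s default poll interval applied by buildKafkaSource, touches
* are scheduled at least every 30 s.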
*/
private static Duration getKeepAlivePeriodFromPollPeriod(Duration pollPeriod) {
return pollPeriod.dividedBy(2);
}
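/**
* Builds the baseline consumer configuration: String keys, byte[] values, auto-commit
* disabled (offsets are committed explicitly through commitTrafficStream), and
* auto.offset.reset set to "earliest". Entries from the optional properties file are
* loaded on top of those defaults, while the bootstrap servers, group id, and (when
* enabled) the MSK IAM SASL settings are applied afterwards and therefore win.
*
* <p>For example, a hypothetical override file might contain:
* <pre>
* max.poll.interval.ms=300000
* fetch.max.bytes=10485760
* </pre>
*/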
public static Properties buildKafkaProperties(
@NonNull String brokers,
@NonNull String groupId,
boolean enableMSKAuth,
String propertyFilePath
) throws IOException {
var kafkaProps = new Properties();
kafkaProps.setProperty("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
kafkaProps.setProperty("value.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");
kafkaProps.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
kafkaProps.setProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
if (propertyFilePath != null) {
try (InputStream input = new FileInputStream(propertyFilePath)) {
kafkaProps.load(input);
} catch (IOException ex) {
log.error("Unable to load properties from kafka properties file with path: {}", propertyFilePath);
throw ex;
}
}
// Required for using SASL auth with MSK public endpoint
if (enableMSKAuth) {
kafkaProps.setProperty("security.protocol", "SASL_SSL");
kafkaProps.setProperty("sasl.mechanism", "AWS_MSK_IAM");
kafkaProps.setProperty("sasl.jaas.config", "software.amazon.msk.auth.iam.IAMLoginModule required;");
kafkaProps.setProperty(
"sasl.client.callback.handler.class",
"software.amazon.msk.auth.iam.IAMClientCallbackHandler"
);
}
kafkaProps.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers);
kafkaProps.setProperty(ConsumerConfig.GROUP_ID_CONFIG, groupId);
return kafkaProps;
}
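/**
* Runs the keep-alive poll on the single-threaded kafkaExecutor, since the Kafka consumer
* must only be used from one thread, and blocks until that poll has completed.
*/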
@Override
@SneakyThrows
public void touch(ITrafficSourceContexts.IBackPressureBlockContext context) {
CompletableFuture.runAsync(() -> trackingKafkaConsumer.touch(context), kafkaExecutor).get();
}
/**
* If messages are outstanding, we need to keep the connection alive; otherwise, there's no
* reason to. It's OK to fall out of the group and rejoin once ready.
* @return the next time by which touch() should be called to keep the consumer alive, or
* empty when no keep-alive is currently required
*/
@Override
public Optional<Instant> getNextRequiredTouch() {
return trackingKafkaConsumer.getNextRequiredTouch();
}
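/**
* Hands the read off to the consumer thread and returns a future that completes with the
* next batch of parsed traffic streams, or completes exceptionally if the read fails.
*/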
@Override
@SuppressWarnings("unchecked")
public CompletableFuture<List<ITrafficStreamWithKey>> readNextTrafficStreamChunk(
Supplier<ITrafficSourceContexts.IReadChunkContext> contextSupplier
) {
log.atTrace().setMessage("readNextTrafficStreamChunk()").log();
return CompletableFuture.supplyAsync(() -> {
log.atTrace().setMessage("async...readNextTrafficStreamChunk()").log();
return readNextTrafficStreamSynchronously(contextSupplier.get());
}, kafkaExecutor);
}
public List<ITrafficStreamWithKey> readNextTrafficStreamSynchronously(
ITrafficSourceContexts.IReadChunkContext context
) {
log.atTrace().setMessage("readNextTrafficStreamSynchronously()").log();
try {
return trackingKafkaConsumer.getNextBatchOfRecords(context, (offsetData, kafkaRecord) -> {
try {
TrafficStream ts = TrafficStream.parseFrom(kafkaRecord.value());
var trafficStreamsSoFar = trafficStreamsRead.incrementAndGet();
log.atTrace()
.setMessage("{}")
.addArgument(
() -> "Parsed traffic stream #" + trafficStreamsSoFar + ": " + offsetData + " " + ts
)
.log();
var key = new TrafficStreamKeyWithKafkaRecordId(tsk -> {
var channelKeyCtx = channelContextManager.retainOrCreateContext(tsk);
return channelContextManager.getGlobalContext()
.createTrafficStreamContextForKafkaSource(
channelKeyCtx,
kafkaRecord.key(),
kafkaRecord.serializedKeySize() + kafkaRecord.serializedValueSize()
);
}, ts, offsetData);
return (ITrafficStreamWithKey) new PojoTrafficStreamAndKey(ts, key);
} catch (InvalidProtocolBufferException e) {
// Assume the behavioralPolicy instance does any logging that the host may be interested in
RuntimeException recordError = behavioralPolicy.onInvalidKafkaRecord(kafkaRecord, e);
if (recordError != null) {
throw recordError;
} else {
return null;
}
}
}).filter(Objects::nonNull).collect(Collectors.toList());
} catch (Exception e) {
log.atError().setCause(e).setMessage("Terminating Kafka traffic stream due to exception").log();
throw e;
}
}
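/**
* Only keys produced by this source can be committed, since they carry the Kafka
* partition and offset information needed to commit; any other key type is rejected.
*/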
@Override
public CommitResult commitTrafficStream(ITrafficStreamKey trafficStreamKey) {
if (!(trafficStreamKey instanceof TrafficStreamKeyWithKafkaRecordId)) {
throw new IllegalArgumentException(
"Expected key of type "
+ TrafficStreamKeyWithKafkaRecordId.class
+ " but received "
+ trafficStreamKey
+ " (of type="
+ trafficStreamKey.getClass()
+ ")"
);
}
return trackingKafkaConsumer.commitKafkaKey(
trafficStreamKey,
(TrafficStreamKeyWithKafkaRecordId) trafficStreamKey
);
}
@Override
public void close() throws IOException, InterruptedException, ExecutionException {
if (isClosed.compareAndSet(false, true)) {
kafkaExecutor.submit(trackingKafkaConsumer::close).get();
kafkaExecutor.shutdownNow();
}
}
}