/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams;
import org.apache.kafka.clients.CommonClientConfigs;
import org.apache.kafka.clients.admin.Admin;
import org.apache.kafka.clients.admin.ListOffsetsResult.ListOffsetsResultInfo;
import org.apache.kafka.clients.admin.MemberToRemove;
import org.apache.kafka.clients.admin.RemoveMembersFromConsumerGroupOptions;
import org.apache.kafka.clients.admin.RemoveMembersFromConsumerGroupResult;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.common.Metric;
import org.apache.kafka.common.MetricName;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.annotation.InterfaceStability.Evolving;
import org.apache.kafka.common.errors.TimeoutException;
import org.apache.kafka.common.metrics.JmxReporter;
import org.apache.kafka.common.metrics.KafkaMetricsContext;
import org.apache.kafka.common.metrics.MetricConfig;
import org.apache.kafka.common.metrics.Metrics;
import org.apache.kafka.common.metrics.MetricsContext;
import org.apache.kafka.common.metrics.MetricsReporter;
import org.apache.kafka.common.metrics.Sensor;
import org.apache.kafka.common.metrics.Sensor.RecordingLevel;
import org.apache.kafka.common.serialization.Serializer;
import org.apache.kafka.common.utils.LogContext;
import org.apache.kafka.common.utils.Time;
import org.apache.kafka.streams.errors.InvalidStateStoreException;
import org.apache.kafka.streams.errors.InvalidStateStorePartitionException;
import org.apache.kafka.streams.errors.ProcessorStateException;
import org.apache.kafka.streams.errors.StreamsException;
import org.apache.kafka.streams.errors.StreamsNotStartedException;
import org.apache.kafka.streams.errors.StreamsStoppedException;
import org.apache.kafka.streams.errors.StreamsUncaughtExceptionHandler;
import org.apache.kafka.streams.errors.UnknownStateStoreException;
import org.apache.kafka.streams.internals.metrics.ClientMetrics;
import org.apache.kafka.streams.processor.StateRestoreListener;
import org.apache.kafka.streams.processor.StateStore;
import org.apache.kafka.streams.processor.StreamPartitioner;
import org.apache.kafka.streams.processor.TaskId;
import org.apache.kafka.streams.processor.internals.ClientUtils;
import org.apache.kafka.streams.processor.internals.DefaultKafkaClientSupplier;
import org.apache.kafka.streams.processor.internals.GlobalStreamThread;
import org.apache.kafka.streams.processor.internals.GlobalStreamThread.State;
import org.apache.kafka.streams.processor.internals.StateDirectory;
import org.apache.kafka.streams.processor.internals.StreamThread;
import org.apache.kafka.streams.processor.internals.StreamsMetadataState;
import org.apache.kafka.streams.processor.internals.Task;
import org.apache.kafka.streams.processor.internals.ThreadStateTransitionValidator;
import org.apache.kafka.streams.processor.internals.TopologyMetadata;
import org.apache.kafka.streams.processor.internals.assignment.AssignorError;
import org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl;
import org.apache.kafka.streams.query.FailureReason;
import org.apache.kafka.streams.query.PositionBound;
import org.apache.kafka.streams.query.QueryConfig;
import org.apache.kafka.streams.query.QueryResult;
import org.apache.kafka.streams.query.StateQueryRequest;
import org.apache.kafka.streams.query.StateQueryResult;
import org.apache.kafka.streams.state.HostInfo;
import org.apache.kafka.streams.state.internals.GlobalStateStoreProvider;
import org.apache.kafka.streams.state.internals.QueryableStoreProvider;
import org.apache.kafka.streams.state.internals.StreamThreadStateStoreProvider;
import org.slf4j.Logger;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.function.BiConsumer;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.Optional;
import java.util.Properties;
import java.util.Set;
import java.util.TreeMap;
import java.util.UUID;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Consumer;
import java.util.stream.Collectors;
import static org.apache.kafka.streams.StreamsConfig.METRICS_RECORDING_LEVEL_CONFIG;
import static org.apache.kafka.streams.errors.StreamsUncaughtExceptionHandler.StreamThreadExceptionResponse.SHUTDOWN_CLIENT;
import static org.apache.kafka.streams.internals.ApiUtils.prepareMillisCheckFailMsgPrefix;
import static org.apache.kafka.streams.internals.ApiUtils.validateMillisecondDuration;
import static org.apache.kafka.streams.processor.internals.ClientUtils.fetchEndOffsets;
/**
* A Kafka client that allows for performing continuous computation on input coming from one or more input topics and
* sends output to zero, one, or more output topics.
*
* The computational logic can be specified either by using the {@link Topology} to define a DAG topology of
* {@link org.apache.kafka.streams.processor.api.Processor}s or by using the {@link StreamsBuilder} which provides the high-level DSL to define
* transformations.
*
* One {@code KafkaStreams} instance can contain one or more threads specified in the configs for the processing work.
*
* A {@code KafkaStreams} instance can co-ordinate with any other instances with the same
* {@link StreamsConfig#APPLICATION_ID_CONFIG application ID} (whether in the same process, on other processes on this
* machine, or on remote machines) as a single (possibly distributed) stream processing application.
* These instances will divide up the work based on the assignment of the input topic partitions so that all partitions
* are being consumed.
* If instances are added or fail, all (remaining) instances will rebalance the partition assignment among themselves
* to balance processing load and ensure that all input topic partitions are processed.
*
* Internally a {@code KafkaStreams} instance contains a normal {@link KafkaProducer} and {@link KafkaConsumer} instance
* that is used for reading input and writing output.
*
* A simple example might look like this:
*
* {@code
* Properties props = new Properties();
* props.put(StreamsConfig.APPLICATION_ID_CONFIG, "my-stream-processing-application");
* props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
* props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
* props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());
*
* StreamsBuilder builder = new StreamsBuilder();
* builder.stream("my-input-topic").mapValues(value -> String.valueOf(value.length())).to("my-output-topic");
*
* KafkaStreams streams = new KafkaStreams(builder.build(), props);
* streams.start();
* }
*
* @see org.apache.kafka.streams.StreamsBuilder
* @see org.apache.kafka.streams.Topology
*/
public class KafkaStreams implements AutoCloseable {
private static final String JMX_PREFIX = "kafka.streams";
private static final Set<Class<? extends Throwable>> EXCEPTIONS_NOT_TO_BE_HANDLED_BY_USERS =
new HashSet<>(Arrays.asList(IllegalStateException.class, IllegalArgumentException.class));
// processId is expected to be unique across JVMs and to be used
// in userData of the subscription request to allow the assignor to be aware
// of the co-location of stream threads' consumers. It is for internal
// usage only and should not be exposed to users at all.
private final Time time;
private final Logger log;
protected final String clientId;
private final Metrics metrics;
protected final StreamsConfig applicationConfigs;
protected final List<StreamThread> threads;
protected final StateDirectory stateDirectory;
protected final StreamsMetadataState streamsMetadataState;
private final ScheduledExecutorService stateDirCleaner;
private final ScheduledExecutorService rocksDBMetricsRecordingService;
protected final Admin adminClient;
private final StreamsMetricsImpl streamsMetrics;
private final long totalCacheSize;
private final StreamStateListener streamStateListener;
private final StateRestoreListener delegatingStateRestoreListener;
private final Map<Long, StreamThread.State> threadState;
private final UUID processId;
private final KafkaClientSupplier clientSupplier;
protected final TopologyMetadata topologyMetadata;
private final QueryableStoreProvider queryableStoreProvider;
GlobalStreamThread globalStreamThread;
private KafkaStreams.StateListener stateListener;
private StateRestoreListener globalStateRestoreListener;
private boolean oldHandler;
private BiConsumer<Throwable, Boolean> streamsUncaughtExceptionHandler;
private final Object changeThreadCount = new Object();
// container states
/**
* Kafka Streams states are the possible states that a Kafka Streams instance can be in.
* An instance must only be in one state at a time.
* The expected state transitions between the defined states are:
*
*
*                 +--------------+
*         +<----- | Created (0)  |
*         |       +-----+--------+
*         |             |
*         |             v
*         |       +----+--+------+
*         |       | Re-          |
*         +<----- | Balancing (1)| -------->+
*         |       +-----+-+------+          |
*         |             | ^                 |
*         |             v |                 |
*         |       +--------------+          v
*         |       | Running (2)  | -------->+
*         |       +------+-------+          |
*         |              |                  |
*         |              v                  |
*         |       +------+-------+     +----+-------+
*         +-----> | Pending      |     | Pending    |
*                 | Shutdown (3) |     | Error (5)  |
*                 +------+-------+     +-----+------+
*                        |                   |
*                        v                   v
*                 +------+-------+     +-----+--------+
*                 | Not          |     | Error (6)    |
*                 | Running (4)  |     +--------------+
*                 +--------------+
*
*
*
* Note the following:
* - RUNNING state will transition to REBALANCING if any of its threads is in PARTITIONS_REVOKED or PARTITIONS_ASSIGNED state
* - REBALANCING state will transition to RUNNING if all of its threads are in RUNNING state
* - Any state except NOT_RUNNING, PENDING_ERROR or ERROR can go to PENDING_SHUTDOWN (whenever close is called)
* - Of special importance: If the global stream thread dies, or all stream threads die (or both) then
* the instance will be in the ERROR state. The user will not need to close it.
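*
* A minimal sketch (illustrative only; {@code streams} is an assumed, already-created instance) of checking
* the current state before issuing interactive queries:
* {@code
* if (streams.state().isRunningOrRebalancing()) {
*     // state stores may be queried in RUNNING or REBALANCING
* }
* }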
*/
public enum State {
// Note: if you add a new state, check the below methods and how they are used within Streams to see if
// any of them should be updated to include the new state. For example a new shutdown path or terminal
// state would likely need to be included in methods like isShuttingDown(), hasCompletedShutdown(), etc.
CREATED(1, 3), // 0
REBALANCING(2, 3, 5), // 1
RUNNING(1, 2, 3, 5), // 2
PENDING_SHUTDOWN(4), // 3
NOT_RUNNING, // 4
PENDING_ERROR(6), // 5
ERROR; // 6
private final Set<Integer> validTransitions = new HashSet<>();
State(final Integer... validTransitions) {
this.validTransitions.addAll(Arrays.asList(validTransitions));
}
public boolean hasNotStarted() {
return equals(CREATED);
}
public boolean isRunningOrRebalancing() {
return equals(RUNNING) || equals(REBALANCING);
}
public boolean isShuttingDown() {
return equals(PENDING_SHUTDOWN) || equals(PENDING_ERROR);
}
public boolean hasCompletedShutdown() {
return equals(NOT_RUNNING) || equals(ERROR);
}
public boolean hasStartedOrFinishedShuttingDown() {
return isShuttingDown() || hasCompletedShutdown();
}
public boolean isValidTransition(final State newState) {
return validTransitions.contains(newState.ordinal());
}
}
private final Object stateLock = new Object();
protected volatile State state = State.CREATED;
private boolean waitOnState(final State targetState, final long waitMs) {
final long begin = time.milliseconds();
synchronized (stateLock) {
boolean interrupted = false;
long elapsedMs = 0L;
try {
while (state != targetState) {
if (waitMs > elapsedMs) {
final long remainingMs = waitMs - elapsedMs;
try {
stateLock.wait(remainingMs);
} catch (final InterruptedException e) {
interrupted = true;
}
} else {
log.debug("Cannot transit to {} within {}ms", targetState, waitMs);
return false;
}
elapsedMs = time.milliseconds() - begin;
}
} finally {
// Make sure to restore the interruption status before returning.
// We do not always own the current thread that executes this method, i.e., we do not know the
// interruption policy of the thread. The least we can do is restore the interruption status before
// the current thread exits this method.
if (interrupted) {
Thread.currentThread().interrupt();
}
}
return true;
}
}
/**
* Sets the state
* @param newState New state
*/
private boolean setState(final State newState) {
final State oldState;
synchronized (stateLock) {
oldState = state;
if (state == State.PENDING_SHUTDOWN && newState != State.NOT_RUNNING) {
// when the state is already in PENDING_SHUTDOWN, all other transitions than NOT_RUNNING (due to thread dying) will be
// refused but we do not throw exception here, to allow appropriate error handling
return false;
} else if (state == State.NOT_RUNNING && (newState == State.PENDING_SHUTDOWN || newState == State.NOT_RUNNING)) {
// when the state is already in NOT_RUNNING, its transition to PENDING_SHUTDOWN or NOT_RUNNING (due to consecutive close calls)
// will be refused but we do not throw exception here, to allow idempotent close calls
return false;
} else if (state == State.REBALANCING && newState == State.REBALANCING) {
// when the state is already in REBALANCING, it should not transit to REBALANCING again
return false;
} else if (state == State.ERROR && (newState == State.PENDING_ERROR || newState == State.ERROR)) {
// when the state is already in ERROR, its transition to PENDING_ERROR or ERROR (due to consecutive close calls)
// will be refused but we do not throw exception here, to allow idempotent close calls
return false;
} else if (state == State.PENDING_ERROR && newState != State.ERROR) {
// when the state is already in PENDING_ERROR, all other transitions than ERROR (due to thread dying) will be
// refused but we do not throw exception here, to allow appropriate error handling
return false;
} else if (!state.isValidTransition(newState)) {
throw new IllegalStateException("Stream-client " + clientId + ": Unexpected state transition from " + oldState + " to " + newState);
} else {
log.info("State transition from {} to {}", oldState, newState);
}
state = newState;
stateLock.notifyAll();
}
// we need to call the user customized state listener outside the state lock to avoid potential deadlocks
if (stateListener != null) {
stateListener.onChange(newState, oldState);
}
return true;
}
/**
* Return the current {@link State} of this {@code KafkaStreams} instance.
*
* @return the current state of this Kafka Streams instance
*/
public State state() {
return state;
}
protected boolean isRunningOrRebalancing() {
synchronized (stateLock) {
return state.isRunningOrRebalancing();
}
}
protected boolean hasStartedOrFinishedShuttingDown() {
synchronized (stateLock) {
return state.hasStartedOrFinishedShuttingDown();
}
}
protected void validateIsRunningOrRebalancing() {
synchronized (stateLock) {
if (state.hasNotStarted()) {
throw new StreamsNotStartedException("KafkaStreams has not been started, you can retry after calling start()");
}
if (!state.isRunningOrRebalancing()) {
throw new IllegalStateException("KafkaStreams is not running. State is " + state + ".");
}
}
}
/**
* Listen to {@link State} change events.
*/
public interface StateListener {
/**
* Called when state changes.
*
* @param newState new state
* @param oldState previous state
*/
void onChange(final State newState, final State oldState);
}
/**
* An app can set a single {@link KafkaStreams.StateListener} so that the app is notified when state changes.
*
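* A minimal sketch (illustrative only; {@code streams} is an assumed instance and the print statement is just for demonstration):
* {@code
* streams.setStateListener((newState, oldState) ->
*     System.out.println("state changed from " + oldState + " to " + newState));
* }
*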
* @param listener a new state listener
* @throws IllegalStateException if this {@code KafkaStreams} instance has already been started.
*/
public void setStateListener(final KafkaStreams.StateListener listener) {
synchronized (stateLock) {
if (state.hasNotStarted()) {
stateListener = listener;
} else {
throw new IllegalStateException("Can only set StateListener before calling start(). Current state is: " + state);
}
}
}
/**
* Set the handler invoked when an internal {@link StreamsConfig#NUM_STREAM_THREADS_CONFIG stream thread} abruptly
* terminates due to an uncaught exception.
*
* @param uncaughtExceptionHandler the uncaught exception handler for all internal threads; {@code null} deletes the current handler
* @throws IllegalStateException if this {@code KafkaStreams} instance has already been started.
*
* @deprecated Since 2.8.0. Use {@link KafkaStreams#setUncaughtExceptionHandler(StreamsUncaughtExceptionHandler)} instead.
*
*/
@Deprecated
public void setUncaughtExceptionHandler(final Thread.UncaughtExceptionHandler uncaughtExceptionHandler) {
synchronized (stateLock) {
if (state.hasNotStarted()) {
oldHandler = true;
processStreamThread(thread -> thread.setUncaughtExceptionHandler(uncaughtExceptionHandler));
if (globalStreamThread != null) {
globalStreamThread.setUncaughtExceptionHandler(uncaughtExceptionHandler);
}
} else {
throw new IllegalStateException("Can only set UncaughtExceptionHandler before calling start(). " +
"Current state is: " + state);
}
}
}
/**
* Set the handler invoked when an internal {@link StreamsConfig#NUM_STREAM_THREADS_CONFIG stream thread}
* throws an unexpected exception.
* These might be exceptions indicating rare bugs in Kafka Streams, or they
* might be exceptions thrown by your code, for example a NullPointerException thrown from your processor logic.
* The handler will execute on the thread that produced the exception.
* In order to get the thread that threw the exception, use {@code Thread.currentThread()}.
*
* Note, this handler must be threadsafe, since it will be shared among all threads, and invoked from any
* thread that encounters such an exception.
*
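* A minimal sketch (illustrative only; {@code streams} is an assumed instance, and replacing the failed thread is just one of the possible responses):
* {@code
* streams.setUncaughtExceptionHandler(exception ->
*     StreamsUncaughtExceptionHandler.StreamThreadExceptionResponse.REPLACE_THREAD);
* }
*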
* @param userStreamsUncaughtExceptionHandler the uncaught exception handler of type {@link StreamsUncaughtExceptionHandler} for all internal threads
* @throws IllegalStateException if this {@code KafkaStreams} instance has already been started.
* @throws NullPointerException if userStreamsUncaughtExceptionHandler is null.
*/
public void setUncaughtExceptionHandler(final StreamsUncaughtExceptionHandler userStreamsUncaughtExceptionHandler) {
synchronized (stateLock) {
if (state.hasNotStarted()) {
Objects.requireNonNull(userStreamsUncaughtExceptionHandler);
streamsUncaughtExceptionHandler =
(exception, skipThreadReplacement) ->
handleStreamsUncaughtException(exception, userStreamsUncaughtExceptionHandler, skipThreadReplacement);
processStreamThread(thread -> thread.setStreamsUncaughtExceptionHandler(streamsUncaughtExceptionHandler));
if (globalStreamThread != null) {
globalStreamThread.setUncaughtExceptionHandler(
exception -> handleStreamsUncaughtException(exception, userStreamsUncaughtExceptionHandler, false)
);
}
} else {
throw new IllegalStateException("Can only set UncaughtExceptionHandler before calling start(). " +
"Current state is: " + state);
}
}
}
private void defaultStreamsUncaughtExceptionHandler(final Throwable throwable, final boolean skipThreadReplacement) {
if (oldHandler) {
threads.remove(Thread.currentThread());
if (throwable instanceof RuntimeException) {
throw (RuntimeException) throwable;
} else if (throwable instanceof Error) {
throw (Error) throwable;
} else {
throw new RuntimeException("Unexpected checked exception caught in the uncaught exception handler", throwable);
}
} else {
handleStreamsUncaughtException(throwable, t -> SHUTDOWN_CLIENT, skipThreadReplacement);
}
}
private void replaceStreamThread(final Throwable throwable) {
if (globalStreamThread != null && Thread.currentThread().getName().equals(globalStreamThread.getName())) {
log.warn("The global thread cannot be replaced. Reverting to shutting down the client.");
log.error("Encountered the following exception during processing. " +
"The streams client is going to shut down now.", throwable);
closeToError();
}
final StreamThread deadThread = (StreamThread) Thread.currentThread();
deadThread.shutdown();
addStreamThread();
if (throwable instanceof RuntimeException) {
throw (RuntimeException) throwable;
} else if (throwable instanceof Error) {
throw (Error) throwable;
} else {
throw new RuntimeException("Unexpected checked exception caught in the uncaught exception handler", throwable);
}
}
private boolean wrappedExceptionIsIn(final Throwable throwable, final Set<Class<? extends Throwable>> exceptionsOfInterest) {
return throwable.getCause() != null && exceptionsOfInterest.contains(throwable.getCause().getClass());
}
private StreamsUncaughtExceptionHandler.StreamThreadExceptionResponse getActionForThrowable(final Throwable throwable,
final StreamsUncaughtExceptionHandler streamsUncaughtExceptionHandler) {
final StreamsUncaughtExceptionHandler.StreamThreadExceptionResponse action;
if (wrappedExceptionIsIn(throwable, EXCEPTIONS_NOT_TO_BE_HANDLED_BY_USERS)) {
action = SHUTDOWN_CLIENT;
} else {
action = streamsUncaughtExceptionHandler.handle(throwable);
}
return action;
}
private void handleStreamsUncaughtException(final Throwable throwable,
final StreamsUncaughtExceptionHandler streamsUncaughtExceptionHandler,
final boolean skipThreadReplacement) {
final StreamsUncaughtExceptionHandler.StreamThreadExceptionResponse action = getActionForThrowable(throwable, streamsUncaughtExceptionHandler);
if (oldHandler) {
log.warn("Stream's new uncaught exception handler is set as well as the deprecated old handler. " +
"The old handler will be ignored as long as a new handler is set.");
}
switch (action) {
case REPLACE_THREAD:
if (!skipThreadReplacement) {
log.error("Replacing thread in the streams uncaught exception handler", throwable);
replaceStreamThread(throwable);
} else {
log.debug("Skipping thread replacement for recoverable error");
}
break;
case SHUTDOWN_CLIENT:
log.error("Encountered the following exception during processing " +
"and Kafka Streams opted to " + action + "." +
" The streams client is going to shut down now. ", throwable);
closeToError();
break;
case SHUTDOWN_APPLICATION:
if (getNumLiveStreamThreads() == 1) {
log.warn("Attempt to shut down the application requires adding a thread to communicate the shutdown. No processing will be done on this thread");
addStreamThread();
}
if (throwable instanceof Error) {
log.error("This option requires running threads to shut down the application, " +
"but the uncaught exception was an Error, which means this runtime is no " +
"longer in a well-defined state. Attempting to send the shutdown command anyway.", throwable);
}
if (Thread.currentThread().equals(globalStreamThread) && getNumLiveStreamThreads() == 0) {
log.error("Exception in global thread caused the application to attempt to shutdown." +
" This action will succeed only if there is at least one StreamThread running on this client." +
" Currently there are no running threads so will now close the client.");
closeToError();
break;
}
processStreamThread(thread -> thread.sendShutdownRequest(AssignorError.SHUTDOWN_REQUESTED));
log.error("Encountered the following exception during processing " +
"and sent shutdown request for the entire application.", throwable);
break;
}
}
/**
* Set the listener which is triggered whenever a {@link StateStore} is being restored in order to resume
* processing.
*
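* A minimal sketch (illustrative only; {@code streams} is an assumed instance and the listener below merely reports batch progress):
* {@code
* streams.setGlobalStateRestoreListener(new StateRestoreListener() {
*     public void onRestoreStart(TopicPartition topicPartition, String storeName, long startingOffset, long endingOffset) { }
*     public void onBatchRestored(TopicPartition topicPartition, String storeName, long batchEndOffset, long numRestored) {
*         System.out.println("restored " + numRestored + " records for " + storeName);
*     }
*     public void onRestoreEnd(TopicPartition topicPartition, String storeName, long totalRestored) { }
* });
* }
*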
* @param globalStateRestoreListener The listener triggered when {@link StateStore} is being restored.
* @throws IllegalStateException if this {@code KafkaStreams} instance has already been started.
*/
public void setGlobalStateRestoreListener(final StateRestoreListener globalStateRestoreListener) {
synchronized (stateLock) {
if (state.hasNotStarted()) {
this.globalStateRestoreListener = globalStateRestoreListener;
} else {
throw new IllegalStateException("Can only set GlobalStateRestoreListener before calling start(). " +
"Current state is: " + state);
}
}
}
/**
* Get read-only handle on global metrics registry, including streams client's own metrics plus
* its embedded producer, consumer and admin clients' metrics.
*
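* A minimal sketch (illustrative only; {@code streams} is an assumed instance) that prints every metric name and its current value:
* {@code
* for (Map.Entry<MetricName, ? extends Metric> entry : streams.metrics().entrySet()) {
*     System.out.println(entry.getKey().name() + " = " + entry.getValue().metricValue());
* }
* }
*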
* @return Map of all metrics.
*/
public Map<MetricName, ? extends Metric> metrics() {
final Map<MetricName, Metric> result = new LinkedHashMap<>();
// producer and consumer clients are per-thread
processStreamThread(thread -> {
result.putAll(thread.producerMetrics());
result.putAll(thread.consumerMetrics());
// the admin client is shared among all stream threads, so this could equivalently be a single
// result.putAll(adminClient.metrics()) call outside the loop;
// it is kept per-thread intentionally, for flexibility
result.putAll(thread.adminClientMetrics());
});
// global thread's consumer client
if (globalStreamThread != null) {
result.putAll(globalStreamThread.consumerMetrics());
}
// self streams metrics
result.putAll(metrics.metrics());
return Collections.unmodifiableMap(result);
}
/**
* Class that handles stream thread transitions
*/
final class StreamStateListener implements StreamThread.StateListener {
private final Map<Long, StreamThread.State> threadState;
private GlobalStreamThread.State globalThreadState;
// this lock should always be held before the state lock
private final Object threadStatesLock;
StreamStateListener(final Map<Long, StreamThread.State> threadState,
final GlobalStreamThread.State globalThreadState) {
this.threadState = threadState;
this.globalThreadState = globalThreadState;
this.threadStatesLock = new Object();
}
/**
* If all threads are up, including the global thread, set to RUNNING
*/
private void maybeSetRunning() {
// state can be transferred to RUNNING if all threads are either RUNNING or DEAD
for (final StreamThread.State state : threadState.values()) {
if (state != StreamThread.State.RUNNING && state != StreamThread.State.DEAD) {
return;
}
}
// the global state thread is relevant only if it is started. There are cases
// when we don't have a global state thread at all, e.g., when we don't have global KTables
if (globalThreadState != null && globalThreadState != GlobalStreamThread.State.RUNNING) {
return;
}
setState(State.RUNNING);
}
@Override
public synchronized void onChange(final Thread thread,
final ThreadStateTransitionValidator abstractNewState,
final ThreadStateTransitionValidator abstractOldState) {
synchronized (threadStatesLock) {
// StreamThreads first
if (thread instanceof StreamThread) {
final StreamThread.State newState = (StreamThread.State) abstractNewState;
threadState.put(thread.getId(), newState);
if (newState == StreamThread.State.PARTITIONS_REVOKED || newState == StreamThread.State.PARTITIONS_ASSIGNED) {
setState(State.REBALANCING);
} else if (newState == StreamThread.State.RUNNING) {
maybeSetRunning();
}
} else if (thread instanceof GlobalStreamThread) {
// global stream thread has different invariants
final GlobalStreamThread.State newState = (GlobalStreamThread.State) abstractNewState;
globalThreadState = newState;
if (newState == GlobalStreamThread.State.RUNNING) {
maybeSetRunning();
} else if (newState == GlobalStreamThread.State.DEAD) {
if (state != State.PENDING_SHUTDOWN) {
log.error("Global thread has died. The streams application or client will now close to ERROR.");
closeToError();
}
}
}
}
}
}
final class DelegatingStateRestoreListener implements StateRestoreListener {
private void throwOnFatalException(final Exception fatalUserException,
final TopicPartition topicPartition,
final String storeName) {
throw new StreamsException(
String.format("Fatal user code error in store restore listener for store %s, partition %s.",
storeName,
topicPartition),
fatalUserException);
}
@Override
public void onRestoreStart(final TopicPartition topicPartition,
final String storeName,
final long startingOffset,
final long endingOffset) {
if (globalStateRestoreListener != null) {
try {
globalStateRestoreListener.onRestoreStart(topicPartition, storeName, startingOffset, endingOffset);
} catch (final Exception fatalUserException) {
throwOnFatalException(fatalUserException, topicPartition, storeName);
}
}
}
@Override
public void onBatchRestored(final TopicPartition topicPartition,
final String storeName,
final long batchEndOffset,
final long numRestored) {
if (globalStateRestoreListener != null) {
try {
globalStateRestoreListener.onBatchRestored(topicPartition, storeName, batchEndOffset, numRestored);
} catch (final Exception fatalUserException) {
throwOnFatalException(fatalUserException, topicPartition, storeName);
}
}
}
@Override
public void onRestoreEnd(final TopicPartition topicPartition, final String storeName, final long totalRestored) {
if (globalStateRestoreListener != null) {
try {
globalStateRestoreListener.onRestoreEnd(topicPartition, storeName, totalRestored);
} catch (final Exception fatalUserException) {
throwOnFatalException(fatalUserException, topicPartition, storeName);
}
}
}
}
/**
* Create a {@code KafkaStreams} instance.
*
* Note: even if you never call {@link #start()} on a {@code KafkaStreams} instance,
* you still must {@link #close()} it to avoid resource leaks.
*
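* A minimal sketch (illustrative only; {@code topology} and {@code props} are assumed to exist) using
* try-with-resources so the instance is closed even if it is never started:
* {@code
* try (KafkaStreams streams = new KafkaStreams(topology, props)) {
*     streams.start();
*     // ... block here until the application should shut down ...
* }
* }
*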
* @param topology the topology specifying the computational logic
* @param props properties for {@link StreamsConfig}
* @throws StreamsException if any fatal error occurs
*/
public KafkaStreams(final Topology topology,
final Properties props) {
this(topology, new StreamsConfig(props), new DefaultKafkaClientSupplier());
}
/**
* Create a {@code KafkaStreams} instance.
*
* Note: even if you never call {@link #start()} on a {@code KafkaStreams} instance,
* you still must {@link #close()} it to avoid resource leaks.
*
* @param topology the topology specifying the computational logic
* @param props properties for {@link StreamsConfig}
* @param clientSupplier the Kafka clients supplier which provides underlying producer and consumer clients
* for the new {@code KafkaStreams} instance
* @throws StreamsException if any fatal error occurs
*/
public KafkaStreams(final Topology topology,
final Properties props,
final KafkaClientSupplier clientSupplier) {
this(topology, new StreamsConfig(props), clientSupplier, Time.SYSTEM);
}
/**
* Create a {@code KafkaStreams} instance.
*
* Note: even if you never call {@link #start()} on a {@code KafkaStreams} instance,
* you still must {@link #close()} it to avoid resource leaks.
*
* @param topology the topology specifying the computational logic
* @param props properties for {@link StreamsConfig}
* @param time {@code Time} implementation; cannot be null
* @throws StreamsException if any fatal error occurs
*/
public KafkaStreams(final Topology topology,
final Properties props,
final Time time) {
this(topology, new StreamsConfig(props), new DefaultKafkaClientSupplier(), time);
}
/**
* Create a {@code KafkaStreams} instance.
*
* Note: even if you never call {@link #start()} on a {@code KafkaStreams} instance,
* you still must {@link #close()} it to avoid resource leaks.
*
* @param topology the topology specifying the computational logic
* @param props properties for {@link StreamsConfig}
* @param clientSupplier the Kafka clients supplier which provides underlying producer and consumer clients
* for the new {@code KafkaStreams} instance
* @param time {@code Time} implementation; cannot be null
* @throws StreamsException if any fatal error occurs
*/
public KafkaStreams(final Topology topology,
final Properties props,
final KafkaClientSupplier clientSupplier,
final Time time) {
this(topology, new StreamsConfig(props), clientSupplier, time);
}
/**
* Create a {@code KafkaStreams} instance.
*
* Note: even if you never call {@link #start()} on a {@code KafkaStreams} instance,
* you still must {@link #close()} it to avoid resource leaks.
*
* @param topology the topology specifying the computational logic
* @param applicationConfigs configs for Kafka Streams
* @throws StreamsException if any fatal error occurs
*/
public KafkaStreams(final Topology topology,
final StreamsConfig applicationConfigs) {
this(topology, applicationConfigs, new DefaultKafkaClientSupplier());
}
/**
* Create a {@code KafkaStreams} instance.
*
* Note: even if you never call {@link #start()} on a {@code KafkaStreams} instance,
* you still must {@link #close()} it to avoid resource leaks.
*
* @param topology the topology specifying the computational logic
* @param applicationConfigs configs for Kafka Streams
* @param clientSupplier the Kafka clients supplier which provides underlying producer and consumer clients
* for the new {@code KafkaStreams} instance
* @throws StreamsException if any fatal error occurs
*/
public KafkaStreams(final Topology topology,
final StreamsConfig applicationConfigs,
final KafkaClientSupplier clientSupplier) {
this(new TopologyMetadata(topology.internalTopologyBuilder, applicationConfigs), applicationConfigs, clientSupplier);
}
/**
* Create a {@code KafkaStreams} instance.
*
* Note: even if you never call {@link #start()} on a {@code KafkaStreams} instance,
* you still must {@link #close()} it to avoid resource leaks.
*
* @param topology the topology specifying the computational logic
* @param applicationConfigs configs for Kafka Streams
* @param time {@code Time} implementation; cannot be null
* @throws StreamsException if any fatal error occurs
*/
public KafkaStreams(final Topology topology,
final StreamsConfig applicationConfigs,
final Time time) {
this(new TopologyMetadata(topology.internalTopologyBuilder, applicationConfigs), applicationConfigs, new DefaultKafkaClientSupplier(), time);
}
private KafkaStreams(final Topology topology,
final StreamsConfig applicationConfigs,
final KafkaClientSupplier clientSupplier,
final Time time) throws StreamsException {
this(new TopologyMetadata(topology.internalTopologyBuilder, applicationConfigs), applicationConfigs, clientSupplier, time);
}
protected KafkaStreams(final TopologyMetadata topologyMetadata,
final StreamsConfig applicationConfigs,
final KafkaClientSupplier clientSupplier) throws StreamsException {
this(topologyMetadata, applicationConfigs, clientSupplier, Time.SYSTEM);
}
private KafkaStreams(final TopologyMetadata topologyMetadata,
final StreamsConfig applicationConfigs,
final KafkaClientSupplier clientSupplier,
final Time time) throws StreamsException {
this.applicationConfigs = applicationConfigs;
this.time = time;
this.topologyMetadata = topologyMetadata;
this.topologyMetadata.buildAndRewriteTopology();
final boolean hasGlobalTopology = topologyMetadata.hasGlobalTopology();
try {
stateDirectory = new StateDirectory(applicationConfigs, time, topologyMetadata.hasPersistentStores(), topologyMetadata.hasNamedTopologies());
processId = stateDirectory.initializeProcessId();
} catch (final ProcessorStateException fatal) {
throw new StreamsException(fatal);
}
// The application ID is a required config and hence should always have value
final String userClientId = applicationConfigs.getString(StreamsConfig.CLIENT_ID_CONFIG);
final String applicationId = applicationConfigs.getString(StreamsConfig.APPLICATION_ID_CONFIG);
if (userClientId.length() <= 0) {
clientId = applicationId + "-" + processId;
} else {
clientId = userClientId;
}
final LogContext logContext = new LogContext(String.format("stream-client [%s] ", clientId));
this.log = logContext.logger(getClass());
topologyMetadata.setLog(logContext);
// use client id instead of thread client id since this admin client may be shared among threads
this.clientSupplier = clientSupplier;
adminClient = clientSupplier.getAdmin(applicationConfigs.getAdminConfigs(ClientUtils.getSharedAdminClientId(clientId)));
log.info("Kafka Streams version: {}", ClientMetrics.version());
log.info("Kafka Streams commit ID: {}", ClientMetrics.commitId());
metrics = getMetrics(applicationConfigs, time, clientId);
streamsMetrics = new StreamsMetricsImpl(
metrics,
clientId,
applicationConfigs.getString(StreamsConfig.BUILT_IN_METRICS_VERSION_CONFIG),
time
);
ClientMetrics.addVersionMetric(streamsMetrics);
ClientMetrics.addCommitIdMetric(streamsMetrics);
ClientMetrics.addApplicationIdMetric(streamsMetrics, applicationConfigs.getString(StreamsConfig.APPLICATION_ID_CONFIG));
ClientMetrics.addTopologyDescriptionMetric(streamsMetrics, (metricsConfig, now) -> this.topologyMetadata.topologyDescriptionString());
ClientMetrics.addStateMetric(streamsMetrics, (metricsConfig, now) -> state);
threads = Collections.synchronizedList(new LinkedList<>());
ClientMetrics.addNumAliveStreamThreadMetric(streamsMetrics, (metricsConfig, now) -> getNumLiveStreamThreads());
streamsMetadataState = new StreamsMetadataState(
this.topologyMetadata,
parseHostInfo(applicationConfigs.getString(StreamsConfig.APPLICATION_SERVER_CONFIG)),
logContext
);
oldHandler = false;
streamsUncaughtExceptionHandler = this::defaultStreamsUncaughtExceptionHandler;
delegatingStateRestoreListener = new DelegatingStateRestoreListener();
totalCacheSize = applicationConfigs.getLong(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG);
final int numStreamThreads = topologyMetadata.getNumStreamThreads(applicationConfigs);
final long cacheSizePerThread = getCacheSizePerThread(numStreamThreads);
GlobalStreamThread.State globalThreadState = null;
if (hasGlobalTopology) {
final String globalThreadId = clientId + "-GlobalStreamThread";
globalStreamThread = new GlobalStreamThread(
topologyMetadata.globalTaskTopology(),
applicationConfigs,
clientSupplier.getGlobalConsumer(applicationConfigs.getGlobalConsumerConfigs(clientId)),
stateDirectory,
cacheSizePerThread,
streamsMetrics,
time,
globalThreadId,
delegatingStateRestoreListener,
exception -> defaultStreamsUncaughtExceptionHandler(exception, false)
);
globalThreadState = globalStreamThread.state();
}
threadState = new HashMap<>(numStreamThreads);
streamStateListener = new StreamStateListener(threadState, globalThreadState);
final GlobalStateStoreProvider globalStateStoreProvider = new GlobalStateStoreProvider(this.topologyMetadata.globalStateStores());
if (hasGlobalTopology) {
globalStreamThread.setStateListener(streamStateListener);
}
queryableStoreProvider = new QueryableStoreProvider(globalStateStoreProvider);
for (int i = 1; i <= numStreamThreads; i++) {
createAndAddStreamThread(cacheSizePerThread, i);
}
stateDirCleaner = setupStateDirCleaner();
rocksDBMetricsRecordingService = maybeCreateRocksDBMetricsRecordingService(clientId, applicationConfigs);
}
private StreamThread createAndAddStreamThread(final long cacheSizePerThread, final int threadIdx) {
final StreamThread streamThread = StreamThread.create(
topologyMetadata,
applicationConfigs,
clientSupplier,
adminClient,
processId,
clientId,
streamsMetrics,
time,
streamsMetadataState,
cacheSizePerThread,
stateDirectory,
delegatingStateRestoreListener,
threadIdx,
KafkaStreams.this::closeToError,
streamsUncaughtExceptionHandler
);
streamThread.setStateListener(streamStateListener);
threads.add(streamThread);
threadState.put(streamThread.getId(), streamThread.state());
queryableStoreProvider.addStoreProviderForThread(streamThread.getName(), new StreamThreadStateStoreProvider(streamThread));
return streamThread;
}
private static Metrics getMetrics(final StreamsConfig config, final Time time, final String clientId) {
final MetricConfig metricConfig = new MetricConfig()
.samples(config.getInt(StreamsConfig.METRICS_NUM_SAMPLES_CONFIG))
.recordLevel(Sensor.RecordingLevel.forName(config.getString(StreamsConfig.METRICS_RECORDING_LEVEL_CONFIG)))
.timeWindow(config.getLong(StreamsConfig.METRICS_SAMPLE_WINDOW_MS_CONFIG), TimeUnit.MILLISECONDS);
final List<MetricsReporter> reporters = config.getConfiguredInstances(StreamsConfig.METRIC_REPORTER_CLASSES_CONFIG,
MetricsReporter.class,
Collections.singletonMap(StreamsConfig.CLIENT_ID_CONFIG, clientId));
final JmxReporter jmxReporter = new JmxReporter();
jmxReporter.configure(config.originals());
reporters.add(jmxReporter);
final MetricsContext metricsContext = new KafkaMetricsContext(JMX_PREFIX,
config.originalsWithPrefix(CommonClientConfigs.METRICS_CONTEXT_PREFIX));
return new Metrics(metricConfig, reporters, time, metricsContext);
}
/**
* Adds and starts a stream thread in addition to the stream threads that are already running in this
* Kafka Streams client.
*
* Since the number of stream threads increases, the sizes of the caches in the new stream thread
* and the existing stream threads are adapted so that the sum of the cache sizes over all stream
* threads does not exceed the total cache size specified in configuration
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG}.
*
* Stream threads can only be added if this Kafka Streams client is in state RUNNING or REBALANCING.
*
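* A minimal sketch (illustrative only; {@code streams} is an assumed instance):
* {@code
* Optional<String> addedThread = streams.addStreamThread();
* addedThread.ifPresent(name -> System.out.println("added " + name));
* }
*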
* @return name of the added stream thread or empty if a new stream thread could not be added
*/
public Optional<String> addStreamThread() {
if (isRunningOrRebalancing()) {
final StreamThread streamThread;
synchronized (changeThreadCount) {
final int threadIdx = getNextThreadIndex();
final int numLiveThreads = getNumLiveStreamThreads();
final long cacheSizePerThread = getCacheSizePerThread(numLiveThreads + 1);
log.info("Adding StreamThread-{}, there will now be {} live threads and the new cache size per thread is {}",
threadIdx, numLiveThreads + 1, cacheSizePerThread);
resizeThreadCache(cacheSizePerThread);
// Creating the thread must be done while holding the lock in order to avoid a duplicate thread index.
// If a duplicate index occurred, the thread metadata could be duplicated too.
streamThread = createAndAddStreamThread(cacheSizePerThread, threadIdx);
}
synchronized (stateLock) {
if (isRunningOrRebalancing()) {
streamThread.start();
return Optional.of(streamThread.getName());
} else {
log.warn("Terminating the new thread because the Kafka Streams client is in state {}", state);
streamThread.shutdown();
threads.remove(streamThread);
final long cacheSizePerThread = getCacheSizePerThread(getNumLiveStreamThreads());
log.info("Resizing thread cache due to terminating added thread, new cache size per thread is {}", cacheSizePerThread);
resizeThreadCache(cacheSizePerThread);
return Optional.empty();
}
}
} else {
log.warn("Cannot add a stream thread when Kafka Streams client is in state {}", state);
return Optional.empty();
}
}
/**
* Removes one stream thread out of the running stream threads from this Kafka Streams client.
*
* The removed stream thread is gracefully shut down. This method does not specify which stream
* thread is shut down.
*
* Since the number of stream threads decreases, the sizes of the caches in the remaining stream
* threads are adapted so that the sum of the cache sizes over all stream threads equals the total
* cache size specified in configuration {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG}.
*
* @return name of the removed stream thread or empty if a stream thread could not be removed because
* no stream threads are alive
*/
public Optional<String> removeStreamThread() {
return removeStreamThread(Long.MAX_VALUE);
}
/**
* Removes one stream thread out of the running stream threads from this Kafka Streams client.
*
* The removed stream thread is gracefully shut down. This method does not specify which stream
* thread is shut down.
*
* Since the number of stream threads decreases, the sizes of the caches in the remaining stream
* threads are adapted so that the sum of the cache sizes over all stream threads equals the total
* cache size specified in configuration {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG}.
*
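* A minimal sketch (illustrative only; the 60-second timeout is an arbitrary choice):
* {@code
* Optional<String> removedThread = streams.removeStreamThread(Duration.ofSeconds(60));
* }
*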
* @param timeout The length of time to wait for the thread to shutdown
* @throws org.apache.kafka.common.errors.TimeoutException if the thread does not stop in time
* @return name of the removed stream thread or empty if a stream thread could not be removed because
* no stream threads are alive
*/
public Optional<String> removeStreamThread(final Duration timeout) {
final String msgPrefix = prepareMillisCheckFailMsgPrefix(timeout, "timeout");
final long timeoutMs = validateMillisecondDuration(timeout, msgPrefix);
return removeStreamThread(timeoutMs);
}
private Optional<String> removeStreamThread(final long timeoutMs) throws TimeoutException {
final long startMs = time.milliseconds();
if (isRunningOrRebalancing()) {
synchronized (changeThreadCount) {
// make a copy of threads to avoid holding lock
for (final StreamThread streamThread : new ArrayList<>(threads)) {
final boolean callingThreadIsNotCurrentStreamThread = !streamThread.getName().equals(Thread.currentThread().getName());
if (streamThread.isAlive() && (callingThreadIsNotCurrentStreamThread || getNumLiveStreamThreads() == 1)) {
log.info("Removing StreamThread " + streamThread.getName());
final Optional<String> groupInstanceID = streamThread.getGroupInstanceID();
streamThread.requestLeaveGroupDuringShutdown();
streamThread.shutdown();
if (!streamThread.getName().equals(Thread.currentThread().getName())) {
final long remainingTimeMs = timeoutMs - (time.milliseconds() - startMs);
if (remainingTimeMs <= 0 || !streamThread.waitOnThreadState(StreamThread.State.DEAD, remainingTimeMs)) {
log.warn("{} did not shutdown in the allotted time.", streamThread.getName());
// Don't remove from threads until shutdown is complete. We will trim it from the
// list once it reaches DEAD, and if for some reason it's hanging indefinitely in the
// shutdown then we should just consider this thread.id to be burned
} else {
log.info("Successfully removed {} in {}ms", streamThread.getName(), time.milliseconds() - startMs);
threads.remove(streamThread);
queryableStoreProvider.removeStoreProviderForThread(streamThread.getName());
}
} else {
log.info("{} is the last remaining thread and must remove itself, therefore we cannot wait "
+ "for it to complete shutdown as this will result in deadlock.", streamThread.getName());
}
final long cacheSizePerThread = getCacheSizePerThread(getNumLiveStreamThreads());
log.info("Resizing thread cache due to thread removal, new cache size per thread is {}", cacheSizePerThread);
resizeThreadCache(cacheSizePerThread);
if (groupInstanceID.isPresent() && callingThreadIsNotCurrentStreamThread) {
final MemberToRemove memberToRemove = new MemberToRemove(groupInstanceID.get());
final Collection<MemberToRemove> membersToRemove = Collections.singletonList(memberToRemove);
final RemoveMembersFromConsumerGroupResult removeMembersFromConsumerGroupResult =
adminClient.removeMembersFromConsumerGroup(
applicationConfigs.getString(StreamsConfig.APPLICATION_ID_CONFIG),
new RemoveMembersFromConsumerGroupOptions(membersToRemove)
);
try {
final long remainingTimeMs = timeoutMs - (time.milliseconds() - startMs);
removeMembersFromConsumerGroupResult.memberResult(memberToRemove).get(remainingTimeMs, TimeUnit.MILLISECONDS);
} catch (final java.util.concurrent.TimeoutException e) {
log.error("Could not remove static member {} from consumer group {} due to a timeout: {}",
groupInstanceID.get(), applicationConfigs.getString(StreamsConfig.APPLICATION_ID_CONFIG), e);
throw new TimeoutException(e.getMessage(), e);
} catch (final InterruptedException e) {
Thread.currentThread().interrupt();
} catch (final ExecutionException e) {
log.error("Could not remove static member {} from consumer group {} due to: {}",
groupInstanceID.get(), applicationConfigs.getString(StreamsConfig.APPLICATION_ID_CONFIG), e);
throw new StreamsException(
"Could not remove static member " + groupInstanceID.get()
+ " from consumer group " + applicationConfigs.getString(StreamsConfig.APPLICATION_ID_CONFIG)
+ " for the following reason: ",
e.getCause()
);
}
}
final long remainingTimeMs = timeoutMs - (time.milliseconds() - startMs);
if (remainingTimeMs <= 0) {
throw new TimeoutException("Thread " + streamThread.getName() + " did not stop in the allotted time");
}
return Optional.of(streamThread.getName());
}
}
}
log.warn("There are no threads eligible for removal");
} else {
log.warn("Cannot remove a stream thread when Kafka Streams client is in state " + state());
}
return Optional.empty();
}
/**
* Takes a snapshot and counts the number of stream threads which are not in PENDING_SHUTDOWN or DEAD
*
* note: iteration over SynchronizedList is not thread safe so it must be manually synchronized. However, we may
* require other locks when looping threads and it could cause deadlock. Hence, we create a copy to avoid holding
* threads lock when looping threads.
* @return number of alive stream threads
*/
private int getNumLiveStreamThreads() {
final AtomicInteger numLiveThreads = new AtomicInteger(0);
synchronized (threads) {
processStreamThread(thread -> {
if (thread.state() == StreamThread.State.DEAD) {
log.debug("Trimming thread {} from the threads list since its state is {}", thread.getName(), StreamThread.State.DEAD);
threads.remove(thread);
} else if (thread.state() == StreamThread.State.PENDING_SHUTDOWN) {
log.debug("Skipping thread {} from num live threads computation since its state is {}",
thread.getName(), StreamThread.State.PENDING_SHUTDOWN);
} else {
numLiveThreads.incrementAndGet();
}
});
return numLiveThreads.get();
}
}
private int getNextThreadIndex() {
final HashSet<String> allLiveThreadNames = new HashSet<>();
final AtomicInteger maxThreadId = new AtomicInteger(1);
synchronized (threads) {
processStreamThread(thread -> {
// trim any DEAD threads from the list so we can reuse the thread.id
// this is only safe to do once the thread has fully completed shutdown
if (thread.state() == StreamThread.State.DEAD) {
threads.remove(thread);
} else {
allLiveThreadNames.add(thread.getName());
// Assume threads are always named with the "-StreamThread-" suffix
final int threadId = Integer.parseInt(thread.getName().substring(thread.getName().lastIndexOf("-") + 1));
if (threadId > maxThreadId.get()) {
maxThreadId.set(threadId);
}
}
});
final String baseName = clientId + "-StreamThread-";
for (int i = 1; i <= maxThreadId.get(); i++) {
final String name = baseName + i;
if (!allLiveThreadNames.contains(name)) {
return i;
}
}
// It's safe to use threads.size() rather than getNumLiveStreamThreads() to infer the number of threads
// here since we trimmed any DEAD threads earlier in this method while holding the lock
return threads.size() + 1;
}
}
private long getCacheSizePerThread(final int numStreamThreads) {
if (numStreamThreads == 0) {
return totalCacheSize;
}
return totalCacheSize / (numStreamThreads + (topologyMetadata.hasGlobalTopology() ? 1 : 0));
}
private void resizeThreadCache(final long cacheSizePerThread) {
processStreamThread(thread -> thread.resizeCache(cacheSizePerThread));
if (globalStreamThread != null) {
globalStreamThread.resize(cacheSizePerThread);
}
}
private ScheduledExecutorService setupStateDirCleaner() {
return Executors.newSingleThreadScheduledExecutor(r -> {
final Thread thread = new Thread(r, clientId + "-CleanupThread");
thread.setDaemon(true);
return thread;
});
}
private static ScheduledExecutorService maybeCreateRocksDBMetricsRecordingService(final String clientId,
final StreamsConfig config) {
if (RecordingLevel.forName(config.getString(METRICS_RECORDING_LEVEL_CONFIG)) == RecordingLevel.DEBUG) {
return Executors.newSingleThreadScheduledExecutor(r -> {
final Thread thread = new Thread(r, clientId + "-RocksDBMetricsRecordingTrigger");
thread.setDaemon(true);
return thread;
});
}
return null;
}
private static HostInfo parseHostInfo(final String endPoint) {
final HostInfo hostInfo = HostInfo.buildFromEndpoint(endPoint);
if (hostInfo == null) {
return StreamsMetadataState.UNKNOWN_HOST;
} else {
return hostInfo;
}
}
/**
* Start the {@code KafkaStreams} instance by starting all its threads.
* This function is expected to be called only once during the life cycle of the client.
*
* Because threads are started in the background, this method does not block.
* However, if you have global stores in your topology, this method blocks until all global stores are restored.
* As a consequence, any fatal exception that happens during processing is by default only logged.
* If you want to be notified about dying threads, you can
* {@link #setUncaughtExceptionHandler(Thread.UncaughtExceptionHandler) register an uncaught exception handler}
* before starting the {@code KafkaStreams} instance.
*
* Note, for brokers with version {@code 0.9.x} or lower, the broker version cannot be checked.
* There will be no error and the client will hang and retry to verify the broker version until it
* {@link StreamsConfig#REQUEST_TIMEOUT_MS_CONFIG times out}.
* @throws IllegalStateException if process was already started
* @throws StreamsException if the Kafka brokers have version 0.10.0.x or
* if {@link StreamsConfig#PROCESSING_GUARANTEE_CONFIG exactly-once} is enabled for pre 0.11.0.x brokers
*/
public synchronized void start() throws IllegalStateException, StreamsException {
if (setState(State.REBALANCING)) {
log.debug("Starting Streams client");
if (globalStreamThread != null) {
globalStreamThread.start();
}
final int numThreads = processStreamThread(StreamThread::start);
log.info("Started {} stream threads", numThreads);
final Long cleanupDelay = applicationConfigs.getLong(StreamsConfig.STATE_CLEANUP_DELAY_MS_CONFIG);
stateDirCleaner.scheduleAtFixedRate(() -> {
// we do not use lock here since we only read on the value and act on it
if (state == State.RUNNING) {
stateDirectory.cleanRemovedTasks(cleanupDelay);
}
}, cleanupDelay, cleanupDelay, TimeUnit.MILLISECONDS);
final long recordingDelay = 0;
final long recordingInterval = 1;
if (rocksDBMetricsRecordingService != null) {
rocksDBMetricsRecordingService.scheduleAtFixedRate(
streamsMetrics.rocksDBMetricsRecordingTrigger(),
recordingDelay,
recordingInterval,
TimeUnit.MINUTES
);
}
} else {
throw new IllegalStateException("The client is either already started or already stopped, cannot re-start");
}
}
/**
* Shutdown this {@code KafkaStreams} instance by signaling all the threads to stop, and then wait for them to join.
* This will block until all threads have stopped.
*/
public void close() {
close(Long.MAX_VALUE);
}
private Thread shutdownHelper(final boolean error) {
stateDirCleaner.shutdownNow();
if (rocksDBMetricsRecordingService != null) {
rocksDBMetricsRecordingService.shutdownNow();
}
// wait for all threads to join in a separate thread;
// save the current thread so that if it is a stream thread
// we don't attempt to join it and cause a deadlock
return new Thread(() -> {
// notify all the threads to stop; avoid deadlocks by stopping any
// further state reports from the thread since we're shutting down
int numStreamThreads = processStreamThread(StreamThread::shutdown);
log.info("Shutting down {} stream threads", numStreamThreads);
topologyMetadata.wakeupThreads();
numStreamThreads = processStreamThread(thread -> {
try {
if (!thread.isRunning()) {
log.debug("Shutdown {} complete", thread.getName());
thread.join();
}
} catch (final InterruptedException ex) {
log.warn("Shutdown {} interrupted", thread.getName());
Thread.currentThread().interrupt();
}
});
log.info("Shutdown {} stream threads complete", numStreamThreads);
if (globalStreamThread != null) {
log.info("Shutting down the global stream threads");
globalStreamThread.shutdown();
}
if (globalStreamThread != null && !globalStreamThread.stillRunning()) {
try {
globalStreamThread.join();
} catch (final InterruptedException e) {
log.warn("Shutdown the global stream thread interrupted");
Thread.currentThread().interrupt();
}
globalStreamThread = null;
log.info("Shutdown global stream threads complete");
}
stateDirectory.close();
adminClient.close();
streamsMetrics.removeAllClientLevelSensorsAndMetrics();
metrics.close();
if (!error) {
setState(State.NOT_RUNNING);
} else {
setState(State.ERROR);
}
}, clientId + "-CloseThread");
}
private boolean close(final long timeoutMs) {
if (state.hasCompletedShutdown()) {
log.info("Streams client is already in the terminal {} state, all resources are closed and the client has stopped.", state);
return true;
}
if (state.isShuttingDown()) {
log.info("Streams client is in {}, all resources are being closed and the client will be stopped.", state);
if (state == State.PENDING_ERROR && waitOnState(State.ERROR, timeoutMs)) {
log.info("Streams client stopped to ERROR completely");
return true;
} else if (state == State.PENDING_SHUTDOWN && waitOnState(State.NOT_RUNNING, timeoutMs)) {
log.info("Streams client stopped to NOT_RUNNING completely");
return true;
} else {
log.warn("Streams client cannot transition to {} completely within the timeout",
state == State.PENDING_SHUTDOWN ? State.NOT_RUNNING : State.ERROR);
return false;
}
}
if (!setState(State.PENDING_SHUTDOWN)) {
// if we can't transition to PENDING_SHUTDOWN but not because we're already shutting down, then it must be fatal
log.error("Failed to transition to PENDING_SHUTDOWN, current state is {}", state);
throw new StreamsException("Failed to shut down while in state " + state);
} else {
final Thread shutdownThread = shutdownHelper(false);
shutdownThread.setDaemon(true);
shutdownThread.start();
}
if (waitOnState(State.NOT_RUNNING, timeoutMs)) {
log.info("Streams client stopped completely");
return true;
} else {
log.info("Streams client cannot stop completely within the timeout");
return false;
}
}
private void closeToError() {
if (!setState(State.PENDING_ERROR)) {
log.info("Skipping shutdown since we are already in " + state());
} else {
final Thread shutdownThread = shutdownHelper(true);
shutdownThread.setDaemon(true);
shutdownThread.start();
}
}
/**
* Shut down this {@code KafkaStreams} by signaling all the threads to stop, and then wait up to the timeout for the
* threads to join.
* A {@code timeout} of Duration.ZERO (or any other zero duration) makes the close operation asynchronous.
* Negative-duration timeouts are rejected.
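*
* An illustrative usage sketch (assuming a running {@code KafkaStreams} instance named {@code streams}; the
* 10-second timeout is an arbitrary example):
* <pre>{@code
* // request shutdown and wait up to 10 seconds for all threads to join
* final boolean stopped = streams.close(Duration.ofSeconds(10));
* if (!stopped) {
*     // the timeout elapsed before shutdown finished; threads may still be stopping in the background
* }
* }</pre>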
*
* @param timeout how long to wait for the threads to shutdown
* @return {@code true} if all threads were successfully stopped—{@code false} if the timeout was reached
* before all threads stopped
* Note that this method must not be called in the {@link StateListener#onChange(KafkaStreams.State, KafkaStreams.State)} callback of {@link StateListener}.
* @throws IllegalArgumentException if {@code timeout} can't be represented as {@code long milliseconds}
*/
public synchronized boolean close(final Duration timeout) throws IllegalArgumentException {
final String msgPrefix = prepareMillisCheckFailMsgPrefix(timeout, "timeout");
final long timeoutMs = validateMillisecondDuration(timeout, msgPrefix);
if (timeoutMs < 0) {
throw new IllegalArgumentException("Timeout can't be negative.");
}
log.debug("Stopping Streams client with timeoutMillis = {} ms.", timeoutMs);
return close(timeoutMs);
}
/**
* Do a clean up of the local {@link StateStore} directory ({@link StreamsConfig#STATE_DIR_CONFIG}) by deleting all
* data with regard to the {@link StreamsConfig#APPLICATION_ID_CONFIG application ID}.
*
* May only be called either before this {@code KafkaStreams} instance is {@link #start() started} or after the
* instance is {@link #close() closed}.
*
* Calling this method triggers a restore of local {@link StateStore}s on the next {@link #start() application start}.
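*
* An illustrative sketch (assuming {@code topology} and {@code props} are a pre-built {@link Topology} and its
* configuration; the names are examples only):
* <pre>{@code
* final KafkaStreams streams = new KafkaStreams(topology, props);
* streams.cleanUp();  // allowed only before start() or after close()
* streams.start();
* }</pre>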
*
* @throws IllegalStateException if this {@code KafkaStreams} instance has been started and hasn't fully shut down
* @throws StreamsException if cleanup failed
*/
public void cleanUp() {
if (!(state.hasNotStarted() || state.hasCompletedShutdown())) {
throw new IllegalStateException("Cannot clean up while running.");
}
stateDirectory.clean();
}
/**
* Find all currently running {@code KafkaStreams} instances (potentially remotely) that use the same
* {@link StreamsConfig#APPLICATION_ID_CONFIG application ID} as this instance (i.e., all instances that belong to
* the same Kafka Streams application) and return {@link StreamsMetadata} for each discovered instance.
*
* Note: this is a point in time view and it may change due to partition reassignment.
*
* @return {@link StreamsMetadata} for each {@code KafkaStreams} instance of this application
* @deprecated since 3.0.0 use {@link KafkaStreams#metadataForAllStreamsClients}
*/
@Deprecated
public Collection<org.apache.kafka.streams.state.StreamsMetadata> allMetadata() {
validateIsRunningOrRebalancing();
return streamsMetadataState.getAllMetadata().stream().map(streamsMetadata ->
new org.apache.kafka.streams.state.StreamsMetadata(streamsMetadata.hostInfo(),
streamsMetadata.stateStoreNames(),
streamsMetadata.topicPartitions(),
streamsMetadata.standbyStateStoreNames(),
streamsMetadata.standbyTopicPartitions()))
.collect(Collectors.toSet());
}
/**
* Find all currently running {@code KafkaStreams} instances (potentially remotely) that use the same
* {@link StreamsConfig#APPLICATION_ID_CONFIG application ID} as this instance (i.e., all instances that belong to
* the same Kafka Streams application) and return {@link StreamsMetadata} for each discovered instance.
*
* Note: this is a point in time view and it may change due to partition reassignment.
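*
* An illustrative sketch (assuming a running {@code KafkaStreams} instance named {@code streams}):
* <pre>{@code
* for (final StreamsMetadata metadata : streams.metadataForAllStreamsClients()) {
*     System.out.println(metadata.hostInfo());
* }
* }</pre>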
*
* @return {@link StreamsMetadata} for each {@code KafkaStreams} instance of this application
*/
public Collection<StreamsMetadata> metadataForAllStreamsClients() {
validateIsRunningOrRebalancing();
return streamsMetadataState.getAllMetadata();
}
/**
* Find all currently running {@code KafkaStreams} instances (potentially remotely) that
*
* - use the same {@link StreamsConfig#APPLICATION_ID_CONFIG application ID} as this instance (i.e., all
* instances that belong to the same Kafka Streams application)
* - and that contain a {@link StateStore} with the given {@code storeName}
*
* and return {@link StreamsMetadata} for each discovered instance.
*
* Note: this is a point in time view and it may change due to partition reassignment.
*
* @param storeName the {@code storeName} to find metadata for
* @return {@link StreamsMetadata} for each {@code KafkaStreams} instance with the provided {@code storeName} of
* this application
* @deprecated since 3.0.0 use {@link KafkaStreams#streamsMetadataForStore} instead
*/
@Deprecated
public Collection<org.apache.kafka.streams.state.StreamsMetadata> allMetadataForStore(final String storeName) {
validateIsRunningOrRebalancing();
return streamsMetadataState.getAllMetadataForStore(storeName).stream().map(streamsMetadata ->
new org.apache.kafka.streams.state.StreamsMetadata(streamsMetadata.hostInfo(),
streamsMetadata.stateStoreNames(),
streamsMetadata.topicPartitions(),
streamsMetadata.standbyStateStoreNames(),
streamsMetadata.standbyTopicPartitions()))
.collect(Collectors.toSet());
}
/**
* Find all currently running {@code KafkaStreams} instances (potentially remotely) that
*
* - use the same {@link StreamsConfig#APPLICATION_ID_CONFIG application ID} as this instance (i.e., all
* instances that belong to the same Kafka Streams application)
* - and that contain a {@link StateStore} with the given {@code storeName}
*
* and return {@link StreamsMetadata} for each discovered instance.
*
* Note: this is a point in time view and it may change due to partition reassignment.
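*
* An illustrative sketch (assuming a running instance {@code streams}; the store name {@code "counts"} is an
* example only):
* <pre>{@code
* final Collection<StreamsMetadata> hosts = streams.streamsMetadataForStore("counts");
* for (final StreamsMetadata metadata : hosts) {
*     System.out.println(metadata.hostInfo());
* }
* }</pre>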
*
* @param storeName the {@code storeName} to find metadata for
* @return {@link StreamsMetadata} for each {@code KafkaStreams} instance with the provided {@code storeName} of
* this application
*/
public Collection<StreamsMetadata> streamsMetadataForStore(final String storeName) {
validateIsRunningOrRebalancing();
return streamsMetadataState.getAllMetadataForStore(storeName);
}
/**
* Finds the metadata containing the active hosts and standby hosts where the key being queried would reside.
*
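* An illustrative sketch (assuming a running instance {@code streams}; the store name {@code "counts"} and the
* key {@code "some-key"} are examples only):
* <pre>{@code
* final KeyQueryMetadata metadata =
*     streams.queryMetadataForKey("counts", "some-key", Serdes.String().serializer());
* final HostInfo activeHost = metadata.activeHost();
* }</pre>
*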
* @param storeName the {@code storeName} to find metadata for
* @param key the key to find metadata for
* @param keySerializer serializer for the key
* @param <K> key type
* @return {@link KeyQueryMetadata} containing all metadata about hosting the given key for the given store,
* or {@code null} if no matching metadata could be found.
*/
public <K> KeyQueryMetadata queryMetadataForKey(final String storeName,
final K key,
final Serializer<K> keySerializer) {
validateIsRunningOrRebalancing();
return streamsMetadataState.getKeyQueryMetadataForKey(storeName, key, keySerializer);
}
/**
* Finds the metadata containing the active hosts and standby hosts where the key being queried would reside.
*
* @param storeName the {@code storeName} to find metadata for
* @param key the key to find metadata for
* @param partitioner the partitioner to be used to locate the host for the key
* @param <K> key type
* @return {@link KeyQueryMetadata} containing all metadata about hosting the given key for the given store, using
* the supplied partitioner, or {@code null} if no matching metadata could be found.
*/
public <K> KeyQueryMetadata queryMetadataForKey(final String storeName,
final K key,
final StreamPartitioner<? super K, ?> partitioner) {
validateIsRunningOrRebalancing();
return streamsMetadataState.getKeyQueryMetadataForKey(storeName, key, partitioner);
}
/**
* Get a facade wrapping the local {@link StateStore} instances with the provided {@link StoreQueryParameters}.
* The returned object can be used to query the {@link StateStore} instances.
*
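* An illustrative sketch (assuming a running instance {@code streams}; the store name {@code "counts"} is an
* example and must refer to a key-value store registered in the topology):
* <pre>{@code
* final ReadOnlyKeyValueStore<String, Long> counts = streams.store(
*     StoreQueryParameters.fromNameAndType("counts", QueryableStoreTypes.keyValueStore()));
* final Long value = counts.get("some-key");
* }</pre>
*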
* @param storeQueryParameters the parameters used to fetch a queryable store
* @return A facade wrapping the local {@link StateStore} instances
* @throws StreamsNotStartedException If Streams has not yet been started. Just call {@link KafkaStreams#start()}
* and then retry this call.
* @throws UnknownStateStoreException If the specified store name does not exist in the topology.
* @throws InvalidStateStorePartitionException If the specified partition does not exist.
* @throws InvalidStateStoreException If the Streams instance isn't in a queryable state.
* If the store's type does not match the QueryableStoreType,
* the Streams instance is not in a queryable state with respect
* to the parameters, or if the store is not available locally, then
* an InvalidStateStoreException is thrown upon store access.
*/
public <T> T store(final StoreQueryParameters<T> storeQueryParameters) {
validateIsRunningOrRebalancing();
final String storeName = storeQueryParameters.storeName();
if (!topologyMetadata.hasStore(storeName)) {
throw new UnknownStateStoreException(
"Cannot get state store " + storeName + " because no such store is registered in the topology."
);
}
return queryableStoreProvider.getStore(storeQueryParameters);
}
/**
* Handle each stream thread in a snapshot of threads.
* Note: iteration over a SynchronizedList is not thread-safe, so it must be manually synchronized. However, we may
* require other locks while looping over threads, which could cause a deadlock. Hence, we create a copy to avoid
* holding the threads lock while looping.
* @param consumer handler
*/
protected int processStreamThread(final Consumer<StreamThread> consumer) {
final List<StreamThread> copy = new ArrayList<>(threads);
for (final StreamThread thread : copy) consumer.accept(thread);
return copy.size();
}
/**
* Returns runtime information about the local threads of this {@link KafkaStreams} instance.
*
* @return the set of {@link org.apache.kafka.streams.processor.ThreadMetadata}.
* @deprecated since 3.0 use {@link #metadataForLocalThreads()}
*/
@Deprecated
@SuppressWarnings("deprecation")
public Set<org.apache.kafka.streams.processor.ThreadMetadata> localThreadsMetadata() {
return metadataForLocalThreads().stream().map(threadMetadata -> new org.apache.kafka.streams.processor.ThreadMetadata(
threadMetadata.threadName(),
threadMetadata.threadState(),
threadMetadata.consumerClientId(),
threadMetadata.restoreConsumerClientId(),
threadMetadata.producerClientIds(),
threadMetadata.adminClientId(),
threadMetadata.activeTasks().stream().map(taskMetadata -> new org.apache.kafka.streams.processor.TaskMetadata(
taskMetadata.taskId().toString(),
taskMetadata.topicPartitions(),
taskMetadata.committedOffsets(),
taskMetadata.endOffsets(),
taskMetadata.timeCurrentIdlingStarted())
).collect(Collectors.toSet()),
threadMetadata.standbyTasks().stream().map(taskMetadata -> new org.apache.kafka.streams.processor.TaskMetadata(
taskMetadata.taskId().toString(),
taskMetadata.topicPartitions(),
taskMetadata.committedOffsets(),
taskMetadata.endOffsets(),
taskMetadata.timeCurrentIdlingStarted())
).collect(Collectors.toSet())))
.collect(Collectors.toSet());
}
/**
* Returns runtime information about the local threads of this {@link KafkaStreams} instance.
*
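* An illustrative sketch (assuming a running instance {@code streams}):
* <pre>{@code
* for (final ThreadMetadata thread : streams.metadataForLocalThreads()) {
*     System.out.println(thread.threadName() + ": " + thread.threadState());
* }
* }</pre>
*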
* @return the set of {@link ThreadMetadata}.
*/
public Set<ThreadMetadata> metadataForLocalThreads() {
final Set<ThreadMetadata> threadMetadata = new HashSet<>();
processStreamThread(thread -> {
synchronized (thread.getStateLock()) {
if (thread.state() != StreamThread.State.DEAD) {
threadMetadata.add(thread.threadMetadata());
}
}
});
return threadMetadata;
}
/**
* Returns {@link LagInfo} for all store partitions (active or standby) local to this Streams instance. Note that the
* values returned are just estimates and meant to be used for making soft decisions on whether the data in the store
* partition is fresh enough for querying.
*
* Note: Each invocation of this method issues a call to the Kafka brokers. Thus it's advisable to limit the frequency
* of invocation to once every few seconds.
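*
* An illustrative sketch (assuming a running instance {@code streams}; the store name {@code "counts"} is an
* example only):
* <pre>{@code
* final Map<String, Map<Integer, LagInfo>> lags = streams.allLocalStorePartitionLags();
* final long totalLag = lags.getOrDefault("counts", Collections.emptyMap())
*     .values().stream().mapToLong(LagInfo::offsetLag).sum();
* }</pre>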
*
* @return map of store names to another map of partition to {@link LagInfo}s
* @throws StreamsException if the admin client request throws exception
*/
public Map<String, Map<Integer, LagInfo>> allLocalStorePartitionLags() {
final List<Task> allTasks = new ArrayList<>();
processStreamThread(thread -> allTasks.addAll(thread.allTasks().values()));
return allLocalStorePartitionLags(allTasks);
}
protected Map<String, Map<Integer, LagInfo>> allLocalStorePartitionLags(final List<Task> tasksToCollectLagFor) {
final Map<String, Map<Integer, LagInfo>> localStorePartitionLags = new TreeMap<>();
final Collection<TopicPartition> allPartitions = new LinkedList<>();
final Map<TopicPartition, Long> allChangelogPositions = new HashMap<>();
// Obtain the current positions of all the active-restoring and standby tasks
for (final Task task : tasksToCollectLagFor) {
allPartitions.addAll(task.changelogPartitions());
// Note that not all changelog partitions will have positions, since some may not have started
allChangelogPositions.putAll(task.changelogOffsets());
}
log.debug("Current changelog positions: {}", allChangelogPositions);
final Map<TopicPartition, ListOffsetsResultInfo> allEndOffsets;
allEndOffsets = fetchEndOffsets(allPartitions, adminClient);
log.debug("Current end offsets: {}", allEndOffsets);
for (final Map.Entry<TopicPartition, ListOffsetsResultInfo> entry : allEndOffsets.entrySet()) {
// Avoiding an extra admin API lookup by computing lags for not-yet-started restorations
// from zero instead of the real "earliest offset" for the changelog.
// This will yield the correct relative order of lagginess for the tasks in the cluster,
// but it is an over-estimate of how much work remains to restore the task from scratch.
final long earliestOffset = 0L;
final long changelogPosition = allChangelogPositions.getOrDefault(entry.getKey(), earliestOffset);
final long latestOffset = entry.getValue().offset();
final LagInfo lagInfo = new LagInfo(changelogPosition == Task.LATEST_OFFSET ? latestOffset : changelogPosition, latestOffset);
final String storeName = streamsMetadataState.getStoreForChangelogTopic(entry.getKey().topic());
localStorePartitionLags.computeIfAbsent(storeName, ignored -> new TreeMap<>())
.put(entry.getKey().partition(), lagInfo);
}
return Collections.unmodifiableMap(localStorePartitionLags);
}
/**
* Run an interactive query against a state store.
*
* This method allows callers outside of the Streams runtime to access the internal state of
* stateful processors. See https://kafka.apache.org/documentation/streams/developer-guide/interactive-queries.html
* for more information.
*
* NOTICE: This functionality is {@link Evolving} and subject to change in minor versions.
* Once it is stabilized, this notice and the evolving annotation will be removed.
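*
* An illustrative sketch (assuming a running instance {@code streams}; the store name {@code "counts"} and the
* key {@code "some-key"} are examples, and the store is assumed to be a key-value store with {@code Long} values):
* <pre>{@code
* final StateQueryRequest<Long> request =
*     StateQueryRequest.inStore("counts").withQuery(KeyQuery.<String, Long>withKey("some-key"));
* final StateQueryResult<Long> result = streams.query(request);
* }</pre>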
*
* @param <R> The result type specified by the query.
* @throws StreamsNotStartedException If Streams has not yet been started. Just call {@link
* KafkaStreams#start()} and then retry this call.
* @throws StreamsStoppedException If Streams is in a terminal state like PENDING_SHUTDOWN,
* NOT_RUNNING, PENDING_ERROR, or ERROR. The caller should
* discover a new instance to query.
* @throws UnknownStateStoreException If the specified store name does not exist in the
* topology.
*/
@Evolving
public <R> StateQueryResult<R> query(final StateQueryRequest<R> request) {
final String storeName = request.getStoreName();
if (!topologyMetadata.hasStore(storeName)) {
throw new UnknownStateStoreException(
"Cannot get state store "
+ storeName
+ " because no such store is registered in the topology."
);
}
if (state().hasNotStarted()) {
throw new StreamsNotStartedException(
"KafkaStreams has not been started, you can retry after calling start()."
);
}
if (state().isShuttingDown() || state.hasCompletedShutdown()) {
throw new StreamsStoppedException(
"KafkaStreams has been stopped (" + state + ")."
+ " This instance can no longer serve queries."
);
}
final StateQueryResult<R> result = new StateQueryResult<>();
final Map<String, StateStore> globalStateStores = topologyMetadata.globalStateStores();
if (globalStateStores.containsKey(storeName)) {
// See KAFKA-13523
result.setGlobalResult(
QueryResult.forFailure(
FailureReason.UNKNOWN_QUERY_TYPE,
"Global stores do not yet support the KafkaStreams#query API. Use KafkaStreams#store instead."
)
);
} else {
for (final StreamThread thread : threads) {
final Map<TaskId, Task> tasks = thread.allTasks();
for (final Entry<TaskId, Task> entry : tasks.entrySet()) {
final TaskId taskId = entry.getKey();
final int partition = taskId.partition();
if (request.isAllPartitions()
|| request.getPartitions().contains(partition)) {
final Task task = entry.getValue();
final StateStore store = task.getStore(storeName);
if (store != null) {
final StreamThread.State state = thread.state();
final boolean active = task.isActive();
if (request.isRequireActive()
&& (state != StreamThread.State.RUNNING
|| !active)) {
result.addResult(
partition,
QueryResult.forFailure(
FailureReason.NOT_ACTIVE,
"Query requires a running active task,"
+ " but partition was in state "
+ state + " and was "
+ (active ? "active" : "not active") + "."
)
);
} else {
final QueryResult<R> r = store.query(
request.getQuery(),
request.isRequireActive()
? PositionBound.unbounded()
: request.getPositionBound(),
new QueryConfig(request.executionInfoEnabled())
);
result.addResult(partition, r);
}
// optimization: if we have handled all the requested partitions,
// we can return right away.
if (!request.isAllPartitions()
&& result.getPartitionResults().keySet().containsAll(request.getPartitions())) {
return result;
}
}
}
}
}
}
if (!request.isAllPartitions()) {
for (final Integer partition : request.getPartitions()) {
if (!result.getPartitionResults().containsKey(partition)) {
result.addResult(partition, QueryResult.forFailure(
FailureReason.NOT_PRESENT,
"The requested partition was not present at the time of the query."
));
}
}
}
return result;
}
}