All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.kafka.streams.processor.internals.namedtopology.KafkaStreamsNamedTopologyWrapper Maven / Gradle / Ivy

There is a newer version: 3.8.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.kafka.streams.processor.internals.namedtopology;

import org.apache.kafka.clients.admin.DeleteConsumerGroupOffsetsResult;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.annotation.InterfaceStability.Unstable;
import org.apache.kafka.common.errors.GroupIdNotFoundException;
import org.apache.kafka.common.errors.GroupSubscribedToTopicException;
import org.apache.kafka.common.serialization.Serializer;
import org.apache.kafka.common.internals.KafkaFutureImpl;
import org.apache.kafka.common.utils.LogContext;
import org.apache.kafka.streams.KafkaClientSupplier;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.KeyQueryMetadata;
import org.apache.kafka.streams.LagInfo;
import org.apache.kafka.streams.StoreQueryParameters;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.StreamsMetadata;
import org.apache.kafka.streams.TaskMetadata;
import org.apache.kafka.streams.errors.StreamsException;
import org.apache.kafka.streams.errors.TopologyException;
import org.apache.kafka.streams.errors.UnknownStateStoreException;
import org.apache.kafka.streams.errors.UnknownTopologyException;
import org.apache.kafka.streams.processor.StateStore;
import org.apache.kafka.streams.processor.TaskId;
import org.apache.kafka.streams.processor.internals.DefaultKafkaClientSupplier;
import org.apache.kafka.streams.processor.internals.InternalTopologyBuilder;
import org.apache.kafka.streams.processor.internals.StreamThread;
import org.apache.kafka.streams.processor.internals.Task;
import org.apache.kafka.streams.processor.internals.TopologyMetadata;
import org.slf4j.Logger;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Properties;
import java.util.Set;
import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.ExecutionException;
import java.util.stream.Collectors;

/**
 * This is currently an internal and experimental feature for enabling certain kinds of topology upgrades. Use at
 * your own risk.
 *
 * Status: additive upgrades possible, removal of NamedTopologies not yet supported
 *
 * Note: some standard features of Kafka Streams are not yet supported with NamedTopologies. These include:
 *       - global state stores
 *       - TopologyTestDriver (TTD)
 */
@Unstable
public class KafkaStreamsNamedTopologyWrapper extends KafkaStreams {

    /** Logger created from a {@link LogContext} whose prefix carries this client's id. */
    private final Logger log;

    /**
     * Creates an empty Kafka Streams application that allows NamedTopologies to be added at a later point.
     * Uses a {@link DefaultKafkaClientSupplier} for the Kafka clients.
     *
     * @param props the application configuration, wrapped into a {@link StreamsConfig}
     */
    public KafkaStreamsNamedTopologyWrapper(final Properties props) {
        this(new StreamsConfig(props), new DefaultKafkaClientSupplier());
    }

    /**
     * Creates an empty Kafka Streams application that allows NamedTopologies to be added at a later point,
     * using the given client supplier.
     *
     * @param props          the application configuration, wrapped into a {@link StreamsConfig}
     * @param clientSupplier the supplier handed to the underlying {@link KafkaStreams}
     */
    public KafkaStreamsNamedTopologyWrapper(final Properties props, final KafkaClientSupplier clientSupplier) {
        this(new StreamsConfig(props), clientSupplier);
    }

    /**
     * Shared constructor: hands an initially empty {@link TopologyMetadata} to {@link KafkaStreams} and then
     * sets up the logger.
     */
    private KafkaStreamsNamedTopologyWrapper(final StreamsConfig config, final KafkaClientSupplier clientSupplier) {
        super(new TopologyMetadata(new ConcurrentSkipListMap<>(), config), config, clientSupplier);
        // clientId is only available once the superclass constructor has run
        final String logPrefix = String.format("stream-client [%s] ", clientId);
        this.log = new LogContext(logPrefix).logger(getClass());
    }

    /**
     * Start up Streams with a single initial NamedTopology.
     * <p>
     * Convenience overload: wraps the topology in a singleton collection and delegates to
     * {@link #start(Collection)}.
     *
     * @param initialTopology the only topology to register before starting
     */
    public void start(final NamedTopology initialTopology) {
        start(Collections.singleton(initialTopology));
    }

    /**
     * Start up Streams with a collection of initial NamedTopologies (may be empty).
     * <p>
     * Every topology is first registered through {@link #addNamedTopology(NamedTopology)}. If any registration
     * completed exceptionally, the failure is logged and rethrown, and the underlying
     * {@link KafkaStreams#start()} is never invoked.
     * <p>
     * Note: this is synchronized to ensure that the application state cannot change while we add topologies
     *
     * @param initialTopologies the topologies to register before starting; may be empty
     * @throws StreamsException if adding one of the topologies completed exceptionally
     */
    public synchronized void start(final Collection<NamedTopology> initialTopologies) {
        log.info("Starting Streams with topologies: {}", initialTopologies);
        for (final NamedTopology topology : initialTopologies) {
            final AddNamedTopologyResult addNamedTopologyResult = addNamedTopology(topology);
            if (addNamedTopologyResult.all().isCompletedExceptionally()) {
                final StreamsException e = addNamedTopologyResult.exceptionNow();
                // a trailing Throwable argument is logged as the exception, not as a placeholder value
                log.error("Failed to start Streams when adding topology {} due to", topology.name(), e);
                throw e;
            }
        }
        super.start();
    }

    /**
     * Creates a {@link NamedTopologyBuilder}, the high-level DSL entry point for specifying the processing logic
     * of an independent topology that can be executed by this {@link KafkaStreams}.
     *
     * @param topologyName              The name for this topology
     * @param topologyOverrides         The properties and any config overrides for this topology
     * @return a new builder created from the name, the application configs and the given overrides
     *
     * @throws IllegalArgumentException if the name contains {@link TaskId#NAMED_TOPOLOGY_DELIMITER}
     *                                  (described as "__" in the error message)
     */
    public NamedTopologyBuilder newNamedTopologyBuilder(final String topologyName, final Properties topologyOverrides) {
        final boolean nameIsAllowed = !topologyName.contains(TaskId.NAMED_TOPOLOGY_DELIMITER);
        if (nameIsAllowed) {
            return new NamedTopologyBuilder(topologyName, applicationConfigs, topologyOverrides);
        }
        throw new IllegalArgumentException("The character sequence '__' is not allowed in a NamedTopology, please select a new name");
    }

    /**
     * Provides a high-level DSL for specifying the processing logic of your application and building it into an
     * independent topology that can be executed by this {@link KafkaStreams}. This method will use the global
     * application {@link StreamsConfig} passed in to the constructor for all topology-level configs. To override
     * any of these for this specific Topology, use {@link #newNamedTopologyBuilder(String, Properties)}.
     * <p>
     * Implemented by delegating to {@link #newNamedTopologyBuilder(String, Properties)} with an empty
     * {@link Properties} object, i.e. without any overrides.
     *
     * @param topologyName              The name for this topology
     * @return the builder produced by the two-argument overload
     *
     * @throws IllegalArgumentException if the name contains the character sequence "__"
     */
    public NamedTopologyBuilder newNamedTopologyBuilder(final String topologyName) {
        return newNamedTopologyBuilder(topologyName, new Properties());
    }

    /**
     * Looks up a NamedTopology by name via the builder registered in the topology metadata.
     *
     * @param name the name of the topology to look up
     * @return the NamedTopology for the specific name, or Optional.empty() if the application has no NamedTopology of that name
     */
    public synchronized Optional<NamedTopology> getTopologyByName(final String name) {
        // lookupBuilderForNamedTopology yields null for an unknown name, hence ofNullable
        return Optional.ofNullable(topologyMetadata.lookupBuilderForNamedTopology(name))
                       .map(InternalTopologyBuilder::namedTopology);
    }

    /**
     * @return all NamedTopologies as reported by the topology metadata of this application
     */
    public Collection<NamedTopology> getAllTopologies() {
        return topologyMetadata.getAllNamedTopologies();
    }

    /**
     * Add a new NamedTopology to a running Kafka Streams app. If multiple instances of the application are running,
     * you should inform all of them by calling {@code #addNamedTopology(NamedTopology)} on each client in order for
     * it to begin processing the new topology.
     * This method is not purely Async.
     * <p>
     * If the application is still in {@code CREATED}, the returned future is completed immediately after the
     * topology has been registered.
     * <p>
     * The future of the returned result may complete exceptionally (this method does not throw these directly) with:
     * <ul>
     *   <li>{@link IllegalArgumentException} if this topology name is already in use</li>
     *   <li>{@link IllegalStateException} if the application has begun or completed shutting down</li>
     *   <li>{@link TopologyException} if this topology subscribes to any input topics or pattern already in use</li>
     * </ul>
     *
     * @param newTopology the topology to register with this application
     * @return a result wrapping the future that tracks the addition
     */
    public AddNamedTopologyResult addNamedTopology(final NamedTopology newTopology) {
        log.info("Adding new NamedTopology: {}", newTopology.name());
        final KafkaFutureImpl<Void> future = new KafkaFutureImpl<>();

        if (hasStartedOrFinishedShuttingDown()) {
            future.completeExceptionally(
                new IllegalStateException("Cannot add a NamedTopology while the state is " + super.state)
            );
        } else if (getTopologyByName(newTopology.name()).isPresent()) {
            future.completeExceptionally(
                new IllegalArgumentException("Unable to add the new NamedTopology " + newTopology.name() +
                                                   " as another of the same name already exists")
            );
        } else {
            topologyMetadata.registerAndBuildNewTopology(future, newTopology.internalTopologyBuilder());
            // before start() there is nothing to wait for, so the future can be completed right away
            maybeCompleteFutureIfStillInCREATED(future, "adding topology " + newTopology.name());
        }

        return new AddNamedTopologyResult(future);
    }

    /**
     * Remove an existing NamedTopology from a running Kafka Streams app. If multiple instances of the application are
     * running, you should inform all of them by calling {@code #removeNamedTopology(String)} on each client to ensure
     * it stops processing the old topology.
     * This method is not purely Async.
     * <p>
     * The future of the returned result may complete exceptionally (this method does not throw these directly) with:
     * <ul>
     *   <li>{@link UnknownTopologyException} if this topology name cannot be found</li>
     *   <li>{@link IllegalStateException} if the application has begun or completed shutting down</li>
     * </ul>
     *
     * @param topologyToRemove          name of the topology to be removed
     * @param resetOffsets              whether to reset the committed offsets for any source topics
     * @return a result wrapping the removal future; when {@code resetOffsets} is set, the application has left
     *         {@code CREATED} and local active tasks own partitions of the topology's source topics, the result
     *         also carries the action that resets those offsets
     */
    public RemoveNamedTopologyResult removeNamedTopology(final String topologyToRemove, final boolean resetOffsets) {
        log.info("Informed to remove topology {} with resetOffsets={} ", topologyToRemove, resetOffsets);

        final KafkaFutureImpl<Void> removeTopologyFuture = new KafkaFutureImpl<>();

        if (hasStartedOrFinishedShuttingDown()) {
            log.error("Attempted to remove topology {} while Kafka Streams was in state {}, "
                          + "topologies cannot be modified if the application has begun or completed shutting down.",
                      topologyToRemove, state
            );
            removeTopologyFuture.completeExceptionally(
                new IllegalStateException("Cannot remove a NamedTopology while the state is " + super.state)
            );
        } else if (!getTopologyByName(topologyToRemove).isPresent()) {
            log.error("Attempted to remove unknown topology {}. This application currently contains the "
                          + "following topologies: {}.", topologyToRemove, topologyMetadata.namedTopologiesView()
            );
            removeTopologyFuture.completeExceptionally(
                new UnknownTopologyException("Unable to remove topology", topologyToRemove)
            );
        }
        // NOTE(review): even when the future was failed above, execution continues and still calls
        // unregisterTopology below. The "unsuccessfully" wording in the reset log suggests this may be
        // deliberate - confirm before turning the failure branches into early returns.

        // Collect the partitions owned by local active tasks that belong to the source topics of the topology
        // being removed. This must happen before the topology is unregistered.
        final Set<TopicPartition> partitionsToReset = metadataForLocalThreads()
            .stream()
            .flatMap(thread -> {
                final Set<TaskMetadata> tasks = new HashSet<>(thread.activeTasks());
                return tasks.stream();
            })
            .flatMap(task -> task.topicPartitions().stream())
            .filter(partition -> topologyMetadata.sourceTopicsForTopology(topologyToRemove).contains(partition.topic()))
            .collect(Collectors.toSet());

        topologyMetadata.unregisterTopology(removeTopologyFuture, topologyToRemove);

        // true only if the application is still in CREATED and the future was completed here
        final boolean skipResetForUnstartedApplication =
            maybeCompleteFutureIfStillInCREATED(removeTopologyFuture, "removing topology " + topologyToRemove);

        if (resetOffsets && !skipResetForUnstartedApplication && !partitionsToReset.isEmpty()) {
            log.info("Resetting offsets for the following partitions of {} removed NamedTopology {}: {}",
                     removeTopologyFuture.isCompletedExceptionally() ? "unsuccessfully" : "successfully",
                     topologyToRemove, partitionsToReset
            );
            return new RemoveNamedTopologyResult(
                removeTopologyFuture,
                topologyToRemove,
                () -> resetOffsets(partitionsToReset)
            );
        } else {
            return new RemoveNamedTopologyResult(removeTopologyFuture);
        }
    }

    /**
     * Pauses a topology by name. Delegates to the topology metadata's {@code pauseTopology}.
     *
     * @param topologyName Name of the topology to pause
     */
    public void pauseNamedTopology(final String topologyName) {
        topologyMetadata.pauseTopology(topologyName);
    }

    /**
     * Checks if a given topology is paused. Delegates to the topology metadata's {@code isPaused}.
     *
     * @param topologyName If null, assume that we are checking the `UNNAMED_TOPOLOGY`.
     * @return A boolean indicating if the topology is paused.
     */
    public boolean isNamedTopologyPaused(final String topologyName) {
        return topologyMetadata.isPaused(topologyName);
    }

    /**
     * Resumes a topology by name: first marks it as resumed in the topology metadata, then calls
     * {@link StreamThread#signalResume()} on every stream thread of this client.
     *
     * @param topologyName Name of the topology to resume
     */
    public void resumeNamedTopology(final String topologyName) {
        topologyMetadata.resumeTopology(topologyName);
        // notify the threads only after the metadata has been updated
        threads.forEach(StreamThread::signalResume);
    }

    /**
     * Completes the given future right away when the application has not been started yet.
     *
     * @param updateTopologyFuture the future tracking a topology addition or removal
     * @param operation            description of the operation, used only in the log message
     * @return  true iff the application is still in CREATED and the future was completed
     */
    private boolean maybeCompleteFutureIfStillInCREATED(final KafkaFutureImpl<Void> updateTopologyFuture,
                                                        final String operation) {
        // a future that is already done (e.g. failed by the caller) is left untouched
        if (state == State.CREATED && !updateTopologyFuture.isDone()) {
            updateTopologyFuture.complete(null);
            log.info("Completed {} since application has not been started", operation);
            return true;
        } else {
            return false;
        }
    }

    /**
     * Deletes the committed offsets of the given partitions for this application's consumer group, retrying
     * every 100 ms until it succeeds or a terminal condition is hit.
     * <ul>
     *   <li>{@link GroupSubscribedToTopicException} with the expected message: retried without consuming the
     *       retry budget.</li>
     *   <li>{@link GroupIdNotFoundException}: treated as "nothing left to do" and the method returns.</li>
     *   <li>Any other failure: retried until the retry budget is used up, then rethrown.</li>
     * </ul>
     *
     * @param partitionsToReset the partitions whose committed offsets should be deleted
     * @throws StreamsException if the thread is interrupted, or if the retry budget is exhausted
     */
    private void resetOffsets(final Set<TopicPartition> partitionsToReset) throws StreamsException {
        final String groupStillSubscribedMessage =
            "Deleting offsets of a topic is forbidden while the consumer group is actively subscribed to it.";
        // The number of times to retry upon failure
        int retries = 100;
        while (true) {
            try {
                final DeleteConsumerGroupOffsetsResult deleteOffsetsResult = adminClient.deleteConsumerGroupOffsets(
                    applicationConfigs.getString(StreamsConfig.APPLICATION_ID_CONFIG),
                    partitionsToReset);
                deleteOffsetsResult.all().get();
                log.info("Successfully completed resetting offsets.");
                break;
            } catch (final InterruptedException ex) {
                // restore the interrupt flag so callers further up can observe the interruption
                Thread.currentThread().interrupt();
                log.error("Offset reset failed.", ex);
                throw new StreamsException(ex);
            } catch (final ExecutionException ex) {
                final Throwable error = ex.getCause() != null ? ex.getCause() : ex;

                // constant-first comparison: the exception message may be null
                if (error instanceof GroupSubscribedToTopicException &&
                    groupStillSubscribedMessage.equals(error.getMessage())) {
                    log.debug("Offset reset failed, there may be other nodes which have not yet finished removing this topology", error);
                } else if (error instanceof GroupIdNotFoundException) {
                    log.info("The offsets have been reset by another client or the group has been deleted, no need to retry further.");
                    break;
                } else {
                    if (--retries > 0) {
                        log.error("Offset reset failed, retries remaining: {}", retries, error);
                    } else {
                        log.error("Offset reset failed, no retries remaining.", error);
                        throw new StreamsException(error);
                    }
                }
            }
            try {
                Thread.sleep(100);
            } catch (final InterruptedException ex) {
                // do not swallow the interruption and keep looping: stop retrying, consistent with the
                // handling of an interruption while waiting on the admin client above
                Thread.currentThread().interrupt();
                log.error("Offset reset interrupted while waiting to retry.", ex);
                throw new StreamsException(ex);
            }
        }
    }

    /**
     * Remove an existing NamedTopology from a running Kafka Streams app. If multiple instances of the application are
     * running, you should inform all of them by calling {@code #removeNamedTopology(String)} on each client to ensure
     * it stops processing the old topology.
     * This method is not purely Async.
     * <p>
     * Equivalent to {@link #removeNamedTopology(String, boolean)} with {@code resetOffsets} set to {@code false};
     * see that method for the conditions under which the returned future completes exceptionally.
     *
     * @param topologyToRemove          name of the topology to be removed
     * @return the result produced by {@link #removeNamedTopology(String, boolean)}
     */
    public RemoveNamedTopologyResult removeNamedTopology(final String topologyToRemove) {
        return removeNamedTopology(topologyToRemove, false);
    }

    /**
     * Do a clean up of the local state directory for this NamedTopology by deleting all data with regard to the
     * {@link StreamsConfig#APPLICATION_ID_CONFIG application ID} in the ({@link StreamsConfig#STATE_DIR_CONFIG})
     * <p>
     * May be called while the Streams is in any state, but only on a {@link NamedTopology} that has already been
     * removed via {@link #removeNamedTopology(String)}.
     * <p>
     * Calling this method triggers a restore of local {@link StateStore}s for this {@link NamedTopology} if it is
     * ever re-added via {@link #addNamedTopology(NamedTopology)}.
     *
     * @param name the name of the removed topology whose local state should be deleted
     * @throws IllegalStateException if this {@code NamedTopology} hasn't been removed
     * @throws StreamsException if cleanup failed
     */
    public void cleanUpNamedTopology(final String name) {
        if (getTopologyByName(name).isPresent()) {
            throw new IllegalStateException("Can't clean up local state for an active NamedTopology: " + name);
        }
        stateDirectory.clearLocalStateForNamedTopology(name);
    }

    /**
     * @return the topology description string provided by the topology metadata
     */
    public String getFullTopologyDescription() {
        return topologyMetadata.topologyDescriptionString();
    }

    /**
     * Verifies that the named topology exists and contains the given state store.
     *
     * @throws UnknownTopologyException   if no builder is registered under {@code topologyName}
     * @throws UnknownStateStoreException if the topology exists but has no store named {@code storeName}
     */
    private void verifyTopologyStateStore(final String topologyName, final String storeName) {
        final InternalTopologyBuilder builder = topologyMetadata.lookupBuilderForNamedTopology(topologyName);
        if (builder == null) {
            throw new UnknownTopologyException("Cannot get state store " + storeName, topologyName);
        } else if (!builder.hasStore(storeName)) {
            throw new UnknownStateStoreException(
                "Cannot get state store " + storeName + " from NamedTopology " + topologyName +
                    " because no such state store exists in this topology."
            );
        }
    }

    /**
     * See {@link KafkaStreams#store(StoreQueryParameters)}
     * <p>
     * Additionally verifies up front that the topology and the store named in the parameters exist.
     */
    public <T> T store(final NamedTopologyStoreQueryParameters<T> storeQueryParameters) {
        final String topologyName = storeQueryParameters.topologyName;
        final String storeName = storeQueryParameters.storeName();
        verifyTopologyStateStore(topologyName, storeName);
        return super.store(storeQueryParameters);
    }

    /**
     * See {@link KafkaStreams#streamsMetadataForStore(String)}
     */
    public Collection<StreamsMetadata> streamsMetadataForStore(final String storeName, final String topologyName) {
        verifyTopologyStateStore(topologyName, storeName);
        validateIsRunningOrRebalancing();
        return streamsMetadataState.getAllMetadataForStore(storeName, topologyName);
    }

    /**
     * See {@link KafkaStreams#metadataForAllStreamsClients()}
     */
    public Collection<StreamsMetadata> allStreamsClientsMetadataForTopology(final String topologyName) {
        validateIsRunningOrRebalancing();
        return streamsMetadataState.getAllMetadataForTopology(topologyName);
    }

    /**
     * See {@link KafkaStreams#queryMetadataForKey(String, Object, Serializer)}
     */
    public <K> KeyQueryMetadata queryMetadataForKey(final String storeName,
                                                    final K key,
                                                    final Serializer<K> keySerializer,
                                                    final String topologyName) {
        verifyTopologyStateStore(topologyName, storeName);
        validateIsRunningOrRebalancing();
        return streamsMetadataState.getKeyQueryMetadataForKey(storeName, key, keySerializer, topologyName);
    }

    /**
     * See {@link KafkaStreams#allLocalStorePartitionLags()}
     * <p>
     * Only tasks whose id carries the given topology name are taken into account.
     *
     * @throws UnknownTopologyException if the application has no NamedTopology of that name
     */
    public Map<String, Map<Integer, LagInfo>> allLocalStorePartitionLagsForTopology(final String topologyName) {
        if (!getTopologyByName(topologyName).isPresent()) {
            log.error("Can't get local store partition lags since topology {} does not exist in this application",
                      topologyName);
            throw new UnknownTopologyException("Can't get local store partition lags", topologyName);
        }
        final List<Task> allTopologyTasks = new ArrayList<>();
        // gather the tasks of this topology from every stream thread
        processStreamThread(thread -> allTopologyTasks.addAll(
            thread.readyOnlyAllTasks().stream()
                .filter(t -> topologyName.equals(t.id().topologyName()))
                .collect(Collectors.toList())));
        return allLocalStorePartitionLags(allTopologyTasks);
    }
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy