All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.telicent.smart.cache.sources.kafka.TopicExistenceChecker Maven / Gradle / Ivy

There is a newer version: 0.24.1
Show newest version
/**
 * Copyright (C) Telicent Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.telicent.smart.cache.sources.kafka;

import io.telicent.smart.cache.sources.EventSourceException;
import org.apache.commons.lang3.StringUtils;
import org.apache.kafka.clients.admin.AdminClient;
import org.apache.kafka.clients.admin.DescribeTopicsResult;
import org.apache.kafka.clients.admin.TopicDescription;
import org.apache.kafka.common.errors.AuthenticationException;
import org.apache.kafka.common.errors.AuthorizationException;
import org.apache.kafka.common.errors.UnknownTopicOrPartitionException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.time.Duration;
import java.util.*;
import java.util.concurrent.*;

/**
 * A helper utility that abstracts the checking of topic existence (and caching the results of those checks).  The
 * existence checks run on background threads using an {@link AdminClient}.  Once a topic is determined to exist then
 * that result is cached for the lifetime of this instance. This allows the instance to quickly answer whether any of
 * its configured topics exist, while also continuing to check and see if those known to be non-existent are created
 * later.
 * 

* Primarily this is used inside {@link KafkaEventSource} as a sanity check to avoid unnecessarily polling the broker if * none of the topics exist. This is done because when no topics exist Kafka's logging is very noisy and can * effectively flood the logs making seeing anything useful in them difficult. *

*/ public class TopicExistenceChecker { private final AdminClient adminClient; private final Logger logger; private final String server; private final Set topics; private final Map topicExists = new ConcurrentHashMap<>(); private final ExecutorService service; private final Map> inFlightChecks = new HashMap<>(); private boolean closed = false; /** * Creates a new topic existence checker * * @param adminClient Kafka Admin Client, if none supplied then we won't be able to check topic existence but in * some scenarios (mainly testing) we know this won't be supplied so that's considered * acceptable * @param server The Kafka bootstrap server(s) * @param topics Topic(s) to check * @param logger Logger to write any log messages to, if not supplied then a logger is created using this class * as its name */ public TopicExistenceChecker(AdminClient adminClient, String server, Collection topics, Logger logger) { this.adminClient = adminClient; this.logger = logger != null ? logger : LoggerFactory.getLogger(TopicExistenceChecker.class); Objects.requireNonNull(topics, "Topics to check cannot be null"); if (StringUtils.isBlank(server)) { throw new IllegalArgumentException("server cannot be null/blank"); } this.server = server; this.topics = new LinkedHashSet<>(topics); this.service = Executors.newFixedThreadPool(this.topics.size()); } /** * Do all the configured topics exist? * * @param timeout Timeout * @return True if all topics exist, false otherwise */ public final boolean allTopicsExist(Duration timeout) { boolean any = anyTopicExists(timeout); if (!any) { return false; } return allTopicsKnownToExist(); } /** * Do any of the configured topics exist? *

* If this is the first time this method has been called on this instance then it will wait up to the supplied * timeout trying to determine if at least one of the configured topics exists. As soon as one topic is determined * to exist then it will return {@code true}. Only if none of the topics are found to exist will it return * {@code false}. *

*

* However, on any subsequent run if at least one topic is known to exist then it will return {@code true} ASAP. *

* * @param timeout Timeout * @return True if any of the configured topics exist */ public final boolean anyTopicExists(Duration timeout) { if (this.closed) { return anyTopicKnownToExist(); } resolveInFlightChecks(); launchChecks(timeout); if (anyTopicKnownToExist()) { return true; } return waitForChecks(timeout); } /** * Quickly checks whether any of our configured topics are already known to exist based on our cached results * * @return True if known to exist, false otherwise */ private boolean anyTopicKnownToExist() { return this.topicExists.values().stream().anyMatch(exists -> exists); } /** * Quickly checks whether all of our configured topics are already known to exist based on our cached results * * @return True if all known to exist, false otherwise */ private boolean allTopicsKnownToExist() { return this.topicExists.size() == this.topics.size() && this.topicExists.values() .stream() .allMatch(exists -> exists); } /** * Resolves any in-flight checks and removes them from the in-flight registry *

* NB: We don't update our {@link #topicExists} map here because the checks do this directly, see * the {@link #doesTopicExist(String, Duration)} method. *

*/ protected final void resolveInFlightChecks() { List>> inFlight = new ArrayList<>(this.inFlightChecks.entrySet()); for (Map.Entry> check : inFlight) { if (check.getValue().isDone()) { this.inFlightChecks.remove(check.getKey()); } } } /** * Waits for any in-flight topic existence checks to complete, returning {@code true} as soon as any topic is known * to exist or {@code false} as soon as all checks complete unsuccessfully. * * @param timeout Timeout * @return True if any topic is known to exist, false otherwise */ protected final boolean waitForChecks(Duration timeout) { long maxWaitMs = timeout.toMillis(); long start = System.currentTimeMillis(); while (true) { long elapsed = System.currentTimeMillis() - start; if (elapsed >= maxWaitMs) { // If timeout elapsed then exit return anyTopicKnownToExist(); } // If all checks are now finished exit if (this.inFlightChecks.isEmpty()) { return anyTopicKnownToExist(); } List>> checks = new ArrayList<>(this.inFlightChecks.entrySet()); for (Map.Entry> check : checks) { if (check.getValue().isDone()) { // Regardless of its success/failure if the check is finished we only need to resolve it once this.inFlightChecks.remove(check.getKey()); try { if (check.getValue().get()) { // A check succeeded, so we know at least one topic exists, outstanding checks will be // resolved later return true; } } catch (Throwable e) { // This check failed in some way, ignore it, we'll re-issue it next time around } } } try { Thread.sleep(100); } catch (InterruptedException e) { // Ignore } } } /** * Launches the topic existence checks *

* Checks are only launched for topics that are not yet known to exist. If a topic is already known to exist we * don't launch any further checks for it. *

* * @param timeout Timeout */ protected final void launchChecks(Duration timeout) { for (String topic : this.topics) { // Only launch if we've not yet checked for this topic, or we've checked but it was previously known to not // exist if (!this.topicExists.containsKey(topic) || (this.topicExists.containsKey(topic) && !this.topicExists.get( topic))) { if (this.inFlightChecks.containsKey(topic)) { // Possibly could already be an in-flight check for this topic. This is most likely to occur if // we get asked to make a check with a long timeout and then later with a short timeout before the // original timeout has expired. Regardless of how it happens we don't need to launch another one, // rather we'll wait for the existing one to complete. As realistically if a check is still // in-flight it means that topic really doesn't exist at the moment and starting a new shorter time // out check in the meantime isn't going to change that. continue; } this.inFlightChecks.put(topic, this.service.submit(() -> doesTopicExist(topic, timeout))); } } } /** * The actual check whether a given topic actually exists within the Kafka cluster. This gets run in the background * via our executor {@link #service} and are kicked off by {@link #launchChecks(Duration)}. *

* A positive result is cached, and once that is {@code true} calling this method for that topic is effectively a * no-op and does not involve any further communication with Kafka. We also won't bother launching future checks * for topics we know to exist. *

*

* This method blocks up to the given timeout if the topic does not exist to wait to see if something outside our * control is going to create the topic. Each time it gets a negative result from the Kafka cluster it sleeps a * brief amount of time and then asks the cluster again. As soon as it gets a positive result it will return that. *

*

* The only exception to this behaviour is that the check will fail fast if it receives a security error from Kafka * indicating we aren't able to make that check, or are talking to a secured Kafka cluster without sufficient * credentials. *

*

* The check will actively update the shared {@link #topicExists} cache with the results of the checks as they * proceed. *

* * @param topic Specific topic to check * @param timeout Timeout for the check * @return True if the topic exists, false otherwise */ protected final boolean doesTopicExist(String topic, Duration timeout) { if (this.topicExists.containsKey(topic) && this.topicExists.get(topic).booleanValue()) { return true; } boolean firstCheck = !this.topicExists.containsKey(topic); // If we don't have an admin client can't make this check if (this.adminClient == null) { if (firstCheck) { logger.debug( "Unable to perform a topic existence check as this event source instance does not have a Kafka AdminClient available"); this.topicExists.put(topic, true); } return true; } // Validate that the topic to be read exists // This check is added per #134 in that attempting to actively poll() a missing topic floods the logs // because Kafka will return almost immediately and log a metadata error. In an application that loops // on calling poll() this results in flooding the logs so skip polling non-existent topics. This is also // effectively a DOS attack on the Kafka broker so should be avoided. // // Eventually if the topic does exist then we will start polling it normally. long remainingTimeout = timeout.toMillis(); while (remainingTimeout > 0) { long start = System.currentTimeMillis(); try { DescribeTopicsResult topics = this.adminClient.describeTopics(List.of(topic)); Map descriptions = topics.allTopicNames().get(remainingTimeout, TimeUnit.MILLISECONDS); this.topicExists.put(topic, descriptions.containsKey(topic)); break; } catch (UnknownTopicOrPartitionException e) { this.topicExists.put(topic, false); } catch (AuthenticationException | AuthorizationException e) { // Fail fast if this is a secure cluster and we aren't authenticated/authorized logger.error("Kafka Security Error: ", e); throw new EventSourceException("Kafka Security rejected the request", e); } catch (Throwable e) { // Ignore in this case, this might be a transient error, e.g. network interruption, in communicating // with Kafka. If this is a non-recoverable error we'll hit it again when we do our actual polling // later. this.topicExists.put(topic, false); } try { Thread.sleep(250); } catch (InterruptedException e) { // When interrupted break out of the loop break; } remainingTimeout -= (System.currentTimeMillis() - start); } if (firstCheck && !this.topicExists.get(topic)) { logger.warn("Kafka topic {} does not currently exist on the Kafka server {}", topic, this.server); } return this.topicExists.containsKey(topic) && this.topicExists.get(topic); } /** * Closes the checker, aborting any remaining in-flight checks */ public synchronized void close() { if (!this.closed) { this.service.shutdownNow(); try { this.service.awaitTermination(5, TimeUnit.SECONDS); } catch (InterruptedException e) { // Ignored, just trying to give the in-flight checks time to complete } if (this.adminClient != null) { this.adminClient.close(); } this.closed = true; } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy