
/* Copyright (c) 2018-Present Expedia Group.
 * All rights reserved.  http://www.homeaway.com

 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at

 *      http://www.apache.org/licenses/LICENSE-2.0

 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.homeaway.streamplatform.streamregistry.provider.impl;

import static io.confluent.kafka.serializers.AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG;

import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Properties;
import java.util.stream.Collectors;

import kafka.admin.AdminUtils;
import kafka.admin.RackAwareMode;
import kafka.server.ConfigType;
import kafka.utils.ZKStringSerializer$;
import kafka.utils.ZkUtils;
import lombok.extern.slf4j.Slf4j;

import io.confluent.kafka.serializers.KafkaAvroSerializer;
import io.confluent.kafka.streams.serdes.avro.SpecificAvroSerde;

import org.I0Itec.zkclient.ZkClient;
import org.I0Itec.zkclient.ZkConnection;
import org.apache.commons.lang3.Validate;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.utils.Bytes;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.GlobalKTable;
import org.apache.kafka.streams.kstream.Materialized;
import org.apache.kafka.streams.state.KeyValueIterator;
import org.apache.kafka.streams.state.KeyValueStore;
import org.apache.kafka.streams.state.QueryableStoreTypes;
import org.apache.kafka.streams.state.ReadOnlyKeyValueStore;

import com.homeaway.digitalplatform.streamregistry.ClusterKey;
import com.homeaway.digitalplatform.streamregistry.ClusterValue;
import com.homeaway.streamplatform.streamregistry.configuration.KafkaProducerConfig;
import com.homeaway.streamplatform.streamregistry.exceptions.StreamCreationException;
import com.homeaway.streamplatform.streamregistry.provider.InfraManager;


/**
 * An {@link InfraManager} implementation backed by a Kafka Streams {@link GlobalKTable}.
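 *
 * <p>A minimal lifecycle sketch (illustrative only -- the topic, store name, and URL
 * below are placeholders, and {@code kstreamProps} stands in for the caller's Kafka
 * Streams / producer settings such as {@code bootstrap.servers} and {@code application.id}):
 * <pre>{@code
 * Map<String, Object> configs = new HashMap<>();
 * configs.put(KafkaInfraManager.INFRAMANAGER_TOPIC, "infra-manager-topic");
 * configs.put(KafkaInfraManager.INFRAMANAGER_STATE_STORE, "infra-manager-store");
 * configs.put(KafkaInfraManager.INFRA_KSTREAM_PROPS, kstreamProps);
 * configs.put("schema.registry.url", "http://localhost:8081");
 *
 * InfraManager infraManager = new KafkaInfraManager();
 * infraManager.configure(configs);
 * infraManager.start();
 * // ... getAllClusters(), getClusterByKey(...), upsertCluster(...), upsertTopics(...) ...
 * infraManager.stop();
 * }</pre>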
 */
@SuppressWarnings("WeakerAccess")
@Slf4j
public class KafkaInfraManager implements InfraManager {

    public static final String INFRAMANAGER_TOPIC = "infraManagerTopic";
    public static final String INFRAMANAGER_STATE_STORE = "infraManagerStateStoreName";
    public static final String INFRA_KSTREAM_PROPS = "infraKStreamsProperties";

    private String infraStateStoreName;
    private KafkaStreams infraKStreams;
    private KafkaProducer<ClusterKey, ClusterValue> infraProducer;
    private ReadOnlyKeyValueStore<ClusterKey, ClusterValue> store;
    private String infraManagerTopic;

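    // Client connection properties to strip from the supplied Properties before the
    // remainder is treated as Kafka topic-level config (see filterPropertiesKeys below).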
    private static Map<String, Boolean> TOPIC_CONFIG_KEY_FILTER = new HashMap<String, Boolean>() {
        private static final long serialVersionUID = -7377105429359314831L;
        {
            put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, true);
            put(KafkaProducerConfig.ZOOKEEPER_QUORUM, true);
        }
    };

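    /**
     * Reads the required settings and wires up the producer and KStreams topology.
     * The supplied map must contain {@link #INFRAMANAGER_TOPIC}, {@link #INFRAMANAGER_STATE_STORE},
     * {@link #INFRA_KSTREAM_PROPS} (a map of Kafka Streams / producer properties) and
     * {@code schema.registry.url}; a missing key fails fast with an {@link IllegalStateException}.
     */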
    @SuppressWarnings("unchecked")
    @Override
    public void configure(Map<String, Object> configs) {
        // Get the infra manager topic name
        Validate.validState(configs.containsKey(INFRAMANAGER_TOPIC), "Infra Manager Topic name is not provided.");
        infraManagerTopic = configs.get(INFRAMANAGER_TOPIC).toString();
        log.info("Infra Manager Topic Name Read: {}", infraManagerTopic);

        // Get the infra state store name
        Validate.validState(configs.containsKey(INFRAMANAGER_STATE_STORE), "Infra Manager State Store name is not provided.");
        infraStateStoreName = configs.get(INFRAMANAGER_STATE_STORE).toString();
        log.info("Infra Manager State Store Name Read: {}", infraStateStoreName);

        // Populate our kstreams properties map
        Properties infraKStreamsProperties = new Properties();
        Validate.validState(configs.containsKey(INFRA_KSTREAM_PROPS), "InfraKStreams properties are not provided.");
        Map<String, Object> infraKStreamsPropertiesMap = (Map<String, Object>) configs.get(INFRA_KSTREAM_PROPS);
        infraKStreamsPropertiesMap.forEach(infraKStreamsProperties::put);
        infraKStreamsProperties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, KafkaAvroSerializer.class);
        infraKStreamsProperties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, KafkaAvroSerializer.class);
        log.info("Infra KStreams Properties: {}", infraKStreamsProperties);

        Validate.validState(configs.containsKey(SCHEMA_REGISTRY_URL_CONFIG), SCHEMA_REGISTRY_URL_CONFIG + " is not provided.");
        String schemaRegistryUrl = (String) configs.get(SCHEMA_REGISTRY_URL_CONFIG);

        // Create an Infra Producer
        infraProducer = new KafkaProducer<>(infraKStreamsProperties);

        // initialize the kstreams processor
        StreamsBuilder infraKStreamBuilder = new StreamsBuilder();

        infraKStreamBuilder.globalTable(infraManagerTopic, createMaterialized(schemaRegistryUrl));
        infraKStreams = new KafkaStreams(infraKStreamBuilder.build(), infraKStreamsProperties);
    }

    @Override
    public void start() {
        infraKStreams.start();
        log.info("Infrastructure Manager KStream is started");
        log.info("Infra Manager State Store Name: {}", infraStateStoreName);
        store = infraKStreams.store(infraStateStoreName, QueryableStoreTypes.keyValueStore());
    }

    @Override
    public void stop() {
        infraKStreams.close();
        log.info("Infrastructure Manager KStream is stopped");
    }

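    /**
     * Snapshots the global state store into a plain map. The iteration runs against the
     * store's read-only view, so writes arriving mid-iteration may or may not be included.
     */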
    @Override
    public Map<ClusterKey, ClusterValue> getAllClusters() {
        Map<ClusterKey, ClusterValue> clusterKeyValueMap = new HashMap<>();
        try (KeyValueIterator<ClusterKey, ClusterValue> clusterKeyValueIterator = store.all()) {
            log.debug("Approximate Num. of Entries in Infra Table - {}", store.approximateNumEntries());

            while (clusterKeyValueIterator.hasNext()) {
                KeyValue<ClusterKey, ClusterValue> next = clusterKeyValueIterator.next();
                clusterKeyValueMap.put(next.key, next.value);
            }
        } catch (Exception e) {
            throw new IllegalStateException("Infra Manager State Store not initialized", e);
        }
        return clusterKeyValueMap;
    }

    @Override
    public Optional<ClusterValue> getClusterByKey(ClusterKey clusterKey) {
        ClusterValue clusterValue;
        Validate.validState(store != null, "Infra Manager should be configured");

        try {
            log.debug("Approximate Num. of Entries in Infra Table - {}", store.approximateNumEntries());
            clusterValue = store.get(clusterKey);
        } catch (Exception e) {
            throw new IllegalStateException("Error while retrieving the cluster value using cluster key: " + clusterKey, e);
        }

        if (clusterValue == null) {
            log.error("Cluster Not Found, key: {}", clusterKey);
        } else {
            log.info("Cluster properties - {}", clusterValue.getClusterProperties());
        }

        return Optional.ofNullable(clusterValue);
    }

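    /**
     * Publishes the (key, value) pair to the infra manager topic. The global table consumes
     * the record asynchronously, so a subsequent read may briefly lag this write.
     */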
    @Override
    public void upsertCluster(ClusterKey clusterKey, ClusterValue clusterValue) {
        try {
            infraProducer.send(new ProducerRecord<>(infraManagerTopic, clusterKey, clusterValue));
        } catch (Exception e) {
            log.error("Error producing to topic={}", infraManagerTopic, e);
        }
    }

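    /**
     * Creates or updates the given topics in the cluster addressed by {@code properties}.
     * Topics are partitioned by existence: existing topics flow through
     * {@link #updateTopics}, which throws a {@link StreamCreationException} when
     * {@code isNewStream} is true but the actual config differs; missing topics are created
     * with the requested partition count and replication factor.
     */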
    @Override
    public void upsertTopics(Collection<String> topics, int partitions, int replicationFactor, Properties properties, boolean isNewStream)
        throws StreamCreationException {
        // TODO - Cannot guarantee against race conditions... should probably move to an event-sourced paradigm to
        //      protect against this (and maybe employ optimistic locking for extra safety).
        //      The issue is that nothing "locks" the underlying kafka store -- something
        //      can change the underlying store while this method is evaluating, so there is
        //      always some amount of race window.

        // TODO probably need to cache a KafkaInfraManager per "cluster" to avoid un-necessary creation / destruction of connections (#115)
        ZkUtils zkUtils = initZkUtils(properties);
        try {
            // remove client connection properties to leave only topic configs
            Map<String, String> topicConfigMap = filterPropertiesKeys(properties, TOPIC_CONFIG_KEY_FILTER);

            // partition the list by whether the topic exists or not
            Map<Boolean, List<String>> partitionMaps = topics.stream().collect(Collectors.partitioningBy(topic -> topicExists(zkUtils, topic)));

            // if it exists, update it.  If it doesn't exist, create it
            List<String> topicsToUpdate = partitionMaps.get(true);
            List<String> topicsToCreate = partitionMaps.get(false);

            // update any topics that are necessary
            updateTopics(zkUtils, topicsToUpdate, topicConfigMap, isNewStream);

            // now create any topics that were necessary to create this run
            createTopics(zkUtils, topicsToCreate, partitions, replicationFactor, topicConfigMap);
        } finally {
            shutdownZkUtils(zkUtils);
        }
    }

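    // Builds a ZkUtils handle from the zookeeper.quorum connection string; ZKStringSerializer
    // matches the string encoding Kafka's AdminUtils expects for its ZooKeeper nodes.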
    private ZkUtils initZkUtils(Properties config) {
        String zkConnect = config.getProperty(KafkaProducerConfig.ZOOKEEPER_QUORUM);
        ZkClient zkClient = new ZkClient(zkConnect);
        zkClient.setZkSerializer(ZKStringSerializer$.MODULE$);
        ZkConnection zkConnection = new ZkConnection(zkConnect);
        return new ZkUtils(zkClient, zkConnection, false);
    }

    // package scope so that PowerMock can leverage
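    // Example (illustrative values): given Properties {bootstrap.servers=broker:9092, retention.ms=86400000}
    // and TOPIC_CONFIG_KEY_FILTER above, the result is {retention.ms=86400000} -- keys present in
    // keyFilterMap are dropped, and everything else passes through as topic-level config.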
    Map<String, String> filterPropertiesKeys(Properties properties, Map<String, Boolean> keyFilterMap) {
        return new HashMap<>(properties.stringPropertyNames().stream()
            .filter(key -> !keyFilterMap.containsKey(key))
            .filter(key -> properties.getProperty(key) != null)
            .collect(Collectors.toMap(key -> (String)key, properties::getProperty)));
    }

    private boolean topicExists(ZkUtils zkUtils, String topic) {
        boolean topicExists = AdminUtils.topicExists(zkUtils, topic);
        log.debug("topic: {} exists={}", topic, topicExists);
        return topicExists;
    }

    void updateTopics(ZkUtils zkUtils, List<String> topicsToUpdate, Map<String, String> topicConfigMap, boolean isStreamNotAvailableInStreamRegistryDB)
        throws StreamCreationException {
        for (String topic : topicsToUpdate) {
            // update topic
            Properties actualTopicConfig = getTopicConfig(zkUtils, topic);
            Map<String, String> actualTopicConfigMap = propertiesToMap(actualTopicConfig);
            if (actualTopicConfigMap.equals(topicConfigMap)) {
                // NOTHING TO DO!
                log.info("topic config for {} exactly matches. Ignoring.", topic);
                continue;
            }

            // NOTE: If a newly created stream is requested in Stream Registry but the topic is already
            // present in the underlying streaming infrastructure... AND we got this far, the
            // configuration must be different. We want to prevent this from actually changing the
            // underlying infrastructure, so the operation fails with an exception.
            //
            // This provides a safety mechanism and a migration path by requiring folks
            // to exactly match the downstream config when the stream registry "onboards" an
            // existing topic for the first time.

            // TODO Alternatively we can add a forceSync=true flag, ignoring any user provided info, and only updating SR with the underlying settings
            //      We should probably do forceSync=true anyway, as it provides a simple way to keep things in sync (#114)
            if (isStreamNotAvailableInStreamRegistryDB) {
                throw new StreamCreationException(String.format("Error: Input configs=%s and actual configs=%s are not the same for topic=%s",
                    topicConfigMap, actualTopicConfig, topic));
            }

            // If we got this far, we are "updating" an "existing" stream, and the requested config
            // differs from what is in the stream registry. Go ahead and update now.
            updateTopic(zkUtils, topic, topicConfigMap);
        }
    }

    private Properties getTopicConfig(ZkUtils zkUtils, String topic) {
        return AdminUtils.fetchEntityConfig(zkUtils, ConfigType.Topic(), topic);
    }

    // package scope so that PowerMock can leverage
    Map<String, String> propertiesToMap(Properties properties) {
        return properties.stringPropertyNames().stream()
            .filter(key -> properties.getProperty(key) != null)
            .collect(Collectors.toMap(key -> (String)key,
                properties::getProperty));
    }

    private void updateTopic(ZkUtils zkUtils, String topic, Map<String, String> configMap) {
        Properties topicProperties = new Properties();
        topicProperties.putAll(configMap);
        AdminUtils.changeTopicConfig(zkUtils, topic, topicProperties);
        log.info("Kafka Topic={} config updated to {}", topic, topicProperties);
    }

    // package scope so that PowerMock can verify
    void createTopics(ZkUtils zkUtils, Collection<String> topics, int partitions, int replicationFactor, Map<String, String> topicConfigMap) {
        for (String topic : topics) {
            createTopic(zkUtils, topic, partitions, replicationFactor, topicConfigMap);
        }
    }

    private void createTopic(ZkUtils zkUtils, String topic, int partitions, int replicationFactor, Map<String, String> topicConfigMap) {
        Properties topicProperties = new Properties();
        topicProperties.putAll(topicConfigMap);
        AdminUtils.createTopic(zkUtils, topic, partitions, replicationFactor, topicProperties, RackAwareMode.Enforced$.MODULE$);
        log.info("Kafka Topic={} created with partition={}, replicationFactor={}, config={}",
                topic, partitions, replicationFactor, topicConfigMap);
    }

    private void shutdownZkUtils(ZkUtils zkUtils) {
        try {
            zkUtils.close();
        } catch (RuntimeException exception) {
            log.error("Unexpected exception caught during zkUtils shutdown.", exception);
        }
    }

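    /**
     * Builds the {@link Materialized} spec backing the global table: a key-value store named by
     * {@code infraStateStoreName}, with Schema-Registry-backed Avro serdes for {@link ClusterKey}
     * (configured with isKey=true) and {@link ClusterValue}.
     */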
    private Materialized<ClusterKey, ClusterValue, KeyValueStore<Bytes, byte[]>> createMaterialized(String schemaRegistryUrl) {
        final Map<String, String> serdeConfig =
            Collections.singletonMap(SCHEMA_REGISTRY_URL_CONFIG,
                schemaRegistryUrl);

        final SpecificAvroSerde<ClusterKey> keySpecificAvroSerde = new SpecificAvroSerde<>();
        keySpecificAvroSerde.configure(serdeConfig, true);

        final SpecificAvroSerde<ClusterValue> valueSpecificAvroSerde = new SpecificAvroSerde<>();
        valueSpecificAvroSerde.configure(serdeConfig, false);

        return Materialized.<ClusterKey, ClusterValue, KeyValueStore<Bytes, byte[]>>as(infraStateStoreName)
            .withKeySerde(keySpecificAvroSerde)
            .withValueSerde(valueSpecificAvroSerde);
    }
}