// Source listing: org.apache.kafka.connect.storage.KafkaConfigBackingStore (Maven / Gradle / Ivy artifact browser header)
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.connect.storage;
import org.apache.kafka.clients.admin.NewTopic;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.common.config.ConfigException;
import org.apache.kafka.common.serialization.ByteArrayDeserializer;
import org.apache.kafka.common.serialization.ByteArraySerializer;
import org.apache.kafka.common.serialization.StringDeserializer;
import org.apache.kafka.common.serialization.StringSerializer;
import org.apache.kafka.common.utils.Time;
import org.apache.kafka.common.utils.Utils;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.SchemaAndValue;
import org.apache.kafka.connect.data.SchemaBuilder;
import org.apache.kafka.connect.data.Struct;
import org.apache.kafka.connect.errors.ConnectException;
import org.apache.kafka.connect.errors.DataException;
import org.apache.kafka.connect.runtime.TargetState;
import org.apache.kafka.connect.runtime.WorkerConfig;
import org.apache.kafka.connect.runtime.WorkerConfigTransformer;
import org.apache.kafka.connect.runtime.distributed.ClusterConfigState;
import org.apache.kafka.connect.runtime.distributed.DistributedConfig;
import org.apache.kafka.connect.util.Callback;
import org.apache.kafka.connect.util.ConnectorTaskId;
import org.apache.kafka.connect.util.KafkaBasedLog;
import org.apache.kafka.connect.util.TopicAdmin;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
/**
*
* Provides persistent storage of Kafka Connect connector configurations in a Kafka topic.
*
*
* This class manages both connector and task configurations. It tracks three types of configuration entries:
*
* 1. Connector config: map of string -> string configurations passed to the Connector class, with support for
* expanding this format if necessary. (Kafka key: connector-[connector-id]).
* These configs are *not* ephemeral. They represent the source of truth. If the entire Connect
* cluster goes down, this is all that is really needed to recover.
* 2. Task configs: map of string -> string configurations passed to the Task class, with support for expanding
* this format if necessary. (Kafka key: task-[connector-id]-[task-id]).
* These configs are ephemeral; they are stored here to a) disseminate them to all workers while
* ensuring agreement and b) to allow faster cluster/worker recovery since the common case
* of recovery (restoring a connector) will simply result in the same configuration as before
* the failure.
* 3. Task commit "configs": records indicating that previous task config entries should be committed and all task
* configs for a connector can be applied. (Kafka key: commit-[connector-id]).
* This config has two effects. First, it records the number of tasks the connector is currently
* running (and can therefore increase/decrease parallelism). Second, because each task config
* is stored separately but they need to be applied together to ensure each partition is assigned
* to a single task, this record also indicates that task configs for the specified connector
* can be "applied" or "committed".
*
*
* This configuration is expected to be stored in a *single partition* and *compacted* topic. Using a single partition
* ensures we can enforce ordering on messages, allowing Kafka to be used as a write ahead log. Compaction allows
* us to clean up outdated configurations over time. However, this combination has some important implications for
* the implementation of this class and the configuration state that it may expose.
*
*
* Connector configurations are independent of all other configs, so they are handled easily. Writing a single record
* is already atomic, so these can be applied as soon as they are read. One connector's config does not affect any
* others, and they do not need to coordinate with the connector's task configuration at all.
*
*
* The most obvious implication for task configs is the need for the commit messages. Because Kafka does not
* currently have multi-record transactions or support atomic batch record writes, task commit messages are required
* to ensure that readers do not end up using inconsistent configs. For example, consider if a connector wrote configs
* for its tasks, then was reconfigured and only managed to write updated configs for half its tasks. If task configs
* were applied immediately you could be using half the old configs and half the new configs. In that condition, some
* partitions may be double-assigned because the old config and new config may use completely different assignments.
* Therefore, when reading the log, we must buffer config updates for a connector's tasks and only apply them atomically
* once a commit message has been read.
*
*
* However, there are also further challenges. This simple buffering approach would work fine as long as the entire log was
* always available, but we would like to be able to enable compaction so our configuration topic does not grow
* indefinitely. Compaction may break a normal log because old entries will suddenly go missing. A new worker reading
* from the beginning of the log in order to build up the full current configuration will see task commits, but some
* records required for those commits will have been removed because the same keys have subsequently been rewritten.
* For example, if you have a sequence of record keys [connector-foo-config, task-foo-1-config, task-foo-2-config,
* commit-foo (2 tasks), task-foo-1-config, commit-foo (1 task)], we can end up with a compacted log containing
* [connector-foo-config, task-foo-2-config, commit-foo (2 tasks), task-foo-1-config, commit-foo (1 task)]. When read
* back, the first commit will see an invalid state because the first task-foo-1-config has been cleaned up.
*
*
* Compaction can further complicate things if writing new task configs fails mid-write. Consider a similar scenario
* as the previous one, but in this case both the first and second update will write 2 task configs. However, the
* second write fails half of the way through:
* [connector-foo-config, task-foo-1-config, task-foo-2-config, commit-foo (2 tasks), task-foo-1-config]. Now compaction
* occurs and we're left with
* [connector-foo-config, task-foo-2-config, commit-foo (2 tasks), task-foo-1-config]. At the first commit, we don't
* have a complete set of configs. And because of the failure, there is no second commit. We are left in an inconsistent
* state with no obvious way to resolve the issue -- we can try to keep on reading, but the failed node may never
* recover and write the updated config. Meanwhile, other workers may have seen the entire log; they will see the second
* task-foo-1-config waiting to be applied, but will otherwise think everything is ok -- they have a valid set of task
* configs for connector "foo".
*
*
* Because we can encounter these inconsistencies and addressing them requires support from the rest of the system
* (resolving the task configuration inconsistencies requires support from the connector instance to regenerate updated
* configs), this class exposes not only the current set of configs, but also which connectors have inconsistent data.
* This allows users of this class (i.e., Herder implementations) to take action to resolve any inconsistencies. These
* inconsistencies should be rare (as described above, due to compaction combined with leader failures in the middle
* of updating task configurations).
*
*
* Note that the expectation is that this config storage system has only a single writer at a time.
* The caller (Herder) must ensure this is the case. In distributed mode this will require forwarding config change
* requests to the leader in the cluster (i.e. the worker group coordinated by the Kafka broker).
*
*
* Since processing of the config log occurs in a background thread, callers must take care when using accessors.
* To simplify handling this correctly, this class only exposes a mechanism to snapshot the current state of the cluster.
* Updates may continue to be applied (and callbacks invoked) in the background. Callers must take care that they are
* using a consistent snapshot and only update when it is safe. In particular, if task configs are updated which require
* synchronization across workers to commit offsets and update the configuration, callbacks and updates during the
* rebalance must be deferred.
*
*/
public class KafkaConfigBackingStore implements ConfigBackingStore {
// Class-wide SLF4J logger.
private static final Logger log = LoggerFactory.getLogger(KafkaConfigBackingStore.class);
// Record-key prefix for per-connector target state entries in the config topic.
public static final String TARGET_STATE_PREFIX = "target-state-";
/**
 * Build the config-topic record key for a connector's target state entry.
 *
 * @param connectorName name of the connector
 * @return the key "target-state-[connectorName]"
 */
public static String TARGET_STATE_KEY(String connectorName) {
return TARGET_STATE_PREFIX + connectorName;
}
// Record-key prefix for connector configuration entries in the config topic.
public static final String CONNECTOR_PREFIX = "connector-";
/**
 * Build the config-topic record key for a connector's configuration entry.
 *
 * @param connectorName name of the connector
 * @return the key "connector-[connectorName]"
 */
public static String CONNECTOR_KEY(String connectorName) {
return CONNECTOR_PREFIX + connectorName;
}
// Record-key prefix for task configuration entries in the config topic.
public static final String TASK_PREFIX = "task-";
/**
 * Build the config-topic record key for a task's configuration entry.
 *
 * @param taskId connector name + task index identifying the task
 * @return the key "task-[connector]-[task]"
 */
public static String TASK_KEY(ConnectorTaskId taskId) {
return TASK_PREFIX + taskId.connector() + "-" + taskId.task();
}
// Record-key prefix for task-commit entries (see class javadoc, item 3).
public static final String COMMIT_TASKS_PREFIX = "commit-";
/**
 * Build the config-topic record key for a connector's task-commit entry.
 *
 * @param connectorName name of the connector
 * @return the key "commit-[connectorName]"
 */
public static String COMMIT_TASKS_KEY(String connectorName) {
return COMMIT_TASKS_PREFIX + connectorName;
}
// Note that while using real serialization for values as we have here, but ad hoc string serialization for keys,
// isn't ideal, we use this approach because it avoids any potential problems with schema evolution or
// converter/serializer changes causing keys to change. We need to absolutely ensure that the keys remain precisely
// the same.
public static final Schema CONNECTOR_CONFIGURATION_V0 = SchemaBuilder.struct()
.field("properties", SchemaBuilder.map(Schema.STRING_SCHEMA, Schema.OPTIONAL_STRING_SCHEMA).build())
.build();
public static final Schema TASK_CONFIGURATION_V0 = CONNECTOR_CONFIGURATION_V0;
public static final Schema CONNECTOR_TASKS_COMMIT_V0 = SchemaBuilder.struct()
.field("tasks", Schema.INT32_SCHEMA)
.build();
public static final Schema TARGET_STATE_V0 = SchemaBuilder.struct()
.field("state", Schema.STRING_SCHEMA)
.build();
private static final long READ_TO_END_TIMEOUT_MS = 30000;
private final Object lock;
private final Converter converter;
private volatile boolean started;
// Although updateListener is not final, it's guaranteed to be visible to any thread after its
// initialization as long as we always read the volatile variable "started" before we access the listener.
private UpdateListener updateListener;
private final String topic;
// Data is passed to the log already serialized. We use a converter to handle translating to/from generic Connect
// format to serialized form
private final KafkaBasedLog configLog;
// Connector -> # of tasks
private final Map connectorTaskCounts = new HashMap<>();
// Connector and task configs: name or id -> config map
private final Map> connectorConfigs = new HashMap<>();
private final Map> taskConfigs = new HashMap<>();
// Set of connectors where we saw a task commit with an incomplete set of task config updates, indicating the data
// is in an inconsistent state and we cannot safely use them until they have been refreshed.
private final Set inconsistent = new HashSet<>();
// The most recently read offset. This does not take into account deferred task updates/commits, so we may have
// outstanding data to be applied.
private volatile long offset;
// Connector -> Map[ConnectorTaskId -> Configs]
private final Map>> deferredTaskUpdates = new HashMap<>();
private final Map connectorTargetStates = new HashMap<>();
private final WorkerConfigTransformer configTransformer;
/**
 * Create a config backing store persisting to the worker's configured config topic.
 *
 * @param converter         converter used to serialize config values written to the log
 * @param config            worker configuration; must define {@code DistributedConfig.CONFIG_TOPIC_CONFIG}
 * @param configTransformer transformer passed through to snapshots for config value resolution
 * @throws ConfigException if the config topic is unset or blank
 */
public KafkaConfigBackingStore(Converter converter, WorkerConfig config, WorkerConfigTransformer configTransformer) {
this.lock = new Object();
this.started = false;
this.converter = converter;
this.offset = -1;
this.topic = config.getString(DistributedConfig.CONFIG_TOPIC_CONFIG);
// Fail fast on a missing/blank topic rather than failing later inside the Kafka clients.
if (this.topic == null || this.topic.trim().isEmpty())
throw new ConfigException("Must specify topic for connector configuration.");
configLog = setupAndCreateKafkaBasedLog(this.topic, config);
this.configTransformer = configTransformer;
}
/**
 * Register the listener notified of config updates read from the log.
 * NOTE(review): per the field comment on updateListener, visibility to other threads relies
 * on reading the volatile "started" flag first — set the listener before {@link #start()}.
 */
@Override
public void setUpdateListener(UpdateListener listener) {
this.updateListener = listener;
}
/**
 * Start tailing the config log. Once this returns, update callbacks may be invoked from the
 * background log thread and {@link #snapshot()} may be used.
 */
@Override
public void start() {
log.info("Starting KafkaConfigBackingStore");
// Before startup, callbacks are *not* invoked. You can grab a snapshot after starting -- just take care that
// updates can continue to occur in the background
configLog.start();
// Set last so readers of "started" see a fully-initialized listener (see field comment).
started = true;
log.info("Started KafkaConfigBackingStore");
}
/**
 * Stop the backing log and release its resources. No further callbacks are delivered after this.
 */
@Override
public void stop() {
log.info("Closing KafkaConfigBackingStore");
configLog.stop();
log.info("Closed KafkaConfigBackingStore");
}
/**
 * Get a snapshot of the current state of the cluster.
 *
 * @return an immutable-by-convention view of the configs, target states, and offset as of now
 */
@Override
public ClusterConfigState snapshot() {
synchronized (lock) {
// Shallow copies suffice: the nested config maps stored here are treated as immutable.
Map<String, Integer> taskCountsCopy = new HashMap<>(connectorTaskCounts);
Map<String, Map<String, String>> connectorConfigsCopy = new HashMap<>(connectorConfigs);
Map<String, TargetState> targetStatesCopy = new HashMap<>(connectorTargetStates);
Map<ConnectorTaskId, Map<String, String>> taskConfigsCopy = new HashMap<>(taskConfigs);
Set<String> inconsistentCopy = new HashSet<>(inconsistent);
return new ClusterConfigState(
offset,
taskCountsCopy,
connectorConfigsCopy,
targetStatesCopy,
taskConfigsCopy,
inconsistentCopy,
configTransformer
);
}
}
/**
 * Check whether a configuration is currently stored for the given connector.
 *
 * @param connector name of the connector
 * @return true if a config for the connector is present in the most recently read state
 */
@Override
public boolean contains(String connector) {
boolean present;
synchronized (lock) {
present = connectorConfigs.containsKey(connector);
}
return present;
}
/**
 * Write this connector configuration to persistent storage and wait until it has been acknowledged and read back by
 * tailing the Kafka log with a consumer.
 *
 * @param connector name of the connector to write data for
 * @param properties the configuration to write
 * @throws ConnectException if the write or the subsequent read-back fails or times out
 */
@Override
public void putConnectorConfig(String connector, Map<String, String> properties) {
log.debug("Writing connector configuration for connector '{}'", connector);
Struct connectConfig = new Struct(CONNECTOR_CONFIGURATION_V0);
connectConfig.put("properties", properties);
// Serialize with the configured Converter so the value format matches what the log reader expects.
byte[] serializedConfig = converter.fromConnectData(topic, CONNECTOR_CONFIGURATION_V0, connectConfig);
updateConnectorConfig(connector, serializedConfig);
}
/**
 * Remove configuration for a given connector: tombstones both the connector config and its
 * target state in the config topic, then waits until the removal has been read back.
 *
 * @param connector name of the connector to remove
 * @throws ConnectException if the writes or the read-to-end fail or time out
 */
@Override
public void removeConnectorConfig(String connector) {
log.debug("Removing connector configuration for connector '{}'", connector);
try {
// Null values are tombstones; compaction will eventually drop these keys entirely.
configLog.send(CONNECTOR_KEY(connector), null);
configLog.send(TARGET_STATE_KEY(connector), null);
// Block until we have read our own tombstones back so a subsequent snapshot reflects the removal.
configLog.readToEnd().get(READ_TO_END_TIMEOUT_MS, TimeUnit.MILLISECONDS);
} catch (InterruptedException e) {
// Restore the interrupt flag so callers up the stack can observe the interruption.
Thread.currentThread().interrupt();
log.error("Failed to remove connector configuration from Kafka: ", e);
throw new ConnectException("Error removing connector configuration from Kafka", e);
} catch (ExecutionException | TimeoutException e) {
log.error("Failed to remove connector configuration from Kafka: ", e);
throw new ConnectException("Error removing connector configuration from Kafka", e);
}
}
/**
 * Not supported by this store.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public void removeTaskConfigs(String connector) {
throw new UnsupportedOperationException("Removal of tasks is not currently supported");
}
/**
 * Send a serialized connector config to the log and block until it has been read back,
 * ensuring the write is visible in this worker's state before returning.
 *
 * @param connector        name of the connector
 * @param serializedConfig serialized config value to write
 * @throws ConnectException if the write or the read-to-end fails or times out
 */
private void updateConnectorConfig(String connector, byte[] serializedConfig) {
try {
configLog.send(CONNECTOR_KEY(connector), serializedConfig);
configLog.readToEnd().get(READ_TO_END_TIMEOUT_MS, TimeUnit.MILLISECONDS);
} catch (InterruptedException e) {
// Preserve the thread's interrupt status for callers.
Thread.currentThread().interrupt();
log.error("Failed to write connector configuration to Kafka: ", e);
throw new ConnectException("Error writing connector configuration to Kafka", e);
} catch (ExecutionException | TimeoutException e) {
log.error("Failed to write connector configuration to Kafka: ", e);
throw new ConnectException("Error writing connector configuration to Kafka", e);
}
}
/**
* Write these task configurations and associated commit messages, unless an inconsistency is found that indicates
* that we would be leaving one of the referenced connectors with an inconsistent state.
*
* @param connector the connector to write task configuration
* @param configs list of task configurations for the connector
* @throws ConnectException if the task configurations do not resolve inconsistencies found in the existing root
* and task configurations.
*/
@Override
public void putTaskConfigs(String connector, List