All Downloads are FREE. Search and download functionality uses the official Maven repository.

org.apache.flink.kubernetes.highavailability.KubernetesCheckpointIDCounter Maven / Gradle / Ivy

There is a newer version: 2.0-preview1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.kubernetes.highavailability;

import org.apache.flink.api.common.JobStatus;
import org.apache.flink.kubernetes.kubeclient.FlinkKubeClient;
import org.apache.flink.kubernetes.kubeclient.resources.KubernetesConfigMap;
import org.apache.flink.kubernetes.kubeclient.resources.KubernetesException;
import org.apache.flink.kubernetes.kubeclient.resources.KubernetesLeaderElector;
import org.apache.flink.runtime.checkpoint.CheckpointIDCounter;
import org.apache.flink.util.FlinkRuntimeException;
import org.apache.flink.util.concurrent.FutureUtils;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.Nullable;

import java.util.Optional;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.atomic.AtomicLong;

import static org.apache.flink.kubernetes.utils.Constants.CHECKPOINT_COUNTER_KEY;
import static org.apache.flink.util.Preconditions.checkNotNull;

/**
 * {@link CheckpointIDCounter} implementation for Kubernetes. The counter will be stored in
 * JobManager-{@link org.apache.flink.api.common.JobID}-leader ConfigMap. The key is {@link
 * org.apache.flink.kubernetes.utils.Constants#CHECKPOINT_COUNTER_KEY}, and value is counter value.
 *
 * <p>All modifications go through {@code FlinkKubeClient#checkAndUpdateConfigMap} and are only
 * applied when {@code isValidOperation} accepts the ConfigMap, i.e. when no lock identity was
 * configured or the configured identity holds the leadership recorded in the ConfigMap.
 *
 * <p>NOTE(review): {@code running} is a plain, unsynchronized field; presumably {@link #start()}
 * and {@link #shutdown(JobStatus)} are invoked from a single thread - confirm against callers.
 */
public class KubernetesCheckpointIDCounter implements CheckpointIDCounter {

    private static final Logger LOG = LoggerFactory.getLogger(KubernetesCheckpointIDCounter.class);

    private final FlinkKubeClient kubeClient;

    private final String configMapName;

    /** Identity checked for leadership before writing; {@code null} disables the check. */
    @Nullable private final String lockIdentity;

    private boolean running;

    /**
     * Creates a counter backed by the given ConfigMap. The counter is not running until {@link
     * #start()} is called.
     *
     * @param kubeClient client used to read and update the ConfigMap, must not be null
     * @param configMapName name of the ConfigMap holding the counter, must not be null
     * @param lockIdentity identity that must hold the leadership for writes to be applied, or
     *     {@code null} to skip the leadership check
     */
    public KubernetesCheckpointIDCounter(
            FlinkKubeClient kubeClient, String configMapName, @Nullable String lockIdentity) {
        this.kubeClient = checkNotNull(kubeClient);
        this.configMapName = checkNotNull(configMapName);
        this.lockIdentity = lockIdentity;

        this.running = false;
    }

    /** Marks this counter as running. Calling it on a running counter has no effect. */
    @Override
    public void start() {
        running = true;
    }

    /**
     * Stops this counter and, if the job reached a globally terminal state, removes the counter
     * entry from the ConfigMap.
     *
     * @param jobStatus status of the job at shutdown time
     * @return a future that completes once the cleanup (if any) has finished; already completed
     *     when the counter was not running or the job status is not globally terminal
     */
    @Override
    public CompletableFuture<Void> shutdown(JobStatus jobStatus) {
        if (!running) {
            return FutureUtils.completedVoidFuture();
        }
        running = false;

        LOG.info("Shutting down.");
        if (!jobStatus.isGloballyTerminalState()) {
            // The job may still be recovered, so the counter entry has to stay.
            return FutureUtils.completedVoidFuture();
        }

        LOG.info("Removing counter from ConfigMap {}", configMapName);
        return kubeClient
                .checkAndUpdateConfigMap(
                        configMapName,
                        configMap -> {
                            if (!isValidOperation(configMap)) {
                                return Optional.empty();
                            }
                            configMap.getData().remove(CHECKPOINT_COUNTER_KEY);
                            return Optional.of(configMap);
                        })
                // checkAndUpdateConfigMap only returns false if the callback returned an empty
                // ConfigMap. We don't want to continue the cleanup in that case, i.e. we can
                // ignore the return value
                .thenApply(valueChanged -> null);
    }

    /**
     * Checks whether this counter is allowed to modify the given ConfigMap.
     *
     * @param configMap the ConfigMap about to be modified
     * @return {@code true} if no lock identity is configured or the configured identity has the
     *     leadership according to {@link KubernetesLeaderElector#hasLeadership}
     */
    private boolean isValidOperation(KubernetesConfigMap configMap) {
        return lockIdentity == null
                || KubernetesLeaderElector.hasLeadership(configMap, lockIdentity);
    }

    /**
     * Stores the current counter value plus one in the ConfigMap and returns the value that was
     * current before the increment. Blocks until the update has completed.
     *
     * @return the counter value before the increment
     * @throws Exception if waiting for the update fails, or a {@link KubernetesException} if the
     *     update was not applied
     */
    @Override
    public long getAndIncrement() throws Exception {
        // Carries the value read inside the update callback out to this method.
        final AtomicLong previous = new AtomicLong();
        final boolean updated =
                kubeClient
                        .checkAndUpdateConfigMap(
                                configMapName,
                                configMap -> {
                                    if (!isValidOperation(configMap)) {
                                        return Optional.empty();
                                    }
                                    final long currentValue = getCurrentCounter(configMap);
                                    previous.set(currentValue);
                                    configMap
                                            .getData()
                                            .put(
                                                    CHECKPOINT_COUNTER_KEY,
                                                    String.valueOf(currentValue + 1));
                                    return Optional.of(configMap);
                                })
                        .get();

        if (!updated) {
            throw new KubernetesException(
                    "Failed to update ConfigMap "
                            + configMapName
                            + " since current KubernetesCheckpointIDCounter does not have the leadership.");
        }
        return previous.get();
    }

    /**
     * Reads the current counter value from the ConfigMap without modifying it.
     *
     * @return the stored counter value, or {@code INITIAL_CHECKPOINT_ID} if the ConfigMap has no
     *     counter entry
     * @throws FlinkRuntimeException wrapping a {@link KubernetesException} if the ConfigMap does
     *     not exist
     */
    @Override
    public long get() {
        return kubeClient
                .getConfigMap(configMapName)
                .map(this::getCurrentCounter)
                .orElseThrow(
                        () ->
                                new FlinkRuntimeException(
                                        new KubernetesException(
                                                "ConfigMap "
                                                        + configMapName
                                                        + " does not exist.")));
    }

    /**
     * Overwrites the stored counter with {@code newCount}. Blocks until the update attempt has
     * completed.
     *
     * <p>The write is skipped when the operation is not valid for this counter or when the stored
     * value already equals {@code newCount}. The result of the update is not inspected, so a
     * skipped write is not reported to the caller.
     *
     * @param newCount the value to store
     * @throws Exception if waiting for the update fails
     */
    @Override
    public void setCount(long newCount) throws Exception {
        final String newValue = String.valueOf(newCount);
        kubeClient
                .checkAndUpdateConfigMap(
                        configMapName,
                        configMap -> {
                            if (!isValidOperation(configMap)) {
                                return Optional.empty();
                            }
                            final String existing =
                                    configMap.getData().get(CHECKPOINT_COUNTER_KEY);
                            // equals(null) is false, so a missing entry is written as well.
                            if (newValue.equals(existing)) {
                                return Optional.empty();
                            }
                            configMap.getData().put(CHECKPOINT_COUNTER_KEY, newValue);
                            return Optional.of(configMap);
                        })
                .get();
    }

    /**
     * Extracts the counter value from the given ConfigMap.
     *
     * @param configMap the ConfigMap to read from
     * @return the parsed counter value, or {@code INITIAL_CHECKPOINT_ID} if the ConfigMap has no
     *     counter entry
     * @throws NumberFormatException if the stored value is not a parsable {@code long}
     */
    private long getCurrentCounter(KubernetesConfigMap configMap) {
        if (!configMap.getData().containsKey(CHECKPOINT_COUNTER_KEY)) {
            return INITIAL_CHECKPOINT_ID;
        }
        // parseLong yields the primitive directly instead of boxing and unboxing a Long.
        return Long.parseLong(configMap.getData().get(CHECKPOINT_COUNTER_KEY));
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy