All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.flink.kubernetes.highavailability.KubernetesStateHandleStore Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.kubernetes.highavailability;

import org.apache.flink.annotation.VisibleForTesting;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.kubernetes.kubeclient.FlinkKubeClient;
import org.apache.flink.kubernetes.kubeclient.resources.KubernetesConfigMap;
import org.apache.flink.kubernetes.kubeclient.resources.KubernetesException;
import org.apache.flink.kubernetes.kubeclient.resources.KubernetesLeaderElector;
import org.apache.flink.runtime.persistence.PossibleInconsistentStateException;
import org.apache.flink.runtime.persistence.RetrievableStateStorageHelper;
import org.apache.flink.runtime.persistence.StateHandleStore;
import org.apache.flink.runtime.persistence.StringResourceVersion;
import org.apache.flink.runtime.state.RetrievableStateHandle;
import org.apache.flink.runtime.state.StateObject;
import org.apache.flink.util.ExceptionUtils;
import org.apache.flink.util.InstantiationUtil;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.Nullable;

import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Base64;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CompletionException;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.stream.Collectors;

import static org.apache.flink.runtime.util.StateHandleStoreUtils.deserialize;
import static org.apache.flink.runtime.util.StateHandleStoreUtils.serializeOrDiscard;
import static org.apache.flink.util.Preconditions.checkNotNull;

/**
 * Class which stores state via the provided {@link RetrievableStateStorageHelper} and writes the
 * returned state handle to ConfigMap.
 *
 * 

Added state is persisted via {@link RetrievableStateHandle RetrievableStateHandles}, which in * turn are written to ConfigMap. This level of indirection is necessary to keep the amount of data * in ConfigMap small. ConfigMap is build for data less than 1MB whereas state can grow to multiple * MBs and GBs. * *

This is a very different implementation with {@link * org.apache.flink.runtime.zookeeper.ZooKeeperStateHandleStore}. Benefit from the {@link * FlinkKubeClient#checkAndUpdateConfigMap} transactional operation, we could guarantee that only * the leader could update the store. Then we will completely get rid of the lock-and-release in * Zookeeper implementation. * * @param Type of the state we're storing. */ public class KubernetesStateHandleStore implements StateHandleStore { private static final Logger LOG = LoggerFactory.getLogger(KubernetesStateHandleStore.class); private static StateHandleWithDeleteMarker deserializeStateHandle( String content) throws IOException { checkNotNull(content, "Content should not be null."); final byte[] data = Base64.getDecoder().decode(content); try { return deserialize(data); } catch (IOException | ClassNotFoundException e) { throw new IOException( String.format( "Failed to deserialize state handle from ConfigMap data %s.", content), e); } } private static String toBase64(byte[] bytes) { return Base64.getEncoder().encodeToString(bytes); } @VisibleForTesting static String serializeStateHandle(StateHandleWithDeleteMarker stateHandle) throws IOException { return toBase64(InstantiationUtil.serializeObject(stateHandle)); } /** * Wrapper around state object that allows us to implement idempotent {@link * #releaseAndTryRemove(String)}. * * @param Type of the state we're storing. 
*/ @VisibleForTesting static class StateHandleWithDeleteMarker implements StateObject { private final RetrievableStateHandle inner; private final boolean markedForDeletion; StateHandleWithDeleteMarker(RetrievableStateHandle inner) { this(inner, false); } private StateHandleWithDeleteMarker( RetrievableStateHandle inner, boolean markedForDeletion) { this.inner = inner; this.markedForDeletion = markedForDeletion; } @Override public void discardState() throws Exception { inner.discardState(); } @Override public long getStateSize() { return inner.getStateSize(); } RetrievableStateHandle getInner() { return inner; } boolean isMarkedForDeletion() { return markedForDeletion; } StateHandleWithDeleteMarker toDeleting() { return new StateHandleWithDeleteMarker<>(inner, true); } } private final FlinkKubeClient kubeClient; private final String configMapName; private final RetrievableStateStorageHelper storage; private final Predicate configMapKeyFilter; @Nullable private final String lockIdentity; /** * Creates a {@link KubernetesStateHandleStore}. * * @param kubeClient The Kubernetes client. * @param storage To persist the actual state and whose returned state handle is then written to * ConfigMap * @param configMapName ConfigMap to store the state handle store pointer * @param configMapKeyFilter filter to get the expected keys for state handle * @param lockIdentity lock identity of current HA service */ public KubernetesStateHandleStore( FlinkKubeClient kubeClient, String configMapName, RetrievableStateStorageHelper storage, Predicate configMapKeyFilter, @Nullable String lockIdentity) { this.kubeClient = checkNotNull(kubeClient, "Kubernetes client"); this.storage = checkNotNull(storage, "State storage"); this.configMapName = checkNotNull(configMapName, "ConfigMap name"); this.configMapKeyFilter = checkNotNull(configMapKeyFilter); this.lockIdentity = lockIdentity; } /** * Creates a state handle, stores it in ConfigMap. 
We could guarantee that only the leader could * update the ConfigMap. Since “Get(check the leader)-and-Update(write back to the ConfigMap)” * is a transactional operation. * * @param key Key in ConfigMap * @param state State to be added * @throws AlreadyExistException if the name already exists * @throws PossibleInconsistentStateException if the write-to-Kubernetes operation failed. This * indicates that it's not clear whether the new state was successfully written to * Kubernetes or not. No state was discarded. Proper error handling has to be applied on the * caller's side. * @throws Exception if persisting state or writing state handle failed */ @Override public RetrievableStateHandle addAndLock(String key, T state) throws PossibleInconsistentStateException, Exception { checkNotNull(key, "Key in ConfigMap."); checkNotNull(state, "State."); final RetrievableStateHandle storeHandle = storage.store(state); final byte[] serializedStoreHandle = serializeOrDiscard(new StateHandleWithDeleteMarker<>(storeHandle)); // initialize flag to serve the failure case boolean discardState = true; try { // a successful operation will result in the state not being discarded discardState = !updateConfigMap( cm -> { try { return addEntry(cm, key, serializedStoreHandle); } catch (Exception e) { throw new CompletionException(e); } }) .get(); return storeHandle; } catch (Exception ex) { final Optional possibleInconsistentStateException = ExceptionUtils.findThrowable(ex, PossibleInconsistentStateException.class); if (possibleInconsistentStateException.isPresent()) { // it's unclear whether the state handle metadata was written to the ConfigMap - // hence, we don't discard the data discardState = false; throw possibleInconsistentStateException.get(); } throw ExceptionUtils.findThrowable(ex, AlreadyExistException.class) .orElseThrow(() -> ex); } finally { if (discardState) { storeHandle.discardState(); } } } /** * Replaces a state handle in ConfigMap and discards the old state handle. 
Wo do not lock * resource version and then replace in Kubernetes. Since the ConfigMap is periodically updated * by leader, the resource version changes very fast. We use a "check-existence and update" * transactional operation instead. * * @param key Key in ConfigMap * @param resourceVersion resource version when checking existence via {@link #exists}. * @param state State to be added * @throws NotExistException if the name does not exist * @throws PossibleInconsistentStateException if a failure occurred during the update operation. * It's unclear whether the operation actually succeeded or not. No state was discarded. The * method's caller should handle this case properly. * @throws Exception if persisting state or writing state handle failed */ @Override public void replace(String key, StringResourceVersion resourceVersion, T state) throws Exception { checkNotNull(key, "Key in ConfigMap."); checkNotNull(state, "State."); final RetrievableStateHandle newStateHandle = storage.store(state); final byte[] serializedStateHandle = serializeOrDiscard(new StateHandleWithDeleteMarker<>(newStateHandle)); // initialize flags to serve the failure case boolean discardOldState = false; boolean discardNewState = true; // We don't want to greedily pull the old state handle as we have to do that anyway in // replaceEntry method for check of delete markers. 
final AtomicReference> oldStateHandleRef = new AtomicReference<>(); try { final boolean success = updateConfigMap( cm -> { try { return replaceEntry( cm, key, serializedStateHandle, oldStateHandleRef); } catch (NotExistException e) { throw new CompletionException(e); } }) .get(); // swap subject for deletion in case of success discardOldState = success; discardNewState = !success; } catch (Exception ex) { final Optional possibleInconsistentStateException = ExceptionUtils.findThrowable(ex, PossibleInconsistentStateException.class); if (possibleInconsistentStateException.isPresent()) { // it's unclear whether the state handle metadata was written to the ConfigMap - // hence, we don't discard any data discardNewState = false; throw possibleInconsistentStateException.get(); } throw ExceptionUtils.findThrowable(ex, NotExistException.class).orElseThrow(() -> ex); } finally { if (discardNewState) { newStateHandle.discardState(); } if (discardOldState) { Objects.requireNonNull( oldStateHandleRef.get(), "state handle should have been set on success") .discardState(); } } } /** * Returns the resource version of the ConfigMap. * * @param key Key in ConfigMap * @return resource version in {@link StringResourceVersion} format. * @throws Exception if the check existence operation failed */ @Override public StringResourceVersion exists(String key) throws Exception { checkNotNull(key, "Key in ConfigMap."); return kubeClient .getConfigMap(configMapName) .map( configMap -> { final String content = configMap.getData().get(key); if (content != null) { try { final StateHandleWithDeleteMarker stateHandle = deserializeStateHandle(content); if (stateHandle.isMarkedForDeletion()) { return StringResourceVersion.notExisting(); } } catch (IOException e) { // Any calls to add or replace will try to remove this resource, // so we can simply treat it as non-existent. 
return StringResourceVersion.notExisting(); } return StringResourceVersion.valueOf( configMap.getResourceVersion()); } return StringResourceVersion.notExisting(); }) .orElseThrow(this::getConfigMapNotExistException); } /** * Gets the {@link RetrievableStateHandle} stored in the given ConfigMap. * * @param key Key in ConfigMap * @return The retrieved state handle from the specified ConfigMap and key * @throws IOException if the method failed to deserialize the stored state handle * @throws NotExistException when the name does not exist * @throws Exception if get state handle from ConfigMap failed */ @Override public RetrievableStateHandle getAndLock(String key) throws Exception { checkNotNull(key, "Key in ConfigMap."); final Optional optional = kubeClient.getConfigMap(configMapName); if (optional.isPresent()) { final KubernetesConfigMap configMap = optional.get(); if (configMap.getData().containsKey(key)) { final StateHandleWithDeleteMarker result = deserializeStateHandle(configMap.getData().get(key)); if (result.isMarkedForDeletion()) { throw getKeyMarkedAsDeletedException(key); } return result.getInner(); } else { throw getKeyNotExistException(key); } } else { throw getConfigMapNotExistException(); } } /** * Gets all available state handles from Kubernetes. * * @return All state handles from ConfigMap. */ @Override public List, String>> getAllAndLock() { return kubeClient .getConfigMap(configMapName) .map( configMap -> { final List, String>> stateHandles = new ArrayList<>(); configMap.getData().entrySet().stream() .filter(entry -> configMapKeyFilter.test(entry.getKey())) .forEach( entry -> { try { final StateHandleWithDeleteMarker result = deserializeStateHandle( entry.getValue()); if (!result.isMarkedForDeletion()) { stateHandles.add( new Tuple2<>( result.getInner(), entry.getKey())); } } catch (IOException e) { LOG.warn( "ConfigMap {} contained corrupted data. 
Ignoring the key {}.", configMapName, entry.getKey()); } }); return stateHandles; }) .orElse(Collections.emptyList()); } /** * Return a list of all valid keys for state handles. * * @return List of valid state handle keys in Kubernetes ConfigMap * @throws Exception if get state handle names from ConfigMap failed. */ @Override public Collection getAllHandles() throws Exception { return kubeClient .getConfigMap(configMapName) .map( configMap -> configMap.getData().keySet().stream() .filter(configMapKeyFilter) .filter( k -> { try { final String content = Objects.requireNonNull( configMap.getData().get(k)); return !deserializeStateHandle(content) .isMarkedForDeletion(); } catch (IOException e) { return false; } }) .collect(Collectors.toList())) .orElseThrow(this::getConfigMapNotExistException); } /** * Remove the key in state config map. As well as the state on external storage will be removed. * It returns the {@link RetrievableStateHandle} stored under the given state node if any. * * @param key Key to be removed from ConfigMap * @return True if the state handle isn't listed anymore. * @throws Exception if removing the key or discarding the state failed */ @Override public boolean releaseAndTryRemove(String key) throws Exception { checkNotNull(key, "Key in ConfigMap."); final AtomicReference> stateHandleRefer = new AtomicReference<>(); final AtomicBoolean stateHandleDoesNotExist = new AtomicBoolean(false); return updateConfigMap( configMap -> { final String content = configMap.getData().get(key); if (content != null) { try { final StateHandleWithDeleteMarker result = deserializeStateHandle(content); if (!result.isMarkedForDeletion()) { // Mark the ConfigMap entry as deleting. This basically // starts a "removal transaction" that allows us to retry // the removal if needed. 
configMap .getData() .put( key, serializeStateHandle(result.toDeleting())); } stateHandleRefer.set(result.getInner()); } catch (IOException e) { logInvalidEntry(key, configMapName, e); // Remove entry from the config map as we can't recover from // this (the serialization would fail on the retry as well). Objects.requireNonNull(configMap.getData().remove(key)); } return Optional.of(configMap); } else { stateHandleDoesNotExist.set(true); } return Optional.empty(); }) .thenCompose( updated -> { if (updated && stateHandleRefer.get() != null) { try { stateHandleRefer.get().discardState(); return updateConfigMap( configMap -> { // Now we can safely commit the "removal // transaction" by removing the entry from the // ConfigMap. configMap.getData().remove(key); return Optional.of(configMap); }); } catch (Exception e) { throw new CompletionException(e); } } return CompletableFuture.completedFuture( stateHandleDoesNotExist.get() || updated); }) .get(); } /** * Remove all the filtered keys in the ConfigMap. * * @throws Exception when removing the keys failed */ @Override public void clearEntries() throws Exception { updateConfigMap( configMap -> { configMap.getData().keySet().removeIf(configMapKeyFilter); return Optional.of(configMap); }) .get(); } @Override public void release(String name) { // noop } @Override public void releaseAll() { // noop } @Override public String toString() { return this.getClass().getSimpleName() + "{configMapName='" + configMapName + "'}"; } private boolean isValidOperation(KubernetesConfigMap c) { return lockIdentity == null || KubernetesLeaderElector.hasLeadership(c, lockIdentity); } @VisibleForTesting CompletableFuture updateConfigMap( Function> updateFn) { return kubeClient.checkAndUpdateConfigMap( configMapName, configMap -> { if (isValidOperation(configMap)) { return updateFn.apply(configMap); } return Optional.empty(); }); } /** * Adds entry into the ConfigMap. 
If the entry already exists and contains delete marker, we try * to finish the removal before the actual update. */ private Optional addEntry( KubernetesConfigMap configMap, String key, byte[] serializedStateHandle) throws Exception { final String oldBase64Content = configMap.getData().get(key); final String newBase64Content = toBase64(serializedStateHandle); if (oldBase64Content != null) { try { final StateHandleWithDeleteMarker stateHandle = deserializeStateHandle(oldBase64Content); if (stateHandle.isMarkedForDeletion()) { // This might be a left-over after the fail-over. As the remove operation is // idempotent let's try to finish it. if (!releaseAndTryRemove(key)) { throw new IllegalStateException( "Unable to remove the marked as deleting entry."); } } else { // It could happen that the kubernetes client retries a transaction that has // already succeeded due to network issues. So we simply ignore when the // new content is same as the existing one. if (oldBase64Content.equals(newBase64Content)) { return Optional.of(configMap); } throw getKeyAlreadyExistException(key); } } catch (IOException e) { // Just log the invalid entry, it will be overridden // by the update code path below. logInvalidEntry(key, configMapName, e); } } configMap.getData().put(key, newBase64Content); return Optional.of(configMap); } /** * Replace the entry in the ConfigMap. If the entry already exists and contains delete marker, * we treat it as non-existent and perform the best effort removal. 
*/ private Optional replaceEntry( KubernetesConfigMap configMap, String key, byte[] serializedStateHandle, AtomicReference> oldStateHandleRef) throws NotExistException { final String content = configMap.getData().get(key); if (content != null) { try { final StateHandleWithDeleteMarker stateHandle = deserializeStateHandle(content); oldStateHandleRef.set(stateHandle.getInner()); if (stateHandle.isMarkedForDeletion()) { final NotExistException exception = getKeyNotExistException(key); try { // Try to finish the removal. We don't really care whether this succeeds or // not, from the "replace" point of view, the entry doesn't exist. releaseAndTryRemove(key); } catch (Exception e) { exception.addSuppressed(e); } throw exception; } } catch (IOException e) { // Just log the invalid entry, it will be removed by the update code path below. logInvalidEntry(key, configMapName, e); } configMap.getData().put(key, toBase64(serializedStateHandle)); return Optional.of(configMap); } throw getKeyNotExistException(key); } private KubernetesException getConfigMapNotExistException() { return new KubernetesException( "ConfigMap " + configMapName + " does not exists. " + "It may be deleted externally."); } private NotExistException getKeyNotExistException(String key) { return new NotExistException("Could not find " + key + " in ConfigMap " + configMapName); } private NotExistException getKeyMarkedAsDeletedException(String key) { return new NotExistException( "Already marked for deletion " + key + " in ConfigMap " + configMapName); } private AlreadyExistException getKeyAlreadyExistException(String key) { return new AlreadyExistException(key + " already exists in ConfigMap " + configMapName); } private static void logInvalidEntry(String key, String configMapName, Throwable e) { LOG.warn( "Could not retrieve the state handle of '{}' from ConfigMap '{}'. Removing the entry as we don't have any way to recover.", key, configMapName, e); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy