
// org.apache.flink.runtime.state.heap.HeapKeyedStateBackend Maven / Gradle / Ivy
// The newest version!
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.runtime.state.heap;
import org.apache.flink.annotation.VisibleForTesting;
import org.apache.flink.api.common.ExecutionConfig;
import org.apache.flink.api.common.state.ListStateDescriptor;
import org.apache.flink.api.common.state.MapStateDescriptor;
import org.apache.flink.api.common.state.State;
import org.apache.flink.api.common.state.StateDescriptor;
import org.apache.flink.api.common.typeutils.TypeSerializer;
import org.apache.flink.api.common.typeutils.TypeSerializerSchemaCompatibility;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.core.fs.CloseableRegistry;
import org.apache.flink.runtime.checkpoint.CheckpointOptions;
import org.apache.flink.runtime.query.TaskKvStateRegistry;
import org.apache.flink.runtime.state.AbstractKeyedStateBackend;
import org.apache.flink.runtime.state.CheckpointStreamFactory;
import org.apache.flink.runtime.state.HeapPriorityQueuesManager;
import org.apache.flink.runtime.state.InternalKeyContext;
import org.apache.flink.runtime.state.KeyGroupedInternalPriorityQueue;
import org.apache.flink.runtime.state.Keyed;
import org.apache.flink.runtime.state.KeyedStateFunction;
import org.apache.flink.runtime.state.KeyedStateHandle;
import org.apache.flink.runtime.state.LocalRecoveryConfig;
import org.apache.flink.runtime.state.PriorityComparable;
import org.apache.flink.runtime.state.RegisteredKeyValueStateBackendMetaInfo;
import org.apache.flink.runtime.state.SavepointResources;
import org.apache.flink.runtime.state.SnapshotExecutionType;
import org.apache.flink.runtime.state.SnapshotResult;
import org.apache.flink.runtime.state.SnapshotStrategy;
import org.apache.flink.runtime.state.SnapshotStrategyRunner;
import org.apache.flink.runtime.state.StateSnapshotRestore;
import org.apache.flink.runtime.state.StateSnapshotTransformer.StateSnapshotTransformFactory;
import org.apache.flink.runtime.state.StateSnapshotTransformers;
import org.apache.flink.runtime.state.StreamCompressionDecorator;
import org.apache.flink.runtime.state.metrics.LatencyTrackingStateConfig;
import org.apache.flink.runtime.state.ttl.TtlTimeProvider;
import org.apache.flink.util.FlinkRuntimeException;
import org.apache.flink.util.StateMigrationException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.annotation.Nonnull;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.RunnableFuture;
import java.util.stream.Collectors;
import java.util.stream.Stream;
/**
* A {@link AbstractKeyedStateBackend} that keeps state on the Java Heap and will serialize state to
* streams provided by a {@link CheckpointStreamFactory} upon checkpointing.
*
* @param The key by which state is keyed.
*/
public class HeapKeyedStateBackend extends AbstractKeyedStateBackend {
private static final Logger LOG = LoggerFactory.getLogger(HeapKeyedStateBackend.class);
private static final Map STATE_CREATE_FACTORIES =
Stream.of(
Tuple2.of(
StateDescriptor.Type.VALUE,
(StateCreateFactory) HeapValueState::create),
Tuple2.of(
StateDescriptor.Type.LIST,
(StateCreateFactory) HeapListState::create),
Tuple2.of(
StateDescriptor.Type.MAP,
(StateCreateFactory) HeapMapState::create),
Tuple2.of(
StateDescriptor.Type.AGGREGATING,
(StateCreateFactory) HeapAggregatingState::create),
Tuple2.of(
StateDescriptor.Type.REDUCING,
(StateCreateFactory) HeapReducingState::create))
.collect(Collectors.toMap(t -> t.f0, t -> t.f1));
private static final Map STATE_UPDATE_FACTORIES =
Stream.of(
Tuple2.of(
StateDescriptor.Type.VALUE,
(StateUpdateFactory) HeapValueState::update),
Tuple2.of(
StateDescriptor.Type.LIST,
(StateUpdateFactory) HeapListState::update),
Tuple2.of(
StateDescriptor.Type.MAP,
(StateUpdateFactory) HeapMapState::update),
Tuple2.of(
StateDescriptor.Type.AGGREGATING,
(StateUpdateFactory) HeapAggregatingState::update),
Tuple2.of(
StateDescriptor.Type.REDUCING,
(StateUpdateFactory) HeapReducingState::update))
.collect(Collectors.toMap(t -> t.f0, t -> t.f1));
/** Map of created Key/Value states. */
private final Map createdKVStates;
/** Map of registered Key/Value states. */
private final Map> registeredKVStates;
/** The configuration for local recovery. */
private final LocalRecoveryConfig localRecoveryConfig;
/** The snapshot strategy for this backend. */
private final SnapshotStrategy checkpointStrategy;
private final SnapshotExecutionType snapshotExecutionType;
private final StateTableFactory stateTableFactory;
/** Factory for state that is organized as priority queue. */
private final HeapPriorityQueuesManager priorityQueuesManager;
public HeapKeyedStateBackend(
TaskKvStateRegistry kvStateRegistry,
TypeSerializer keySerializer,
ClassLoader userCodeClassLoader,
ExecutionConfig executionConfig,
TtlTimeProvider ttlTimeProvider,
LatencyTrackingStateConfig latencyTrackingStateConfig,
CloseableRegistry cancelStreamRegistry,
StreamCompressionDecorator keyGroupCompressionDecorator,
Map> registeredKVStates,
Map> registeredPQStates,
LocalRecoveryConfig localRecoveryConfig,
HeapPriorityQueueSetFactory priorityQueueSetFactory,
HeapSnapshotStrategy checkpointStrategy,
SnapshotExecutionType snapshotExecutionType,
StateTableFactory stateTableFactory,
InternalKeyContext keyContext) {
super(
kvStateRegistry,
keySerializer,
userCodeClassLoader,
executionConfig,
ttlTimeProvider,
latencyTrackingStateConfig,
cancelStreamRegistry,
keyGroupCompressionDecorator,
keyContext);
this.registeredKVStates = registeredKVStates;
this.createdKVStates = new HashMap<>();
this.localRecoveryConfig = localRecoveryConfig;
this.checkpointStrategy = checkpointStrategy;
this.snapshotExecutionType = snapshotExecutionType;
this.stateTableFactory = stateTableFactory;
this.priorityQueuesManager =
new HeapPriorityQueuesManager(
registeredPQStates,
priorityQueueSetFactory,
keyContext.getKeyGroupRange(),
keyContext.getNumberOfKeyGroups());
LOG.info("Initializing heap keyed state backend with stream factory.");
}
// ------------------------------------------------------------------------
// state backend operations
// ------------------------------------------------------------------------
@Nonnull
@Override
public & Keyed>>
KeyGroupedInternalPriorityQueue create(
@Nonnull String stateName,
@Nonnull TypeSerializer byteOrderedElementSerializer) {
return priorityQueuesManager.createOrUpdate(stateName, byteOrderedElementSerializer);
}
@Override
public & Keyed>>
KeyGroupedInternalPriorityQueue create(
@Nonnull String stateName,
@Nonnull TypeSerializer byteOrderedElementSerializer,
boolean allowFutureMetadataUpdates) {
return priorityQueuesManager.createOrUpdate(
stateName, byteOrderedElementSerializer, allowFutureMetadataUpdates);
}
private StateTable tryRegisterStateTable(
TypeSerializer namespaceSerializer,
StateDescriptor, V> stateDesc,
@Nonnull StateSnapshotTransformFactory snapshotTransformFactory,
boolean allowFutureMetadataUpdates)
throws StateMigrationException {
@SuppressWarnings("unchecked")
StateTable stateTable =
(StateTable) registeredKVStates.get(stateDesc.getName());
TypeSerializer newStateSerializer = stateDesc.getSerializer();
if (stateTable != null) {
RegisteredKeyValueStateBackendMetaInfo restoredKvMetaInfo =
stateTable.getMetaInfo();
restoredKvMetaInfo.updateSnapshotTransformFactory(snapshotTransformFactory);
// fetch current serializer now because if it is incompatible, we can't access
// it anymore to improve the error message
TypeSerializer previousNamespaceSerializer =
restoredKvMetaInfo.getNamespaceSerializer();
TypeSerializerSchemaCompatibility namespaceCompatibility =
restoredKvMetaInfo.updateNamespaceSerializer(namespaceSerializer);
if (namespaceCompatibility.isCompatibleAfterMigration()
|| namespaceCompatibility.isIncompatible()) {
throw new StateMigrationException(
"For heap backends, the new namespace serializer ("
+ namespaceSerializer
+ ") must be compatible with the old namespace serializer ("
+ previousNamespaceSerializer
+ ").");
}
restoredKvMetaInfo.checkStateMetaInfo(stateDesc);
// fetch current serializer now because if it is incompatible, we can't access
// it anymore to improve the error message
TypeSerializer previousStateSerializer = restoredKvMetaInfo.getStateSerializer();
TypeSerializerSchemaCompatibility stateCompatibility =
restoredKvMetaInfo.updateStateSerializer(newStateSerializer);
if (stateCompatibility.isIncompatible()) {
throw new StateMigrationException(
"For heap backends, the new state serializer ("
+ newStateSerializer
+ ") must not be incompatible with the old state serializer ("
+ previousStateSerializer
+ ").");
}
restoredKvMetaInfo =
allowFutureMetadataUpdates
? restoredKvMetaInfo.withSerializerUpgradesAllowed()
: restoredKvMetaInfo;
stateTable.setMetaInfo(restoredKvMetaInfo);
} else {
RegisteredKeyValueStateBackendMetaInfo newMetaInfo =
new RegisteredKeyValueStateBackendMetaInfo<>(
stateDesc.getType(),
stateDesc.getName(),
namespaceSerializer,
newStateSerializer,
snapshotTransformFactory);
newMetaInfo =
allowFutureMetadataUpdates
? newMetaInfo.withSerializerUpgradesAllowed()
: newMetaInfo;
stateTable = stateTableFactory.newStateTable(keyContext, newMetaInfo, keySerializer);
registeredKVStates.put(stateDesc.getName(), stateTable);
}
return stateTable;
}
@SuppressWarnings("unchecked")
@Override
public Stream getKeys(String state, N namespace) {
if (!registeredKVStates.containsKey(state)) {
return Stream.empty();
}
final StateSnapshotRestore stateSnapshotRestore = registeredKVStates.get(state);
StateTable table = (StateTable) stateSnapshotRestore;
return table.getKeys(namespace);
}
@SuppressWarnings("unchecked")
@Override
public Stream> getKeysAndNamespaces(String state) {
if (!registeredKVStates.containsKey(state)) {
return Stream.empty();
}
final StateSnapshotRestore stateSnapshotRestore = registeredKVStates.get(state);
StateTable table = (StateTable) stateSnapshotRestore;
return table.getKeysAndNamespaces();
}
@Override
@Nonnull
public IS createOrUpdateInternalState(
@Nonnull TypeSerializer namespaceSerializer,
@Nonnull StateDescriptor stateDesc,
@Nonnull StateSnapshotTransformFactory snapshotTransformFactory)
throws Exception {
return createOrUpdateInternalState(
namespaceSerializer, stateDesc, snapshotTransformFactory, false);
}
@Override
@Nonnull
public IS createOrUpdateInternalState(
@Nonnull TypeSerializer namespaceSerializer,
@Nonnull StateDescriptor stateDesc,
@Nonnull StateSnapshotTransformFactory snapshotTransformFactory,
boolean allowFutureMetadataUpdates)
throws Exception {
StateTable stateTable =
tryRegisterStateTable(
namespaceSerializer,
stateDesc,
getStateSnapshotTransformFactory(stateDesc, snapshotTransformFactory),
allowFutureMetadataUpdates);
@SuppressWarnings("unchecked")
IS createdState = (IS) createdKVStates.get(stateDesc.getName());
if (createdState == null) {
StateCreateFactory stateCreateFactory = STATE_CREATE_FACTORIES.get(stateDesc.getType());
if (stateCreateFactory == null) {
throw new FlinkRuntimeException(stateNotSupportedMessage(stateDesc));
}
createdState =
stateCreateFactory.createState(stateDesc, stateTable, getKeySerializer());
} else {
StateUpdateFactory stateUpdateFactory = STATE_UPDATE_FACTORIES.get(stateDesc.getType());
if (stateUpdateFactory == null) {
throw new FlinkRuntimeException(stateNotSupportedMessage(stateDesc));
}
createdState = stateUpdateFactory.updateState(stateDesc, stateTable, createdState);
}
createdKVStates.put(stateDesc.getName(), createdState);
return createdState;
}
private String stateNotSupportedMessage(
StateDescriptor stateDesc) {
return String.format(
"State %s is not supported by %s", stateDesc.getClass(), this.getClass());
}
@SuppressWarnings("unchecked")
private StateSnapshotTransformFactory getStateSnapshotTransformFactory(
StateDescriptor, SV> stateDesc,
StateSnapshotTransformFactory snapshotTransformFactory) {
if (stateDesc instanceof ListStateDescriptor) {
return (StateSnapshotTransformFactory)
new StateSnapshotTransformers.ListStateSnapshotTransformFactory<>(
snapshotTransformFactory);
} else if (stateDesc instanceof MapStateDescriptor) {
return (StateSnapshotTransformFactory)
new StateSnapshotTransformers.MapStateSnapshotTransformFactory<>(
snapshotTransformFactory);
} else {
return (StateSnapshotTransformFactory) snapshotTransformFactory;
}
}
@Nonnull
@Override
public RunnableFuture> snapshot(
final long checkpointId,
final long timestamp,
@Nonnull final CheckpointStreamFactory streamFactory,
@Nonnull CheckpointOptions checkpointOptions)
throws Exception {
SnapshotStrategyRunner snapshotStrategyRunner =
new SnapshotStrategyRunner<>(
"Heap backend snapshot",
checkpointStrategy,
cancelStreamRegistry,
snapshotExecutionType);
return snapshotStrategyRunner.snapshot(
checkpointId, timestamp, streamFactory, checkpointOptions);
}
@Nonnull
@Override
public SavepointResources savepoint() {
HeapSnapshotResources snapshotResources =
HeapSnapshotResources.create(
registeredKVStates,
priorityQueuesManager.getRegisteredPQStates(),
keyGroupCompressionDecorator,
keyGroupRange,
keySerializer,
numberOfKeyGroups);
return new SavepointResources<>(snapshotResources, snapshotExecutionType);
}
@Override
public void notifyCheckpointComplete(long checkpointId) {
// Nothing to do
}
@Override
public void notifyCheckpointAborted(long checkpointId) {
// nothing to do
}
@Override
public void applyToAllKeys(
final N namespace,
final TypeSerializer namespaceSerializer,
final StateDescriptor stateDescriptor,
final KeyedStateFunction function,
final PartitionStateFactory partitionStateFactory)
throws Exception {
try (Stream keyStream = getKeys(stateDescriptor.getName(), namespace)) {
// we copy the keys into list to avoid the concurrency problem
// when state.clear() is invoked in function.process().
final List keys = keyStream.collect(Collectors.toList());
final S state =
partitionStateFactory.get(namespace, namespaceSerializer, stateDescriptor);
for (K key : keys) {
setCurrentKey(key);
function.process(key, state);
}
}
}
@Override
public String toString() {
return "HeapKeyedStateBackend";
}
/** Returns the total number of state entries across all keys/namespaces. */
@VisibleForTesting
@Override
public int numKeyValueStateEntries() {
int sum = 0;
for (StateSnapshotRestore state : registeredKVStates.values()) {
sum += ((StateTable, ?, ?>) state).size();
}
return sum;
}
/** Returns the total number of state entries across all keys for the given namespace. */
@VisibleForTesting
public int numKeyValueStateEntries(Object namespace) {
int sum = 0;
for (StateTable, ?, ?> state : registeredKVStates.values()) {
sum += state.sizeOfNamespace(namespace);
}
return sum;
}
@VisibleForTesting
public LocalRecoveryConfig getLocalRecoveryConfig() {
return localRecoveryConfig;
}
private interface StateCreateFactory {
IS createState(
StateDescriptor stateDesc,
StateTable stateTable,
TypeSerializer keySerializer)
throws Exception;
}
private interface StateUpdateFactory {
IS updateState(
StateDescriptor stateDesc, StateTable stateTable, IS existingState)
throws Exception;
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy