// org.apache.flink.runtime.state.StateSerializerProvider (Maven / Gradle / Ivy)
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.runtime.state;
import org.apache.flink.annotation.Internal;
import org.apache.flink.api.common.typeutils.TypeSerializer;
import org.apache.flink.api.common.typeutils.TypeSerializerSchemaCompatibility;
import org.apache.flink.api.common.typeutils.TypeSerializerSnapshot;
import org.apache.flink.api.common.typeutils.UnloadableDummyTypeSerializer;
import org.apache.flink.util.Preconditions;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import static org.apache.flink.util.Preconditions.checkNotNull;
import static org.apache.flink.util.Preconditions.checkState;
/**
 * A {@link StateSerializerProvider} wraps logic on how to obtain serializers for registered state,
 * either with the previous schema of state in checkpoints or the current schema of state.
 *
 * <p>A provider can be created from either a registered state serializer, or the snapshot
 * of the previous state serializer. For the former case, if the state was restored and a
 * snapshot of the previous state serializer was retrieved later on, the snapshot can be set
 * on the provider which also additionally checks the compatibility of the initially registered
 * serializer. Similarly for the latter case, if a new state serializer is registered later on,
 * it can be set on the provider, which then also checks the compatibility of the new registered
 * serializer.
 *
 * <p>Simply put, the provider works both directions - either creating it first with a registered
 * serializer or the previous serializer's snapshot, and then setting the previous serializer's
 * snapshot (if the provider was created with a registered serializer) or a new registered state
 * serializer (if the provider was created with a serializer snapshot). Either way,
 * the new registered serializer is checked for schema compatibility once both the new serializer
 * and the previous serializer snapshot are present.
 *
 * @param <T> the type of the state.
 */
@Internal
public abstract class StateSerializerProvider<T> {

    /**
     * The registered serializer for the state.
     *
     * <p>In the case that this provider was created from a restored serializer snapshot via
     * {@link #fromPreviousSerializerSnapshot(TypeSerializerSnapshot)}, but a new serializer was never registered
     * for the state (i.e., this is the case if a restored state was never accessed), this would be {@code null}.
     */
    @Nullable
    TypeSerializer<T> registeredSerializer;

    /**
     * The state's previous serializer's snapshot.
     *
     * <p>In the case that this provider was created from a registered state serializer instance via
     * {@link #fromNewRegisteredSerializer(TypeSerializer)}, but a serializer snapshot was never supplied to this
     * provider (i.e. because the registered serializer was for a new state, not a restored one), this
     * would be {@code null}.
     */
    @Nullable
    TypeSerializerSnapshot<T> previousSerializerSnapshot;

    /**
     * The restore serializer, lazily created only when the restore serializer is accessed.
     *
     * <p>NOTE: It is important to only create this lazily, so that off-heap
     * state do not fail eagerly when restoring state that has a
     * {@link UnloadableDummyTypeSerializer} as the previous serializer. This should
     * be relevant only for restores from Flink versions prior to 1.7.x.
     */
    @Nullable
    private TypeSerializer<T> cachedRestoredSerializer;

    /**
     * Set once the registered serializer has been determined to be incompatible with the
     * previous schema; makes {@link #currentSchemaSerializer()} fail from then on.
     */
    private boolean isRegisteredWithIncompatibleSerializer = false;

    /**
     * Creates a {@link StateSerializerProvider} for restored state from the previous serializer's snapshot.
     *
     * <p>Once a new serializer is registered for the state, it should be provided via
     * the {@link #registerNewSerializerForRestoredState(TypeSerializer)} method.
     *
     * @param stateSerializerSnapshot the previous serializer's snapshot.
     * @param <T> the type of the state.
     *
     * @return a new {@link StateSerializerProvider}.
     */
    public static <T> StateSerializerProvider<T> fromPreviousSerializerSnapshot(TypeSerializerSnapshot<T> stateSerializerSnapshot) {
        return new LazilyRegisteredStateSerializerProvider<>(stateSerializerSnapshot);
    }

    /**
     * Creates a {@link StateSerializerProvider} from the registered state serializer.
     *
     * <p>If the state is a restored one, and the previous serializer's snapshot is
     * obtained later on, it should be supplied via the
     * {@link #setPreviousSerializerSnapshotForRestoredState(TypeSerializerSnapshot)} method.
     *
     * @param registeredStateSerializer the new state's registered serializer.
     * @param <T> the type of the state.
     *
     * @return a new {@link StateSerializerProvider}.
     */
    public static <T> StateSerializerProvider<T> fromNewRegisteredSerializer(TypeSerializer<T> registeredStateSerializer) {
        return new EagerlyRegisteredStateSerializerProvider<>(registeredStateSerializer);
    }

    /** Creates a provider that starts out with only the registered serializer. */
    private StateSerializerProvider(@Nonnull TypeSerializer<T> stateSerializer) {
        this.registeredSerializer = stateSerializer;
        this.previousSerializerSnapshot = null;
    }

    /** Creates a provider that starts out with only the previous serializer's snapshot. */
    private StateSerializerProvider(@Nonnull TypeSerializerSnapshot<T> previousSerializerSnapshot) {
        this.previousSerializerSnapshot = previousSerializerSnapshot;
        this.registeredSerializer = null;
    }

    /**
     * Gets the serializer that recognizes the current serialization schema of the state.
     * This is the serializer that should be used for regular state serialization and
     * deserialization after state has been restored.
     *
     * <p>If this provider was created from a restored state's serializer snapshot, while a
     * new serializer (with a new schema) was not registered for the state (i.e., because
     * the state was never accessed after it was restored), then the schema of state remains
     * identical. Therefore, in this case, it is guaranteed that the serializer returned by
     * this method is the same as the one returned by {@link #previousSchemaSerializer()}.
     *
     * <p>If this provider was created from a serializer instance, then this always returns
     * that same serializer instance. If later on a snapshot of the previous serializer is supplied
     * via {@link #setPreviousSerializerSnapshotForRestoredState(TypeSerializerSnapshot)}, then
     * the initially supplied serializer instance will be checked for compatibility.
     *
     * @return a serializer that reads and writes in the current schema of the state.
     *
     * @throws IllegalStateException if the registered serializer was found to be incompatible
     *                               with the previous schema.
     */
    @Nonnull
    public final TypeSerializer<T> currentSchemaSerializer() {
        if (registeredSerializer != null) {
            checkState(
                !isRegisteredWithIncompatibleSerializer,
                "Unable to provide a serializer with the current schema, because the restored state was " +
                    "registered with a new serializer that has incompatible schema.");

            return registeredSerializer;
        }

        // if we are not yet registered with a new serializer,
        // we can just use the restore serializer to read / write the state.
        return previousSchemaSerializer();
    }

    /**
     * Gets the serializer that recognizes the previous serialization schema of the state.
     * This is the serializer that should be used for restoring the state, i.e. when the state
     * is still in the previous serialization schema.
     *
     * <p>This method only returns a serializer if this provider has the previous serializer's
     * snapshot. Otherwise, trying to access the previous schema serializer will fail
     * with an exception.
     *
     * @return a serializer that reads and writes in the previous schema of the state.
     *
     * @throws UnsupportedOperationException if this provider has no previous serializer snapshot.
     */
    @Nonnull
    public final TypeSerializer<T> previousSchemaSerializer() {
        if (cachedRestoredSerializer != null) {
            return cachedRestoredSerializer;
        }

        if (previousSerializerSnapshot == null) {
            throw new UnsupportedOperationException(
                "This provider does not contain the state's previous serializer's snapshot. Cannot provide a serializer for previous schema.");
        }

        // restore lazily and cache, so the snapshot is asked for a serializer at most once
        this.cachedRestoredSerializer = previousSerializerSnapshot.restoreSerializer();
        return cachedRestoredSerializer;
    }

    /**
     * Gets the previous serializer snapshot.
     *
     * @return The previous serializer snapshot, or null if registered serializer was for a new state, not a restored one.
     */
    @Nullable
    public final TypeSerializerSnapshot<T> getPreviousSerializerSnapshot() {
        return previousSerializerSnapshot;
    }

    /**
     * For restored state, register a new serializer that potentially has a new serialization schema.
     *
     * <p>Users are allowed to register serializers for state only once. Therefore, this method
     * is irrelevant if this provider was created with a serializer instance, since a state serializer had
     * been registered already.
     *
     * <p>For the case where this provider was created from a serializer snapshot, then this method should
     * be called at most once. The new serializer will be checked for its schema compatibility with the
     * previous serializer's schema, and returned to the caller. The caller is responsible for
     * checking the result and react appropriately to it, as follows:
     * <ul>
     *     <li>{@link TypeSerializerSchemaCompatibility#isCompatibleAsIs()}: nothing needs to be done.
     *     {@link #currentSchemaSerializer()} now returns the newly registered serializer.</li>
     *     <li>{@link TypeSerializerSchemaCompatibility#isCompatibleAfterMigration()}: state needs to be
     *     migrated before the serializer returned by {@link #currentSchemaSerializer()} can be used.
     *     The migration should be performed by reading the state with {@link #previousSchemaSerializer()},
     *     and then writing it again with {@link #currentSchemaSerializer()}.</li>
     *     <li>{@link TypeSerializerSchemaCompatibility#isIncompatible()}: the registered serializer is
     *     incompatible. {@link #currentSchemaSerializer()} can no longer return a serializer for
     *     the state, and therefore this provider shouldn't be used anymore.</li>
     * </ul>
     *
     * @param newSerializer the new serializer to register for the restored state.
     *
     * @return the schema compatibility of the new registered serializer, with respect to the previous serializer.
     */
    @Nonnull
    public abstract TypeSerializerSchemaCompatibility<T> registerNewSerializerForRestoredState(TypeSerializer<T> newSerializer);

    /**
     * For restored state, set the state's previous serializer's snapshot.
     *
     * <p>Users are allowed to set the previous serializer's snapshot once. Therefore, this method
     * is irrelevant if this provider was created with a serializer snapshot, since the serializer
     * snapshot had been set already.
     *
     * <p>For the case where this provider was created from a serializer instance, then this method should
     * be called at most once. The initially registered state serializer will be checked for its
     * schema compatibility with the previous serializer's schema, and returned to the caller.
     * The caller is responsible for checking the result and react appropriately to it, as follows:
     * <ul>
     *     <li>{@link TypeSerializerSchemaCompatibility#isCompatibleAsIs()}: nothing needs to be done.
     *     {@link #currentSchemaSerializer()} remains to return the initially registered serializer.</li>
     *     <li>{@link TypeSerializerSchemaCompatibility#isCompatibleAfterMigration()}: state needs to be
     *     migrated before the serializer returned by {@link #currentSchemaSerializer()} can be used.
     *     The migration should be performed by reading the state with {@link #previousSchemaSerializer()},
     *     and then writing it again with {@link #currentSchemaSerializer()}.</li>
     *     <li>{@link TypeSerializerSchemaCompatibility#isIncompatible()}: the registered serializer is
     *     incompatible. {@link #currentSchemaSerializer()} can no longer return a serializer for
     *     the state, and therefore this provider shouldn't be used anymore.</li>
     * </ul>
     *
     * @param previousSerializerSnapshot the state's previous serializer's snapshot
     *
     * @return the schema compatibility of the initially registered serializer, with respect to the previous serializer.
     */
    @Nonnull
    public abstract TypeSerializerSchemaCompatibility<T> setPreviousSerializerSnapshotForRestoredState(TypeSerializerSnapshot<T> previousSerializerSnapshot);

    /**
     * Invalidates access to the current schema serializer. This lets {@link #currentSchemaSerializer()}
     * fail when invoked.
     *
     * <p>Access to the current schema serializer should be invalidated by the methods
     * {@link #registerNewSerializerForRestoredState(TypeSerializer)} or
     * {@link #setPreviousSerializerSnapshotForRestoredState(TypeSerializerSnapshot)}
     * once the registered serializer is determined to be incompatible.
     */
    protected final void invalidateCurrentSchemaSerializerAccess() {
        this.isRegisteredWithIncompatibleSerializer = true;
    }

    /**
     * Implementation of the {@link StateSerializerProvider} for the case where a snapshot of the
     * previous serializer is obtained before a new state serializer is registered (hence, the naming "lazily" registered).
     */
    private static class LazilyRegisteredStateSerializerProvider<T> extends StateSerializerProvider<T> {

        LazilyRegisteredStateSerializerProvider(TypeSerializerSnapshot<T> previousSerializerSnapshot) {
            super(Preconditions.checkNotNull(previousSerializerSnapshot));
        }

        /**
         * Registers the new serializer and resolves its compatibility against the previous
         * serializer's snapshot this provider was created with.
         *
         * @throws UnsupportedOperationException if a serializer has already been registered.
         */
        @Nonnull
        @Override
        @SuppressWarnings("ConstantConditions")
        public TypeSerializerSchemaCompatibility<T> registerNewSerializerForRestoredState(TypeSerializer<T> newSerializer) {
            checkNotNull(newSerializer);
            if (registeredSerializer != null) {
                throw new UnsupportedOperationException("A serializer has already been registered for the state; re-registration is not allowed.");
            }

            TypeSerializerSchemaCompatibility<T> result = previousSerializerSnapshot.resolveSchemaCompatibility(newSerializer);
            if (result.isIncompatible()) {
                invalidateCurrentSchemaSerializerAccess();
            }

            // a reconfigured serializer, when offered, takes the place of the one passed in
            if (result.isCompatibleWithReconfiguredSerializer()) {
                this.registeredSerializer = result.getReconfiguredSerializer();
            } else {
                this.registeredSerializer = newSerializer;
            }
            return result;
        }

        /**
         * Always fails: this provider was created from the previous serializer's snapshot,
         * so the snapshot cannot be set again.
         *
         * @throws UnsupportedOperationException always.
         */
        @Nonnull
        @Override
        public TypeSerializerSchemaCompatibility<T> setPreviousSerializerSnapshotForRestoredState(
                TypeSerializerSnapshot<T> previousSerializerSnapshot) {
            throw new UnsupportedOperationException("The snapshot of the state's previous serializer has already been set; cannot reset.");
        }
    }

    /**
     * Implementation of the {@link StateSerializerProvider} for the case where a new state
     * serializer instance is registered first, before any snapshots of the previous state serializer
     * is obtained (hence, the naming "eagerly" registered).
     */
    private static class EagerlyRegisteredStateSerializerProvider<T> extends StateSerializerProvider<T> {

        EagerlyRegisteredStateSerializerProvider(TypeSerializer<T> registeredStateSerializer) {
            super(Preconditions.checkNotNull(registeredStateSerializer));
        }

        /**
         * Always fails: this provider was created from a registered serializer,
         * so another serializer cannot be registered.
         *
         * @throws UnsupportedOperationException always.
         */
        @Nonnull
        @Override
        public TypeSerializerSchemaCompatibility<T> registerNewSerializerForRestoredState(TypeSerializer<T> newSerializer) {
            throw new UnsupportedOperationException("A serializer has already been registered for the state; re-registration is not allowed.");
        }

        /**
         * Stores the previous serializer's snapshot and resolves the compatibility of the
         * initially registered serializer against it.
         *
         * @throws UnsupportedOperationException if a snapshot has already been set.
         */
        @Nonnull
        @Override
        public TypeSerializerSchemaCompatibility<T> setPreviousSerializerSnapshotForRestoredState(TypeSerializerSnapshot<T> previousSerializerSnapshot) {
            checkNotNull(previousSerializerSnapshot);
            if (this.previousSerializerSnapshot != null) {
                throw new UnsupportedOperationException("The snapshot of the state's previous serializer has already been set; cannot reset.");
            }

            this.previousSerializerSnapshot = previousSerializerSnapshot;

            TypeSerializerSchemaCompatibility<T> result = previousSerializerSnapshot.resolveSchemaCompatibility(registeredSerializer);
            if (result.isIncompatible()) {
                invalidateCurrentSchemaSerializerAccess();
            }

            // a reconfigured serializer, when offered, replaces the initially registered one
            if (result.isCompatibleWithReconfiguredSerializer()) {
                this.registeredSerializer = result.getReconfiguredSerializer();
            }
            return result;
        }
    }
}