// org.apache.flink.runtime.state.StateSerializerProvider Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.runtime.state;
import org.apache.flink.annotation.Internal;
import org.apache.flink.api.common.typeutils.TypeSerializer;
import org.apache.flink.api.common.typeutils.TypeSerializerSchemaCompatibility;
import org.apache.flink.api.common.typeutils.TypeSerializerSnapshot;
import org.apache.flink.api.common.typeutils.UnloadableDummyTypeSerializer;
import org.apache.flink.util.Preconditions;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import static org.apache.flink.util.Preconditions.checkNotNull;
import static org.apache.flink.util.Preconditions.checkState;
/**
 * A {@link StateSerializerProvider} wraps logic on how to obtain serializers for registered state,
 * either with the previous schema of state in checkpoints or the current schema of state.
 *
 * <p>A provider can be created from either a registered state serializer, or the snapshot of the
 * previous state serializer. For the former case, if the state was restored and a snapshot of the
 * previous state serializer was retrieved later on, the snapshot can be set on the provider which
 * also additionally checks the compatibility of the initially registered serializer. Similarly for
 * the latter case, if a new state serializer is registered later on, it can be set on the provider,
 * which then also checks the compatibility of the new registered serializer.
 *
 * <p>Simply put, the provider works both directions - either creating it first with a registered
 * serializer or the previous serializer's snapshot, and then setting the previous serializer's
 * snapshot (if the provider was created with a registered serializer) or a new registered state
 * serializer (if the provider was created with a serializer snapshot). Either way, the new
 * registered serializer is checked for schema compatibility once both the new serializer and the
 * previous serializer snapshot are present.
 *
 * @param <T> the type of the state.
 */
@Internal
public abstract class StateSerializerProvider<T> {

    /**
     * The registered serializer for the state.
     *
     * <p>In the case that this provider was created from a restored serializer snapshot via {@link
     * #fromPreviousSerializerSnapshot(TypeSerializerSnapshot)}, but a new serializer was never
     * registered for the state (i.e., this is the case if a restored state was never accessed),
     * this would be {@code null}.
     */
    @Nullable TypeSerializer<T> registeredSerializer;

    /**
     * The state's previous serializer's snapshot.
     *
     * <p>In the case that this provider was created from a registered state serializer instance via
     * {@link #fromNewRegisteredSerializer(TypeSerializer)}, but a serializer snapshot was never
     * supplied to this provider (i.e. because the registered serializer was for a new state, not a
     * restored one), this would be {@code null}.
     */
    @Nullable TypeSerializerSnapshot<T> previousSerializerSnapshot;

    /**
     * The restore serializer, lazily created only when the restore serializer is accessed.
     *
     * <p>NOTE: It is important to only create this lazily, so that off-heap state do not fail
     * eagerly when restoring state that has a {@link UnloadableDummyTypeSerializer} as the previous
     * serializer. This should be relevant only for restores from Flink versions prior to 1.7.x.
     */
    @Nullable private TypeSerializer<T> cachedRestoredSerializer;

    /**
     * Set to {@code true} via {@link #invalidateCurrentSchemaSerializerAccess()}; while set, {@link
     * #currentSchemaSerializer()} refuses to hand out the registered serializer.
     */
    private boolean isRegisteredWithIncompatibleSerializer = false;

    /**
     * Creates a {@link StateSerializerProvider} for restored state from the previous serializer's
     * snapshot.
     *
     * <p>Once a new serializer is registered for the state, it should be provided via the {@link
     * #registerNewSerializerForRestoredState(TypeSerializer)} method.
     *
     * @param stateSerializerSnapshot the previous serializer's snapshot.
     * @param <T> the type of the state.
     * @return a new {@link StateSerializerProvider}.
     */
    public static <T> StateSerializerProvider<T> fromPreviousSerializerSnapshot(
            TypeSerializerSnapshot<T> stateSerializerSnapshot) {
        return new LazilyRegisteredStateSerializerProvider<>(stateSerializerSnapshot);
    }

    /**
     * Creates a {@link StateSerializerProvider} from the registered state serializer.
     *
     * <p>If the state is a restored one, and the previous serializer's snapshot is obtained later
     * on, it should be supplied via the {@link
     * #setPreviousSerializerSnapshotForRestoredState(TypeSerializerSnapshot)} method.
     *
     * @param registeredStateSerializer the new state's registered serializer.
     * @param <T> the type of the state.
     * @return a new {@link StateSerializerProvider}.
     */
    public static <T> StateSerializerProvider<T> fromNewRegisteredSerializer(
            TypeSerializer<T> registeredStateSerializer) {
        return new EagerlyRegisteredStateSerializerProvider<>(registeredStateSerializer);
    }

    /** Creates a provider that starts out with a registered serializer and no snapshot. */
    private StateSerializerProvider(@Nonnull TypeSerializer<T> stateSerializer) {
        this.registeredSerializer = stateSerializer;
        this.previousSerializerSnapshot = null;
    }

    /** Creates a provider that starts out with a snapshot and no registered serializer. */
    private StateSerializerProvider(@Nonnull TypeSerializerSnapshot<T> previousSerializerSnapshot) {
        this.previousSerializerSnapshot = previousSerializerSnapshot;
        this.registeredSerializer = null;
    }

    /**
     * Gets the serializer that recognizes the current serialization schema of the state. This is
     * the serializer that should be used for regular state serialization and deserialization after
     * state has been restored.
     *
     * <p>If this provider was created from a restored state's serializer snapshot, while a new
     * serializer (with a new schema) was not registered for the state (i.e., because the state was
     * never accessed after it was restored), then the schema of state remains identical. Therefore,
     * in this case, it is guaranteed that the serializer returned by this method is the same as the
     * one returned by {@link #previousSchemaSerializer()}.
     *
     * <p>If this provider was created from a serializer instance, then this always returns that
     * same serializer instance. If later on a snapshot of the previous serializer is supplied via
     * {@link #setPreviousSerializerSnapshotForRestoredState(TypeSerializerSnapshot)}, then the
     * initially supplied serializer instance will be checked for compatibility.
     *
     * <p>This method fails if a serializer is registered and access has been invalidated through
     * {@link #invalidateCurrentSchemaSerializerAccess()}.
     *
     * @return a serializer that reads and writes in the current schema of the state.
     */
    @Nonnull
    public final TypeSerializer<T> currentSchemaSerializer() {
        if (registeredSerializer != null) {
            checkState(
                    !isRegisteredWithIncompatibleSerializer,
                    "Unable to provide a serializer with the current schema, because the restored state was "
                            + "registered with a new serializer that has incompatible schema.");

            return registeredSerializer;
        }

        // if we are not yet registered with a new serializer,
        // we can just use the restore serializer to read / write the state.
        return previousSchemaSerializer();
    }

    /**
     * Gets the serializer that recognizes the previous serialization schema of the state. This is
     * the serializer that should be used for restoring the state, i.e. when the state is still in
     * the previous serialization schema.
     *
     * <p>This method only returns a serializer if this provider has the previous serializer's
     * snapshot. Otherwise, trying to access the previous schema serializer will fail with an
     * exception.
     *
     * @return a serializer that reads and writes in the previous schema of the state.
     * @throws UnsupportedOperationException if this provider does not hold a previous serializer
     *     snapshot.
     */
    @Nonnull
    public final TypeSerializer<T> previousSchemaSerializer() {
        // the restore serializer is created at most once and then reused
        if (cachedRestoredSerializer != null) {
            return cachedRestoredSerializer;
        }

        if (previousSerializerSnapshot == null) {
            throw new UnsupportedOperationException(
                    "This provider does not contain the state's previous serializer's snapshot. Cannot provider a serializer for previous schema.");
        }

        this.cachedRestoredSerializer = previousSerializerSnapshot.restoreSerializer();
        return cachedRestoredSerializer;
    }

    /**
     * Gets the previous serializer snapshot.
     *
     * @return The previous serializer snapshot, or null if registered serializer was for a new
     *     state, not a restored one.
     */
    @Nullable
    public final TypeSerializerSnapshot<T> getPreviousSerializerSnapshot() {
        return previousSerializerSnapshot;
    }

    /**
     * For restored state, register a new serializer that potentially has a new serialization
     * schema.
     *
     * <p>Users are allowed to register serializers for state only once. Therefore, this method is
     * irrelevant if this provider was created with a serializer instance, since a state serializer
     * had been registered already.
     *
     * <p>For the case where this provider was created from a serializer snapshot, then this method
     * should be called at most once. The new serializer will be checked for its schema
     * compatibility with the previous serializer's schema, and returned to the caller. The caller
     * is responsible for checking the result and react appropriately to it, as follows:
     *
     * <ul>
     *   <li>{@link TypeSerializerSchemaCompatibility#isCompatibleAsIs()}: nothing needs to be done.
     *       {@link #currentSchemaSerializer()} now returns the newly registered serializer.
     *   <li>{@link TypeSerializerSchemaCompatibility#isCompatibleAfterMigration()}: state needs to
     *       be migrated before the serializer returned by {@link #currentSchemaSerializer()} can
     *       be used. The migration should be performed by reading the state with {@link
     *       #previousSchemaSerializer()}, and then writing it again with {@link
     *       #currentSchemaSerializer()}.
     *   <li>{@link TypeSerializerSchemaCompatibility#isIncompatible()}: the registered serializer
     *       is incompatible. {@link #currentSchemaSerializer()} can no longer return a serializer
     *       for the state, and therefore this provider shouldn't be used anymore.
     * </ul>
     *
     * @param newSerializer the new serializer to register for the restored state.
     * @return the schema compatibility of the new registered serializer, with respect to the
     *     previous serializer.
     * @throws UnsupportedOperationException if a serializer has already been registered for the
     *     state.
     */
    @Nonnull
    public abstract TypeSerializerSchemaCompatibility<T> registerNewSerializerForRestoredState(
            TypeSerializer<T> newSerializer);

    /**
     * For restored state, set the state's previous serializer's snapshot.
     *
     * <p>Users are allowed to set the previous serializer's snapshot once. Therefore, this method
     * is irrelevant if this provider was created with a serializer snapshot, since the serializer
     * snapshot had been set already.
     *
     * <p>For the case where this provider was created from a serializer instance, then this method
     * should be called at most once. The initially registered state serializer will be checked for
     * its schema compatibility with the previous serializer's schema, and returned to the caller.
     * The caller is responsible for checking the result and react appropriately to it, as follows:
     *
     * <ul>
     *   <li>{@link TypeSerializerSchemaCompatibility#isCompatibleAsIs()}: nothing needs to be done.
     *       {@link #currentSchemaSerializer()} remains to return the initially registered
     *       serializer.
     *   <li>{@link TypeSerializerSchemaCompatibility#isCompatibleAfterMigration()}: state needs to
     *       be migrated before the serializer returned by {@link #currentSchemaSerializer()} can
     *       be used. The migration should be performed by reading the state with {@link
     *       #previousSchemaSerializer()}, and then writing it again with {@link
     *       #currentSchemaSerializer()}.
     *   <li>{@link TypeSerializerSchemaCompatibility#isIncompatible()}: the registered serializer
     *       is incompatible. {@link #currentSchemaSerializer()} can no longer return a serializer
     *       for the state, and therefore this provider shouldn't be used anymore.
     * </ul>
     *
     * @param previousSerializerSnapshot the state's previous serializer's snapshot
     * @return the schema compatibility of the initially registered serializer, with respect to the
     *     previous serializer.
     * @throws UnsupportedOperationException if the previous serializer's snapshot has already been
     *     set.
     */
    @Nonnull
    public abstract TypeSerializerSchemaCompatibility<T>
            setPreviousSerializerSnapshotForRestoredState(
                    TypeSerializerSnapshot<T> previousSerializerSnapshot);

    /**
     * Invalidates access to the current schema serializer. This lets {@link
     * #currentSchemaSerializer()} fail when invoked.
     *
     * <p>Access to the current schema serializer should be invalidated by the methods {@link
     * #registerNewSerializerForRestoredState(TypeSerializer)} or {@link
     * #setPreviousSerializerSnapshotForRestoredState(TypeSerializerSnapshot)} once the registered
     * serializer is determined to be incompatible.
     */
    protected final void invalidateCurrentSchemaSerializerAccess() {
        this.isRegisteredWithIncompatibleSerializer = true;
    }

    /**
     * Implementation of the {@link StateSerializerProvider} for the case where a snapshot of the
     * previous serializer is obtained before a new state serializer is registered (hence, the
     * naming "lazily" registered).
     */
    private static class LazilyRegisteredStateSerializerProvider<T>
            extends StateSerializerProvider<T> {

        LazilyRegisteredStateSerializerProvider(
                TypeSerializerSnapshot<T> previousSerializerSnapshot) {
            super(Preconditions.checkNotNull(previousSerializerSnapshot));
        }

        @Nonnull
        @Override
        @SuppressWarnings("ConstantConditions")
        public TypeSerializerSchemaCompatibility<T> registerNewSerializerForRestoredState(
                TypeSerializer<T> newSerializer) {
            checkNotNull(newSerializer);
            if (registeredSerializer != null) {
                throw new UnsupportedOperationException(
                        "A serializer has already been registered for the state; re-registration is not allowed.");
            }

            // the snapshot is never null here: this subclass is only constructed with a
            // non-null snapshot (hence the suppressed "ConstantConditions" warning)
            TypeSerializerSchemaCompatibility<T> result =
                    previousSerializerSnapshot.resolveSchemaCompatibility(newSerializer);
            if (result.isIncompatible()) {
                invalidateCurrentSchemaSerializerAccess();
            }

            // a serializer is recorded in every case, including the incompatible one, so that
            // a second registration attempt is rejected by the check above
            if (result.isCompatibleWithReconfiguredSerializer()) {
                this.registeredSerializer = result.getReconfiguredSerializer();
            } else {
                this.registeredSerializer = newSerializer;
            }
            return result;
        }

        @Nonnull
        @Override
        public TypeSerializerSchemaCompatibility<T> setPreviousSerializerSnapshotForRestoredState(
                TypeSerializerSnapshot<T> previousSerializerSnapshot) {
            throw new UnsupportedOperationException(
                    "The snapshot of the state's previous serializer has already been set; cannot reset.");
        }
    }

    /**
     * Implementation of the {@link StateSerializerProvider} for the case where a new state
     * serializer instance is registered first, before any snapshots of the previous state
     * serializer is obtained (hence, the naming "eagerly" registered).
     */
    private static class EagerlyRegisteredStateSerializerProvider<T>
            extends StateSerializerProvider<T> {

        EagerlyRegisteredStateSerializerProvider(TypeSerializer<T> registeredStateSerializer) {
            super(Preconditions.checkNotNull(registeredStateSerializer));
        }

        @Nonnull
        @Override
        public TypeSerializerSchemaCompatibility<T> registerNewSerializerForRestoredState(
                TypeSerializer<T> newSerializer) {
            throw new UnsupportedOperationException(
                    "A serializer has already been registered for the state; re-registration is not allowed.");
        }

        @Nonnull
        @Override
        public TypeSerializerSchemaCompatibility<T> setPreviousSerializerSnapshotForRestoredState(
                TypeSerializerSnapshot<T> previousSerializerSnapshot) {
            checkNotNull(previousSerializerSnapshot);
            if (this.previousSerializerSnapshot != null) {
                throw new UnsupportedOperationException(
                        "The snapshot of the state's previous serializer has already been set; cannot reset.");
            }

            this.previousSerializerSnapshot = previousSerializerSnapshot;

            TypeSerializerSchemaCompatibility<T> result =
                    previousSerializerSnapshot.resolveSchemaCompatibility(registeredSerializer);
            if (result.isIncompatible()) {
                invalidateCurrentSchemaSerializerAccess();
            }

            // only a reconfigured serializer replaces the initially registered one;
            // in all other cases the initially registered serializer is kept
            if (result.isCompatibleWithReconfiguredSerializer()) {
                this.registeredSerializer = result.getReconfiguredSerializer();
            }
            return result;
        }
    }
}