All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.flink.runtime.state.StateSerializerProvider Maven / Gradle / Ivy

There is a newer version: 1.13.6
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.state;

import org.apache.flink.annotation.Internal;
import org.apache.flink.api.common.typeutils.TypeSerializer;
import org.apache.flink.api.common.typeutils.TypeSerializerSchemaCompatibility;
import org.apache.flink.api.common.typeutils.TypeSerializerSnapshot;
import org.apache.flink.api.common.typeutils.UnloadableDummyTypeSerializer;
import org.apache.flink.util.Preconditions;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;

import static org.apache.flink.util.Preconditions.checkNotNull;
import static org.apache.flink.util.Preconditions.checkState;

/**
 * A {@link StateSerializerProvider} wraps logic on how to obtain serializers for registered state,
 * either with the previous schema of state in checkpoints or the current schema of state.
 *
 * 

A provider can be created from either a registered state serializer, or the snapshot of the * previous state serializer. For the former case, if the state was restored and a snapshot of the * previous state serializer was retrieved later on, the snapshot can be set on the provider which * also additionally checks the compatibility of the initially registered serializer. Similarly for * the latter case, if a new state serializer is registered later on, it can be set on the provider, * which then also checks the compatibility of the new registered serializer. * *

Simply put, the provider works both directions - either creating it first with a registered * serializer or the previous serializer's snapshot, and then setting the previous serializer's * snapshot (if the provider was created with a registered serializer) or a new registered state * serializer (if the provider was created with a serializer snapshot). Either way, the new * registered serializer is checked for schema compatibility once both the new serializer and the * previous serializer snapshot is present. * * @param the type of the state. */ @Internal public abstract class StateSerializerProvider { /** * The registered serializer for the state. * *

In the case that this provider was created from a restored serializer snapshot via {@link * #fromPreviousSerializerSnapshot(TypeSerializerSnapshot)}, but a new serializer was never * registered for the state (i.e., this is the case if a restored state was never accessed), * this would be {@code null}. */ @Nullable TypeSerializer registeredSerializer; /** * The state's previous serializer's snapshot. * *

In the case that this provider was created from a registered state serializer instance via * {@link #fromNewRegisteredSerializer(TypeSerializer)}, but a serializer snapshot was never * supplied to this provider (i.e. because the registered serializer was for a new state, not a * restored one), this would be {@code null}. */ @Nullable TypeSerializerSnapshot previousSerializerSnapshot; /** * The restore serializer, lazily created only when the restore serializer is accessed. * *

NOTE: It is important to only create this lazily, so that off-heap state do not fail * eagerly when restoring state that has a {@link UnloadableDummyTypeSerializer} as the previous * serializer. This should be relevant only for restores from Flink versions prior to 1.7.x. */ @Nullable private TypeSerializer cachedRestoredSerializer; private boolean isRegisteredWithIncompatibleSerializer = false; /** * Creates a {@link StateSerializerProvider} for restored state from the previous serializer's * snapshot. * *

Once a new serializer is registered for the state, it should be provided via the {@link * #registerNewSerializerForRestoredState(TypeSerializer)} method. * * @param stateSerializerSnapshot the previous serializer's snapshot. * @param the type of the state. * @return a new {@link StateSerializerProvider}. */ public static StateSerializerProvider fromPreviousSerializerSnapshot( TypeSerializerSnapshot stateSerializerSnapshot) { return new LazilyRegisteredStateSerializerProvider<>(stateSerializerSnapshot); } /** * Creates a {@link StateSerializerProvider} from the registered state serializer. * *

If the state is a restored one, and the previous serializer's snapshot is obtained later * on, is should be supplied via the {@link * #setPreviousSerializerSnapshotForRestoredState(TypeSerializerSnapshot)} method. * * @param registeredStateSerializer the new state's registered serializer. * @param the type of the state. * @return a new {@link StateSerializerProvider}. */ public static StateSerializerProvider fromNewRegisteredSerializer( TypeSerializer registeredStateSerializer) { return new EagerlyRegisteredStateSerializerProvider<>(registeredStateSerializer); } private StateSerializerProvider(@Nonnull TypeSerializer stateSerializer) { this.registeredSerializer = stateSerializer; this.previousSerializerSnapshot = null; } private StateSerializerProvider(@Nonnull TypeSerializerSnapshot previousSerializerSnapshot) { this.previousSerializerSnapshot = previousSerializerSnapshot; this.registeredSerializer = null; } /** * Gets the serializer that recognizes the current serialization schema of the state. This is * the serializer that should be used for regular state serialization and deserialization after * state has been restored. * *

If this provider was created from a restored state's serializer snapshot, while a new * serializer (with a new schema) was not registered for the state (i.e., because the state was * never accessed after it was restored), then the schema of state remains identical. Therefore, * in this case, it is guaranteed that the serializer returned by this method is the same as the * one returned by {@link #previousSchemaSerializer()}. * *

If this provider was created from a serializer instance, then this always returns the that * same serializer instance. If later on a snapshot of the previous serializer is supplied via * {@link #setPreviousSerializerSnapshotForRestoredState(TypeSerializerSnapshot)}, then the * initially supplied serializer instance will be checked for compatibility. * * @return a serializer that reads and writes in the current schema of the state. */ @Nonnull public final TypeSerializer currentSchemaSerializer() { if (registeredSerializer != null) { checkState( !isRegisteredWithIncompatibleSerializer, "Unable to provide a serializer with the current schema, because the restored state was " + "registered with a new serializer that has incompatible schema."); return registeredSerializer; } // if we are not yet registered with a new serializer, // we can just use the restore serializer to read / write the state. return previousSchemaSerializer(); } /** * Gets the serializer that recognizes the previous serialization schema of the state. This is * the serializer that should be used for restoring the state, i.e. when the state is still in * the previous serialization schema. * *

This method only returns a serializer if this provider has the previous serializer's * snapshot. Otherwise, trying to access the previous schema serializer will fail with an * exception. * * @return a serializer that reads and writes in the previous schema of the state. */ @Nonnull public final TypeSerializer previousSchemaSerializer() { if (cachedRestoredSerializer != null) { return cachedRestoredSerializer; } if (previousSerializerSnapshot == null) { throw new UnsupportedOperationException( "This provider does not contain the state's previous serializer's snapshot. Cannot provider a serializer for previous schema."); } this.cachedRestoredSerializer = previousSerializerSnapshot.restoreSerializer(); return cachedRestoredSerializer; } /** * Gets the previous serializer snapshot. * * @return The previous serializer snapshot, or null if registered serializer was for a new * state, not a restored one. */ @Nullable public final TypeSerializerSnapshot getPreviousSerializerSnapshot() { return previousSerializerSnapshot; } /** * For restored state, register a new serializer that potentially has a new serialization * schema. * *

Users are allowed to register serializers for state only once. Therefore, this method is * irrelevant if this provider was created with a serializer instance, since a state serializer * had been registered already. * *

For the case where this provider was created from a serializer snapshot, then this method * should be called at most once. The new serializer will be checked for its schema * compatibility with the previous serializer's schema, and returned to the caller. The caller * is responsible for checking the result and react appropriately to it, as follows: * *

    *
  • {@link TypeSerializerSchemaCompatibility#isCompatibleAsIs()}: nothing needs to be done. * {@link #currentSchemaSerializer()} now returns the newly registered serializer. *
  • {@link TypeSerializerSchemaCompatibility#isCompatibleAfterMigration()} ()}: state needs * to be migrated before the serializer returned by {@link #currentSchemaSerializer()} can * be used. The migration should be performed by reading the state with {@link * #previousSchemaSerializer()}, and then writing it again with {@link * #currentSchemaSerializer()}. *
  • {@link TypeSerializerSchemaCompatibility#isIncompatible()}: the registered serializer * is incompatible. {@link #currentSchemaSerializer()} can no longer return a serializer * for the state, and therefore this provider shouldn't be used anymore. *
* * @return the schema compatibility of the new registered serializer, with respect to the * previous serializer. */ @Nonnull public abstract TypeSerializerSchemaCompatibility registerNewSerializerForRestoredState( TypeSerializer newSerializer); /** * For restored state, set the state's previous serializer's snapshot. * *

Users are allowed to set the previous serializer's snapshot once. Therefore, this method * is irrelevant if this provider was created with a serializer snapshot, since the serializer * snapshot had been set already. * *

For the case where this provider was created from a serializer instance, then this method * should be called at most once. The initially registered state serializer will be checked for * its schema compatibility with the previous serializer's schema, and returned to the caller. * The caller is responsible for checking the result and react appropriately to it, as follows: * *

    *
  • {@link TypeSerializerSchemaCompatibility#isCompatibleAsIs()}: nothing needs to be done. * {@link #currentSchemaSerializer()} remains to return the initially registered * serializer. *
  • {@link TypeSerializerSchemaCompatibility#isCompatibleAfterMigration()} ()}: state needs * to be migrated before the serializer returned by {@link #currentSchemaSerializer()} can * be used. The migration should be performed by reading the state with {@link * #previousSchemaSerializer()}, and then writing it again with {@link * #currentSchemaSerializer()}. *
  • {@link TypeSerializerSchemaCompatibility#isIncompatible()}: the registered serializer * is incompatible. {@link #currentSchemaSerializer()} can no longer return a serializer * for the state, and therefore this provider shouldn't be used anymore. *
* * @param previousSerializerSnapshot the state's previous serializer's snapshot * @return the schema compatibility of the initially registered serializer, with respect to the * previous serializer. */ @Nonnull public abstract TypeSerializerSchemaCompatibility setPreviousSerializerSnapshotForRestoredState( TypeSerializerSnapshot previousSerializerSnapshot); /** * Invalidates access to the current schema serializer. This lets {@link * #currentSchemaSerializer()} fail when invoked. * *

Access to the current schema serializer should be invalidated by the methods {@link * #registerNewSerializerForRestoredState(TypeSerializer)} or {@link * #setPreviousSerializerSnapshotForRestoredState(TypeSerializerSnapshot)} once the registered * serializer is determined to be incompatible. */ protected final void invalidateCurrentSchemaSerializerAccess() { this.isRegisteredWithIncompatibleSerializer = true; } /** * Implementation of the {@link StateSerializerProvider} for the case where a snapshot of the * previous serializer is obtained before a new state serializer is registered (hence, the * naming "lazily" registered). */ private static class LazilyRegisteredStateSerializerProvider extends StateSerializerProvider { LazilyRegisteredStateSerializerProvider( TypeSerializerSnapshot previousSerializerSnapshot) { super(Preconditions.checkNotNull(previousSerializerSnapshot)); } @Nonnull @Override @SuppressWarnings("ConstantConditions") public TypeSerializerSchemaCompatibility registerNewSerializerForRestoredState( TypeSerializer newSerializer) { checkNotNull(newSerializer); if (registeredSerializer != null) { throw new UnsupportedOperationException( "A serializer has already been registered for the state; re-registration is not allowed."); } TypeSerializerSchemaCompatibility result = previousSerializerSnapshot.resolveSchemaCompatibility(newSerializer); if (result.isIncompatible()) { invalidateCurrentSchemaSerializerAccess(); } if (result.isCompatibleWithReconfiguredSerializer()) { this.registeredSerializer = result.getReconfiguredSerializer(); } else { this.registeredSerializer = newSerializer; } return result; } @Nonnull @Override public TypeSerializerSchemaCompatibility setPreviousSerializerSnapshotForRestoredState( TypeSerializerSnapshot previousSerializerSnapshot) { throw new UnsupportedOperationException( "The snapshot of the state's previous serializer has already been set; cannot reset."); } } /** * Implementation of the {@link StateSerializerProvider} for the case where a new state * serializer instance is registered first, before any snapshots of the previous state * serializer is obtained (hence, the naming "eagerly" registered). */ private static class EagerlyRegisteredStateSerializerProvider extends StateSerializerProvider { EagerlyRegisteredStateSerializerProvider(TypeSerializer registeredStateSerializer) { super(Preconditions.checkNotNull(registeredStateSerializer)); } @Nonnull @Override public TypeSerializerSchemaCompatibility registerNewSerializerForRestoredState( TypeSerializer newSerializer) { throw new UnsupportedOperationException( "A serializer has already been registered for the state; re-registration is not allowed."); } @Nonnull @Override public TypeSerializerSchemaCompatibility setPreviousSerializerSnapshotForRestoredState( TypeSerializerSnapshot previousSerializerSnapshot) { checkNotNull(previousSerializerSnapshot); if (this.previousSerializerSnapshot != null) { throw new UnsupportedOperationException( "The snapshot of the state's previous serializer has already been set; cannot reset."); } this.previousSerializerSnapshot = previousSerializerSnapshot; TypeSerializerSchemaCompatibility result = previousSerializerSnapshot.resolveSchemaCompatibility(registeredSerializer); if (result.isIncompatible()) { invalidateCurrentSchemaSerializerAccess(); } if (result.isCompatibleWithReconfiguredSerializer()) { this.registeredSerializer = result.getReconfiguredSerializer(); } return result; } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy