All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.flink.state.api.SavepointReader Maven / Gradle / Ivy

There is a newer version: 2.0-preview1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.state.api;

import org.apache.flink.annotation.PublicEvolving;
import org.apache.flink.api.common.InvalidProgramException;
import org.apache.flink.api.common.functions.InvalidTypesException;
import org.apache.flink.api.common.state.ListStateDescriptor;
import org.apache.flink.api.common.state.MapStateDescriptor;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.common.typeutils.TypeSerializer;
import org.apache.flink.api.java.Utils;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.typeutils.TupleTypeInfo;
import org.apache.flink.api.java.typeutils.TypeExtractor;
import org.apache.flink.runtime.checkpoint.OperatorState;
import org.apache.flink.runtime.checkpoint.metadata.CheckpointMetadata;
import org.apache.flink.runtime.state.StateBackend;
import org.apache.flink.runtime.state.VoidNamespace;
import org.apache.flink.state.api.functions.KeyedStateReaderFunction;
import org.apache.flink.state.api.input.BroadcastStateInputFormat;
import org.apache.flink.state.api.input.KeyedStateInputFormat;
import org.apache.flink.state.api.input.ListStateInputFormat;
import org.apache.flink.state.api.input.SourceBuilder;
import org.apache.flink.state.api.input.UnionStateInputFormat;
import org.apache.flink.state.api.input.operator.KeyedStateReaderOperator;
import org.apache.flink.state.api.runtime.MutableConfig;
import org.apache.flink.state.api.runtime.SavepointLoader;
import org.apache.flink.state.api.runtime.metadata.SavepointMetadataV2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.assigners.WindowAssigner;
import org.apache.flink.streaming.api.windowing.windows.Window;
import org.apache.flink.util.Preconditions;

import javax.annotation.Nullable;

import java.io.IOException;
import java.util.Comparator;

/** The entry point for reading state from a Flink savepoint. */
@PublicEvolving
public class SavepointReader {

    /**
     * Loads an existing savepoint. Useful if you want to query the state of an existing
     * application. The savepoint will be read using the state backend defined via the clusters
     * configuration.
     *
     * @param env The execution environment used to transform the savepoint.
     * @param path The path to an existing savepoint on disk.
     * @return A {@link SavepointReader}.
     */
    public static SavepointReader read(StreamExecutionEnvironment env, String path)
            throws IOException {
        CheckpointMetadata metadata = SavepointLoader.loadSavepointMetadata(path);

        int maxParallelism =
                metadata.getOperatorStates().stream()
                        .map(OperatorState::getMaxParallelism)
                        .max(Comparator.naturalOrder())
                        .orElseThrow(
                                () ->
                                        new RuntimeException(
                                                "Savepoint must contain at least one operator state."));

        SavepointMetadataV2 savepointMetadata =
                new SavepointMetadataV2(
                        maxParallelism, metadata.getMasterStates(), metadata.getOperatorStates());
        return new SavepointReader(env, savepointMetadata, null);
    }

    /**
     * Loads an existing savepoint. Useful if you want to query the state of an existing
     * application.
     *
     * @param env The execution environment used to transform the savepoint.
     * @param path The path to an existing savepoint on disk.
     * @param stateBackend The state backend of the savepoint.
     * @return A {@link SavepointReader}.
     */
    public static SavepointReader read(
            StreamExecutionEnvironment env, String path, StateBackend stateBackend)
            throws IOException {
        CheckpointMetadata metadata = SavepointLoader.loadSavepointMetadata(path);

        int maxParallelism =
                metadata.getOperatorStates().stream()
                        .map(OperatorState::getMaxParallelism)
                        .max(Comparator.naturalOrder())
                        .orElseThrow(
                                () ->
                                        new RuntimeException(
                                                "Savepoint must contain at least one operator state."));

        SavepointMetadataV2 savepointMetadata =
                new SavepointMetadataV2(
                        maxParallelism, metadata.getMasterStates(), metadata.getOperatorStates());
        return new SavepointReader(env, savepointMetadata, stateBackend);
    }

    /** The execution environment. Used for creating inputs for reading state. */
    private final StreamExecutionEnvironment env;

    /**
     * The savepoint metadata, which maintains the current set of existing / newly added operator
     * states.
     */
    private final SavepointMetadataV2 metadata;

    /**
     * The state backend that was previously used to write existing operator states in this
     * savepoint. If null, the reader will use the state backend defined via the cluster
     * configuration.
     */
    @Nullable private final StateBackend stateBackend;

    SavepointReader(
            StreamExecutionEnvironment env,
            SavepointMetadataV2 metadata,
            @Nullable StateBackend stateBackend) {
        Preconditions.checkNotNull(env, "The execution environment must not be null");
        Preconditions.checkNotNull(metadata, "The savepoint metadata must not be null");

        this.env = env;
        this.metadata = metadata;
        this.stateBackend = stateBackend;
    }

    /**
     * Read operator {@code ListState} from a {@code Savepoint}.
     *
     * @param uid The uid of the operator.
     * @param name The (unique) name for the state.
     * @param typeInfo The type of the elements in the state.
     * @param  The type of the values that are in the list state.
     * @return A {@code DataStream} representing the elements in state.
     * @throws IOException If the savepoint path is invalid or the uid does not exist.
     */
    public  DataStream readListState(String uid, String name, TypeInformation typeInfo)
            throws IOException {
        OperatorState operatorState = metadata.getOperatorState(uid);
        ListStateDescriptor descriptor = new ListStateDescriptor<>(name, typeInfo);
        ListStateInputFormat inputFormat =
                new ListStateInputFormat<>(
                        operatorState,
                        MutableConfig.of(env.getConfiguration()),
                        stateBackend,
                        descriptor);
        return SourceBuilder.fromFormat(env, inputFormat, typeInfo);
    }

    /**
     * Read operator {@code ListState} from a {@code Savepoint} when a custom serializer was used;
     * e.g., a different serializer than the one returned by {@code
     * TypeInformation#createSerializer}.
     *
     * @param uid The uid of the operator.
     * @param name The (unique) name for the state.
     * @param typeInfo The type of the elements in the state.
     * @param serializer The serializer used to write the elements into state.
     * @param  The type of the values that are in the list state.
     * @return A {@code DataStream} representing the elements in state.
     * @throws IOException If the savepoint path is invalid or the uid does not exist.
     */
    public  DataStream readListState(
            String uid, String name, TypeInformation typeInfo, TypeSerializer serializer)
            throws IOException {

        OperatorState operatorState = metadata.getOperatorState(uid);
        ListStateDescriptor descriptor = new ListStateDescriptor<>(name, serializer);
        ListStateInputFormat inputFormat =
                new ListStateInputFormat<>(
                        operatorState,
                        MutableConfig.of(env.getConfiguration()),
                        stateBackend,
                        descriptor);
        return SourceBuilder.fromFormat(env, inputFormat, typeInfo);
    }

    /**
     * Read operator {@code UnionState} from a {@code Savepoint}.
     *
     * @param uid The uid of the operator.
     * @param name The (unique) name for the state.
     * @param typeInfo The type of the elements in the state.
     * @param  The type of the values that are in the union state.
     * @return A {@code DataStream} representing the elements in state.
     * @throws IOException If the savepoint path is invalid or the uid does not exist.
     */
    public  DataStream readUnionState(String uid, String name, TypeInformation typeInfo)
            throws IOException {
        OperatorState operatorState = metadata.getOperatorState(uid);
        ListStateDescriptor descriptor = new ListStateDescriptor<>(name, typeInfo);
        UnionStateInputFormat inputFormat =
                new UnionStateInputFormat<>(
                        operatorState,
                        MutableConfig.of(env.getConfiguration()),
                        stateBackend,
                        descriptor);
        return SourceBuilder.fromFormat(env, inputFormat, typeInfo);
    }

    /**
     * Read operator {@code UnionState} from a {@code Savepoint} when a custom serializer was used;
     * e.g., a different serializer than the one returned by {@code
     * TypeInformation#createSerializer}.
     *
     * @param uid The uid of the operator.
     * @param name The (unique) name for the state.
     * @param typeInfo The type of the elements in the state.
     * @param serializer The serializer used to write the elements into state.
     * @param  The type of the values that are in the union state.
     * @return A {@code DataStream} representing the elements in state.
     * @throws IOException If the savepoint path is invalid or the uid does not exist.
     */
    public  DataStream readUnionState(
            String uid, String name, TypeInformation typeInfo, TypeSerializer serializer)
            throws IOException {

        OperatorState operatorState = metadata.getOperatorState(uid);
        ListStateDescriptor descriptor = new ListStateDescriptor<>(name, serializer);
        UnionStateInputFormat inputFormat =
                new UnionStateInputFormat<>(
                        operatorState,
                        MutableConfig.of(env.getConfiguration()),
                        stateBackend,
                        descriptor);
        return SourceBuilder.fromFormat(env, inputFormat, typeInfo);
    }

    /**
     * Read operator {@code BroadcastState} from a {@code Savepoint}.
     *
     * @param uid The uid of the operator.
     * @param name The (unique) name for the state.
     * @param keyTypeInfo The type information for the keys in the state.
     * @param valueTypeInfo The type information for the values in the state.
     * @param  The type of keys in state.
     * @param  The type of values in state.
     * @return A {@code DataStream} of key-value pairs from state.
     * @throws IOException If the savepoint does not contain the specified uid.
     */
    public  DataStream> readBroadcastState(
            String uid,
            String name,
            TypeInformation keyTypeInfo,
            TypeInformation valueTypeInfo)
            throws IOException {

        OperatorState operatorState = metadata.getOperatorState(uid);
        MapStateDescriptor descriptor =
                new MapStateDescriptor<>(name, keyTypeInfo, valueTypeInfo);
        BroadcastStateInputFormat inputFormat =
                new BroadcastStateInputFormat<>(
                        operatorState,
                        MutableConfig.of(env.getConfiguration()),
                        stateBackend,
                        descriptor);
        return SourceBuilder.fromFormat(
                env, inputFormat, new TupleTypeInfo<>(keyTypeInfo, valueTypeInfo));
    }

    /**
     * Read operator {@code BroadcastState} from a {@code Savepoint} when a custom serializer was
     * used; e.g., a different serializer than the one returned by {@code
     * TypeInformation#createSerializer}.
     *
     * @param uid The uid of the operator.
     * @param name The (unique) name for the state.
     * @param keyTypeInfo The type information for the keys in the state.
     * @param valueTypeInfo The type information for the values in the state.
     * @param keySerializer The type serializer used to write keys into the state.
     * @param valueSerializer The type serializer used to write values into the state.
     * @param  The type of keys in state.
     * @param  The type of values in state.
     * @return A {@code DataStream} of key-value pairs from state.
     * @throws IOException If the savepoint path is invalid or the uid does not exist.
     */
    public  DataStream> readBroadcastState(
            String uid,
            String name,
            TypeInformation keyTypeInfo,
            TypeInformation valueTypeInfo,
            TypeSerializer keySerializer,
            TypeSerializer valueSerializer)
            throws IOException {

        OperatorState operatorState = metadata.getOperatorState(uid);
        MapStateDescriptor descriptor =
                new MapStateDescriptor<>(name, keySerializer, valueSerializer);
        BroadcastStateInputFormat inputFormat =
                new BroadcastStateInputFormat<>(
                        operatorState,
                        MutableConfig.of(env.getConfiguration()),
                        stateBackend,
                        descriptor);
        return SourceBuilder.fromFormat(
                env, inputFormat, new TupleTypeInfo<>(keyTypeInfo, valueTypeInfo));
    }

    /**
     * Read keyed state from an operator in a {@code Savepoint}.
     *
     * @param uid The uid of the operator.
     * @param function The {@link KeyedStateReaderFunction} that is called for each key in state.
     * @param  The type of the key in state.
     * @param  The output type of the transform function.
     * @return A {@code DataStream} of objects read from keyed state.
     * @throws IOException If the savepoint does not contain operator state with the given uid.
     */
    public  DataStream readKeyedState(
            String uid, KeyedStateReaderFunction function) throws IOException {

        TypeInformation keyTypeInfo;
        TypeInformation outType;

        try {
            keyTypeInfo =
                    TypeExtractor.createTypeInfo(
                            KeyedStateReaderFunction.class, function.getClass(), 0, null, null);
        } catch (InvalidTypesException e) {
            throw new InvalidProgramException(
                    "The key type of the KeyedStateReaderFunction could not be automatically determined. Please use "
                            + "Savepoint#readKeyedState(String, KeyedStateReaderFunction, TypeInformation, TypeInformation) instead.",
                    e);
        }

        try {
            outType =
                    TypeExtractor.getUnaryOperatorReturnType(
                            function,
                            KeyedStateReaderFunction.class,
                            0,
                            1,
                            TypeExtractor.NO_INDEX,
                            keyTypeInfo,
                            Utils.getCallLocationName(),
                            false);
        } catch (InvalidTypesException e) {
            throw new InvalidProgramException(
                    "The output type of the KeyedStateReaderFunction could not be automatically determined. Please use "
                            + "Savepoint#readKeyedState(String, KeyedStateReaderFunction, TypeInformation, TypeInformation) instead.",
                    e);
        }

        return readKeyedState(uid, function, keyTypeInfo, outType);
    }

    /**
     * Read keyed state from an operator in a {@code Savepoint}.
     *
     * @param uid The uid of the operator.
     * @param function The {@link KeyedStateReaderFunction} that is called for each key in state.
     * @param keyTypeInfo The type information of the key in state.
     * @param outTypeInfo The type information of the output of the transform reader function.
     * @param  The type of the key in state.
     * @param  The output type of the transform function.
     * @return A {@code DataStream} of objects read from keyed state.
     * @throws IOException If the savepoint does not contain operator state with the given uid.
     */
    public  DataStream readKeyedState(
            String uid,
            KeyedStateReaderFunction function,
            TypeInformation keyTypeInfo,
            TypeInformation outTypeInfo)
            throws IOException {

        OperatorState operatorState = metadata.getOperatorState(uid);
        KeyedStateInputFormat inputFormat =
                new KeyedStateInputFormat<>(
                        operatorState,
                        stateBackend,
                        MutableConfig.of(env.getConfiguration()),
                        new KeyedStateReaderOperator<>(function, keyTypeInfo));

        return SourceBuilder.fromFormat(env, inputFormat, outTypeInfo);
    }

    /**
     * Read window state from an operator in a {@code Savepoint}. This method supports reading from
     * any type of window.
     *
     * @param assigner The {@link WindowAssigner} used to write out the operator.
     * @return A {@link WindowSavepointReader}.
     */
    public  WindowSavepointReader window(WindowAssigner assigner) {
        Preconditions.checkNotNull(assigner, "The window assigner must not be null");
        TypeSerializer windowSerializer = assigner.getWindowSerializer(env.getConfig());
        return window(windowSerializer);
    }

    /**
     * Read window state from an operator in a {@code Savepoint}. This method supports reading from
     * any type of window.
     *
     * @param windowSerializer The serializer used for the window type.
     * @return A {@link WindowSavepointReader}.
     */
    public  WindowSavepointReader window(TypeSerializer windowSerializer) {
        Preconditions.checkNotNull(windowSerializer, "The window serializer must not be null");
        return new WindowSavepointReader<>(env, metadata, stateBackend, windowSerializer);
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy