
// org.apache.flink.state.api.ExistingSavepoint (Maven / Gradle / Ivy)
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.state.api;
import org.apache.flink.annotation.PublicEvolving;
import org.apache.flink.api.common.InvalidProgramException;
import org.apache.flink.api.common.functions.InvalidTypesException;
import org.apache.flink.api.common.state.ListStateDescriptor;
import org.apache.flink.api.common.state.MapStateDescriptor;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.common.typeutils.TypeSerializer;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.Utils;
import org.apache.flink.api.java.operators.DataSource;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.typeutils.TupleTypeInfo;
import org.apache.flink.api.java.typeutils.TypeExtractor;
import org.apache.flink.runtime.checkpoint.OperatorState;
import org.apache.flink.runtime.state.StateBackend;
import org.apache.flink.runtime.state.VoidNamespace;
import org.apache.flink.state.api.functions.KeyedStateReaderFunction;
import org.apache.flink.state.api.input.BroadcastStateInputFormat;
import org.apache.flink.state.api.input.KeyedStateInputFormat;
import org.apache.flink.state.api.input.ListStateInputFormat;
import org.apache.flink.state.api.input.UnionStateInputFormat;
import org.apache.flink.state.api.input.operator.KeyedStateReaderOperator;
import org.apache.flink.state.api.runtime.metadata.SavepointMetadata;
import org.apache.flink.streaming.api.windowing.assigners.WindowAssigner;
import org.apache.flink.streaming.api.windowing.windows.Window;
import org.apache.flink.util.Preconditions;
import javax.annotation.Nullable;
import java.io.IOException;
/**
* An existing savepoint. This class provides the entry points for reading previous existing
* operator states in savepoints. Operator states can be removed from and added to the set of
* existing operator states, and eventually, written to distributed storage as a new savepoint.
*
* New savepoints written using this class are based on the previous existing savepoint. This
* means that for existing operators that remain untouched, the new savepoint only contains a
* shallow copy of pointers to state data that resides in the previous existing savepoint paths.
* This means that both savepoints share state and one cannot be deleted without corrupting the
* other!
*
* @see SavepointReader
* @see SavepointWriter
* @deprecated For creating a new savepoint, use {@link SavepointWriter} and the data stream api
* under batch execution. For reading a savepoint, use {@link SavepointReader} and the data
* stream api under batch execution.
*/
@PublicEvolving
@Deprecated
@SuppressWarnings("WeakerAccess")
public class ExistingSavepoint extends WritableSavepoint {
/** The batch execution environment. Used for creating inputs for reading state. */
private final ExecutionEnvironment env;
/**
* The savepoint metadata, which maintains the current set of existing / newly added operator
* states.
*/
private final SavepointMetadata metadata;
/**
* The state backend that was previously used to write existing operator states in this
* savepoint. This is also the state backend that will be used when writing again this existing
* savepoint.
*/
@Nullable private final StateBackend stateBackend;
ExistingSavepoint(
ExecutionEnvironment env,
SavepointMetadata metadata,
@Nullable StateBackend stateBackend)
throws IOException {
super(metadata, stateBackend);
Preconditions.checkNotNull(env, "The execution environment must not be null");
Preconditions.checkNotNull(metadata, "The savepoint metadata must not be null");
this.env = env;
this.metadata = metadata;
this.stateBackend = stateBackend;
}
/**
* Read operator {@code ListState} from a {@code Savepoint}.
*
* @param uid The uid of the operator.
* @param name The (unique) name for the state.
* @param typeInfo The type of the elements in the state.
* @param The type of the values that are in the list state.
* @return A {@code DataSet} representing the elements in state.
* @throws IOException If the savepoint path is invalid or the uid does not exist.
*/
public DataSource readListState(String uid, String name, TypeInformation typeInfo)
throws IOException {
OperatorState operatorState = metadata.getOperatorState(uid);
ListStateDescriptor descriptor = new ListStateDescriptor<>(name, typeInfo);
ListStateInputFormat inputFormat =
new ListStateInputFormat<>(
operatorState, env.getConfiguration(), stateBackend, descriptor);
return env.createInput(inputFormat, typeInfo);
}
/**
* Read operator {@code ListState} from a {@code Savepoint} when a custom serializer was used;
* e.g., a different serializer than the one returned by {@code
* TypeInformation#createSerializer}.
*
* @param uid The uid of the operator.
* @param name The (unique) name for the state.
* @param typeInfo The type of the elements in the state.
* @param serializer The serializer used to write the elements into state.
* @param The type of the values that are in the list state.
* @return A {@code DataSet} representing the elements in state.
* @throws IOException If the savepoint path is invalid or the uid does not exist.
*/
public DataSource readListState(
String uid, String name, TypeInformation typeInfo, TypeSerializer serializer)
throws IOException {
OperatorState operatorState = metadata.getOperatorState(uid);
ListStateDescriptor descriptor = new ListStateDescriptor<>(name, serializer);
ListStateInputFormat inputFormat =
new ListStateInputFormat<>(
operatorState, env.getConfiguration(), stateBackend, descriptor);
return env.createInput(inputFormat, typeInfo);
}
/**
* Read operator {@code UnionState} from a {@code Savepoint}.
*
* @param uid The uid of the operator.
* @param name The (unique) name for the state.
* @param typeInfo The type of the elements in the state.
* @param The type of the values that are in the union state.
* @return A {@code DataSet} representing the elements in state.
* @throws IOException If the savepoint path is invalid or the uid does not exist.
*/
public DataSource readUnionState(String uid, String name, TypeInformation typeInfo)
throws IOException {
OperatorState operatorState = metadata.getOperatorState(uid);
ListStateDescriptor descriptor = new ListStateDescriptor<>(name, typeInfo);
UnionStateInputFormat inputFormat =
new UnionStateInputFormat<>(
operatorState, env.getConfiguration(), stateBackend, descriptor);
return env.createInput(inputFormat, typeInfo);
}
/**
* Read operator {@code UnionState} from a {@code Savepoint} when a custom serializer was used;
* e.g., a different serializer than the one returned by {@code
* TypeInformation#createSerializer}.
*
* @param uid The uid of the operator.
* @param name The (unique) name for the state.
* @param typeInfo The type of the elements in the state.
* @param serializer The serializer used to write the elements into state.
* @param The type of the values that are in the union state.
* @return A {@code DataSet} representing the elements in state.
* @throws IOException If the savepoint path is invalid or the uid does not exist.
*/
public DataSource readUnionState(
String uid, String name, TypeInformation typeInfo, TypeSerializer serializer)
throws IOException {
OperatorState operatorState = metadata.getOperatorState(uid);
ListStateDescriptor descriptor = new ListStateDescriptor<>(name, serializer);
UnionStateInputFormat inputFormat =
new UnionStateInputFormat<>(
operatorState, env.getConfiguration(), stateBackend, descriptor);
return env.createInput(inputFormat, typeInfo);
}
/**
* Read operator {@code BroadcastState} from a {@code Savepoint}.
*
* @param uid The uid of the operator.
* @param name The (unique) name for the state.
* @param keyTypeInfo The type information for the keys in the state.
* @param valueTypeInfo The type information for the values in the state.
* @param The type of keys in state.
* @param The type of values in state.
* @return A {@code DataSet} of key-value pairs from state.
* @throws IOException If the savepoint does not contain the specified uid.
*/
public DataSource> readBroadcastState(
String uid,
String name,
TypeInformation keyTypeInfo,
TypeInformation valueTypeInfo)
throws IOException {
OperatorState operatorState = metadata.getOperatorState(uid);
MapStateDescriptor descriptor =
new MapStateDescriptor<>(name, keyTypeInfo, valueTypeInfo);
BroadcastStateInputFormat inputFormat =
new BroadcastStateInputFormat<>(
operatorState, env.getConfiguration(), stateBackend, descriptor);
return env.createInput(inputFormat, new TupleTypeInfo<>(keyTypeInfo, valueTypeInfo));
}
/**
* Read operator {@code BroadcastState} from a {@code Savepoint} when a custom serializer was
* used; e.g., a different serializer than the one returned by {@code
* TypeInformation#createSerializer}.
*
* @param uid The uid of the operator.
* @param name The (unique) name for the state.
* @param keyTypeInfo The type information for the keys in the state.
* @param valueTypeInfo The type information for the values in the state.
* @param keySerializer The type serializer used to write keys into the state.
* @param valueSerializer The type serializer used to write values into the state.
* @param The type of keys in state.
* @param The type of values in state.
* @return A {@code DataSet} of key-value pairs from state.
* @throws IOException If the savepoint path is invalid or the uid does not exist.
*/
public DataSource> readBroadcastState(
String uid,
String name,
TypeInformation keyTypeInfo,
TypeInformation valueTypeInfo,
TypeSerializer keySerializer,
TypeSerializer valueSerializer)
throws IOException {
OperatorState operatorState = metadata.getOperatorState(uid);
MapStateDescriptor descriptor =
new MapStateDescriptor<>(name, keySerializer, valueSerializer);
BroadcastStateInputFormat inputFormat =
new BroadcastStateInputFormat<>(
operatorState, env.getConfiguration(), stateBackend, descriptor);
return env.createInput(inputFormat, new TupleTypeInfo<>(keyTypeInfo, valueTypeInfo));
}
/**
* Read keyed state from an operator in a {@code Savepoint}.
*
* @param uid The uid of the operator.
* @param function The {@link KeyedStateReaderFunction} that is called for each key in state.
* @param The type of the key in state.
* @param The output type of the transform function.
* @return A {@code DataSet} of objects read from keyed state.
* @throws IOException If the savepoint does not contain operator state with the given uid.
*/
public DataSource readKeyedState(
String uid, KeyedStateReaderFunction function) throws IOException {
TypeInformation keyTypeInfo;
TypeInformation outType;
try {
keyTypeInfo =
TypeExtractor.createTypeInfo(
KeyedStateReaderFunction.class, function.getClass(), 0, null, null);
} catch (InvalidTypesException e) {
throw new InvalidProgramException(
"The key type of the KeyedStateReaderFunction could not be automatically determined. Please use "
+ "Savepoint#readKeyedState(String, KeyedStateReaderFunction, TypeInformation, TypeInformation) instead.",
e);
}
try {
outType =
TypeExtractor.getUnaryOperatorReturnType(
function,
KeyedStateReaderFunction.class,
0,
1,
TypeExtractor.NO_INDEX,
keyTypeInfo,
Utils.getCallLocationName(),
false);
} catch (InvalidTypesException e) {
throw new InvalidProgramException(
"The output type of the KeyedStateReaderFunction could not be automatically determined. Please use "
+ "Savepoint#readKeyedState(String, KeyedStateReaderFunction, TypeInformation, TypeInformation) instead.",
e);
}
return readKeyedState(uid, function, keyTypeInfo, outType);
}
/**
* Read keyed state from an operator in a {@code Savepoint}.
*
* @param uid The uid of the operator.
* @param function The {@link KeyedStateReaderFunction} that is called for each key in state.
* @param keyTypeInfo The type information of the key in state.
* @param outTypeInfo The type information of the output of the transform reader function.
* @param The type of the key in state.
* @param The output type of the transform function.
* @return A {@code DataSet} of objects read from keyed state.
* @throws IOException If the savepoint does not contain operator state with the given uid.
*/
public DataSource readKeyedState(
String uid,
KeyedStateReaderFunction function,
TypeInformation keyTypeInfo,
TypeInformation outTypeInfo)
throws IOException {
OperatorState operatorState = metadata.getOperatorState(uid);
KeyedStateInputFormat inputFormat =
new KeyedStateInputFormat<>(
operatorState,
stateBackend,
env.getConfiguration(),
new KeyedStateReaderOperator<>(function, keyTypeInfo));
return env.createInput(inputFormat, outTypeInfo);
}
/**
* Read window state from an operator in a {@code Savepoint}. This method supports reading from
* any type of window.
*
* @param assigner The {@link WindowAssigner} used to write out the operator.
* @return A {@link WindowReader}.
*/
public WindowReader window(WindowAssigner, W> assigner) {
Preconditions.checkNotNull(assigner, "The window assigner must not be null");
TypeSerializer windowSerializer = assigner.getWindowSerializer(env.getConfig());
return window(windowSerializer);
}
/**
* Read window state from an operator in a {@code Savepoint}. This method supports reading from
* any type of window.
*
* @param windowSerializer The serializer used for the window type.
* @return A {@link WindowReader}.
*/
public WindowReader window(TypeSerializer windowSerializer) {
Preconditions.checkNotNull(windowSerializer, "The window serializer must not be null");
return new WindowReader<>(env, metadata, stateBackend, windowSerializer);
}
}