// org.apache.flink.streaming.api.datastream.BroadcastConnectedStream (Maven / Gradle / Ivy)
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.streaming.api.datastream;
import org.apache.flink.annotation.Internal;
import org.apache.flink.annotation.PublicEvolving;
import org.apache.flink.api.common.state.MapStateDescriptor;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.typeutils.TypeExtractor;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.co.BroadcastProcessFunction;
import org.apache.flink.streaming.api.functions.co.KeyedBroadcastProcessFunction;
import org.apache.flink.streaming.api.transformations.BroadcastStateTransformation;
import org.apache.flink.streaming.api.transformations.KeyedBroadcastStateTransformation;
import org.apache.flink.util.Preconditions;
import org.apache.flink.util.Utils;
import java.util.List;
import static java.util.Objects.requireNonNull;
/**
* A BroadcastConnectedStream represents the result of connecting a keyed or non-keyed stream, with
* a {@link BroadcastStream} with {@link org.apache.flink.api.common.state.BroadcastState broadcast
* state(s)}. As in the case of {@link ConnectedStreams} these streams are useful for cases where
* operations on one stream directly affect the operations on the other stream, usually via shared
* state between the streams.
*
* An example for the use of such connected streams would be to apply rules that change over time
* onto another, possibly keyed stream. The stream with the broadcast state has the rules, and will
* store them in the broadcast state, while the other stream will contain the elements to apply the
* rules to. By broadcasting the rules, these will be available in all parallel instances, and can
* be applied to all partitions of the other stream.
*
* @param The input type of the non-broadcast side.
* @param The input type of the broadcast side.
*/
@PublicEvolving
public class BroadcastConnectedStream {
private final StreamExecutionEnvironment environment;
private final DataStream nonBroadcastStream;
private final BroadcastStream broadcastStream;
private final List> broadcastStateDescriptors;
protected BroadcastConnectedStream(
final StreamExecutionEnvironment env,
final DataStream input1,
final BroadcastStream input2,
final List> broadcastStateDescriptors) {
this.environment = requireNonNull(env);
this.nonBroadcastStream = requireNonNull(input1);
this.broadcastStream = requireNonNull(input2);
this.broadcastStateDescriptors = requireNonNull(broadcastStateDescriptors);
}
public StreamExecutionEnvironment getExecutionEnvironment() {
return environment;
}
/**
* Returns the non-broadcast {@link DataStream}.
*
* @return The stream which, by convention, is not broadcasted.
*/
public DataStream getFirstInput() {
return nonBroadcastStream;
}
/**
* Returns the {@link BroadcastStream}.
*
* @return The stream which, by convention, is the broadcast one.
*/
public BroadcastStream getSecondInput() {
return broadcastStream;
}
/**
* Gets the type of the first input.
*
* @return The type of the first input
*/
public TypeInformation getType1() {
return nonBroadcastStream.getType();
}
/**
* Gets the type of the second input.
*
* @return The type of the second input
*/
public TypeInformation getType2() {
return broadcastStream.getType();
}
/**
* Assumes as inputs a {@link BroadcastStream} and a {@link KeyedStream} and applies the given
* {@link KeyedBroadcastProcessFunction} on them, thereby creating a transformed output stream.
*
* @param function The {@link KeyedBroadcastProcessFunction} that is called for each element in
* the stream.
* @param The type of the keys in the keyed stream.
* @param The type of the output elements.
* @return The transformed {@link DataStream}.
*/
@PublicEvolving
public SingleOutputStreamOperator process(
final KeyedBroadcastProcessFunction function) {
TypeInformation outTypeInfo =
TypeExtractor.getBinaryOperatorReturnType(
function,
KeyedBroadcastProcessFunction.class,
1,
2,
3,
TypeExtractor.NO_INDEX,
getType1(),
getType2(),
Utils.getCallLocationName(),
true);
return process(function, outTypeInfo);
}
/**
* Assumes as inputs a {@link BroadcastStream} and a {@link KeyedStream} and applies the given
* {@link KeyedBroadcastProcessFunction} on them, thereby creating a transformed output stream.
*
* @param function The {@link KeyedBroadcastProcessFunction} that is called for each element in
* the stream.
* @param outTypeInfo The type of the output elements.
* @param The type of the keys in the keyed stream.
* @param The type of the output elements.
* @return The transformed {@link DataStream}.
*/
@PublicEvolving
public SingleOutputStreamOperator process(
final KeyedBroadcastProcessFunction function,
final TypeInformation outTypeInfo) {
Preconditions.checkNotNull(function);
Preconditions.checkArgument(
nonBroadcastStream instanceof KeyedStream,
"A KeyedBroadcastProcessFunction can only be used on a keyed stream.");
return transform(function, outTypeInfo);
}
/**
* Assumes as inputs a {@link BroadcastStream} and a non-keyed {@link DataStream} and applies
* the given {@link BroadcastProcessFunction} on them, thereby creating a transformed output
* stream.
*
* @param function The {@link BroadcastProcessFunction} that is called for each element in the
* stream.
* @param The type of the output elements.
* @return The transformed {@link DataStream}.
*/
@PublicEvolving
public SingleOutputStreamOperator process(
final BroadcastProcessFunction function) {
TypeInformation outTypeInfo =
TypeExtractor.getBinaryOperatorReturnType(
function,
BroadcastProcessFunction.class,
0,
1,
2,
TypeExtractor.NO_INDEX,
getType1(),
getType2(),
Utils.getCallLocationName(),
true);
return process(function, outTypeInfo);
}
/**
* Assumes as inputs a {@link BroadcastStream} and a non-keyed {@link DataStream} and applies
* the given {@link BroadcastProcessFunction} on them, thereby creating a transformed output
* stream.
*
* @param function The {@link BroadcastProcessFunction} that is called for each element in the
* stream.
* @param outTypeInfo The type of the output elements.
* @param The type of the output elements.
* @return The transformed {@link DataStream}.
*/
@PublicEvolving
public SingleOutputStreamOperator process(
final BroadcastProcessFunction function,
final TypeInformation outTypeInfo) {
Preconditions.checkNotNull(function);
Preconditions.checkArgument(
!(nonBroadcastStream instanceof KeyedStream),
"A BroadcastProcessFunction can only be used on a non-keyed stream.");
return transform(function, outTypeInfo);
}
@Internal
private SingleOutputStreamOperator transform(
final BroadcastProcessFunction userFunction,
final TypeInformation outTypeInfo) {
// read the output type of the input Transforms to coax out errors about MissingTypeInfo
nonBroadcastStream.getType();
broadcastStream.getType();
final BroadcastStateTransformation transformation =
new BroadcastStateTransformation<>(
"Co-Process-Broadcast",
nonBroadcastStream.getTransformation(),
broadcastStream.getTransformation(),
clean(userFunction),
broadcastStateDescriptors,
outTypeInfo,
environment.getParallelism(),
false);
@SuppressWarnings({"unchecked", "rawtypes"})
final SingleOutputStreamOperator returnStream =
new SingleOutputStreamOperator(environment, transformation);
getExecutionEnvironment().addOperator(transformation);
return returnStream;
}
@Internal
private SingleOutputStreamOperator transform(
final KeyedBroadcastProcessFunction userFunction,
final TypeInformation outTypeInfo) {
// read the output type of the input Transforms to coax out errors about MissingTypeInfo
nonBroadcastStream.getType();
broadcastStream.getType();
KeyedStream keyedInputStream = (KeyedStream) nonBroadcastStream;
final KeyedBroadcastStateTransformation transformation =
new KeyedBroadcastStateTransformation<>(
"Co-Process-Broadcast-Keyed",
nonBroadcastStream.getTransformation(),
broadcastStream.getTransformation(),
clean(userFunction),
broadcastStateDescriptors,
keyedInputStream.getKeyType(),
keyedInputStream.getKeySelector(),
outTypeInfo,
environment.getParallelism(),
false);
@SuppressWarnings({"unchecked", "rawtypes"})
final SingleOutputStreamOperator returnStream =
new SingleOutputStreamOperator(environment, transformation);
getExecutionEnvironment().addOperator(transformation);
return returnStream;
}
protected F clean(F f) {
return getExecutionEnvironment().clean(f);
}
}