All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.flink.streaming.api.datastream.BroadcastConnectedStream Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.streaming.api.datastream;

import org.apache.flink.annotation.Internal;
import org.apache.flink.annotation.PublicEvolving;
import org.apache.flink.api.common.state.MapStateDescriptor;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.typeutils.TypeExtractor;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.co.BroadcastProcessFunction;
import org.apache.flink.streaming.api.functions.co.KeyedBroadcastProcessFunction;
import org.apache.flink.streaming.api.transformations.BroadcastStateTransformation;
import org.apache.flink.streaming.api.transformations.KeyedBroadcastStateTransformation;
import org.apache.flink.util.Preconditions;
import org.apache.flink.util.Utils;

import java.util.List;

import static java.util.Objects.requireNonNull;

/**
 * A BroadcastConnectedStream represents the result of connecting a keyed or non-keyed stream, with
 * a {@link BroadcastStream} with {@link org.apache.flink.api.common.state.BroadcastState broadcast
 * state(s)}. As in the case of {@link ConnectedStreams} these streams are useful for cases where
 * operations on one stream directly affect the operations on the other stream, usually via shared
 * state between the streams.
 *
 * 

An example for the use of such connected streams would be to apply rules that change over time * onto another, possibly keyed stream. The stream with the broadcast state has the rules, and will * store them in the broadcast state, while the other stream will contain the elements to apply the * rules to. By broadcasting the rules, these will be available in all parallel instances, and can * be applied to all partitions of the other stream. * * @param The input type of the non-broadcast side. * @param The input type of the broadcast side. */ @PublicEvolving public class BroadcastConnectedStream { private final StreamExecutionEnvironment environment; private final DataStream nonBroadcastStream; private final BroadcastStream broadcastStream; private final List> broadcastStateDescriptors; protected BroadcastConnectedStream( final StreamExecutionEnvironment env, final DataStream input1, final BroadcastStream input2, final List> broadcastStateDescriptors) { this.environment = requireNonNull(env); this.nonBroadcastStream = requireNonNull(input1); this.broadcastStream = requireNonNull(input2); this.broadcastStateDescriptors = requireNonNull(broadcastStateDescriptors); } public StreamExecutionEnvironment getExecutionEnvironment() { return environment; } /** * Returns the non-broadcast {@link DataStream}. * * @return The stream which, by convention, is not broadcasted. */ public DataStream getFirstInput() { return nonBroadcastStream; } /** * Returns the {@link BroadcastStream}. * * @return The stream which, by convention, is the broadcast one. */ public BroadcastStream getSecondInput() { return broadcastStream; } /** * Gets the type of the first input. * * @return The type of the first input */ public TypeInformation getType1() { return nonBroadcastStream.getType(); } /** * Gets the type of the second input. 
* * @return The type of the second input */ public TypeInformation getType2() { return broadcastStream.getType(); } /** * Assumes as inputs a {@link BroadcastStream} and a {@link KeyedStream} and applies the given * {@link KeyedBroadcastProcessFunction} on them, thereby creating a transformed output stream. * * @param function The {@link KeyedBroadcastProcessFunction} that is called for each element in * the stream. * @param The type of the keys in the keyed stream. * @param The type of the output elements. * @return The transformed {@link DataStream}. */ @PublicEvolving public SingleOutputStreamOperator process( final KeyedBroadcastProcessFunction function) { TypeInformation outTypeInfo = TypeExtractor.getBinaryOperatorReturnType( function, KeyedBroadcastProcessFunction.class, 1, 2, 3, TypeExtractor.NO_INDEX, getType1(), getType2(), Utils.getCallLocationName(), true); return process(function, outTypeInfo); } /** * Assumes as inputs a {@link BroadcastStream} and a {@link KeyedStream} and applies the given * {@link KeyedBroadcastProcessFunction} on them, thereby creating a transformed output stream. * * @param function The {@link KeyedBroadcastProcessFunction} that is called for each element in * the stream. * @param outTypeInfo The type of the output elements. * @param The type of the keys in the keyed stream. * @param The type of the output elements. * @return The transformed {@link DataStream}. */ @PublicEvolving public SingleOutputStreamOperator process( final KeyedBroadcastProcessFunction function, final TypeInformation outTypeInfo) { Preconditions.checkNotNull(function); Preconditions.checkArgument( nonBroadcastStream instanceof KeyedStream, "A KeyedBroadcastProcessFunction can only be used on a keyed stream."); return transform(function, outTypeInfo); } /** * Assumes as inputs a {@link BroadcastStream} and a non-keyed {@link DataStream} and applies * the given {@link BroadcastProcessFunction} on them, thereby creating a transformed output * stream. 
* * @param function The {@link BroadcastProcessFunction} that is called for each element in the * stream. * @param The type of the output elements. * @return The transformed {@link DataStream}. */ @PublicEvolving public SingleOutputStreamOperator process( final BroadcastProcessFunction function) { TypeInformation outTypeInfo = TypeExtractor.getBinaryOperatorReturnType( function, BroadcastProcessFunction.class, 0, 1, 2, TypeExtractor.NO_INDEX, getType1(), getType2(), Utils.getCallLocationName(), true); return process(function, outTypeInfo); } /** * Assumes as inputs a {@link BroadcastStream} and a non-keyed {@link DataStream} and applies * the given {@link BroadcastProcessFunction} on them, thereby creating a transformed output * stream. * * @param function The {@link BroadcastProcessFunction} that is called for each element in the * stream. * @param outTypeInfo The type of the output elements. * @param The type of the output elements. * @return The transformed {@link DataStream}. */ @PublicEvolving public SingleOutputStreamOperator process( final BroadcastProcessFunction function, final TypeInformation outTypeInfo) { Preconditions.checkNotNull(function); Preconditions.checkArgument( !(nonBroadcastStream instanceof KeyedStream), "A BroadcastProcessFunction can only be used on a non-keyed stream."); return transform(function, outTypeInfo); } @Internal private SingleOutputStreamOperator transform( final BroadcastProcessFunction userFunction, final TypeInformation outTypeInfo) { // read the output type of the input Transforms to coax out errors about MissingTypeInfo nonBroadcastStream.getType(); broadcastStream.getType(); final BroadcastStateTransformation transformation = new BroadcastStateTransformation<>( "Co-Process-Broadcast", nonBroadcastStream.getTransformation(), broadcastStream.getTransformation(), clean(userFunction), broadcastStateDescriptors, outTypeInfo, environment.getParallelism(), false); @SuppressWarnings({"unchecked", "rawtypes"}) final 
SingleOutputStreamOperator returnStream = new SingleOutputStreamOperator(environment, transformation); getExecutionEnvironment().addOperator(transformation); return returnStream; } @Internal private SingleOutputStreamOperator transform( final KeyedBroadcastProcessFunction userFunction, final TypeInformation outTypeInfo) { // read the output type of the input Transforms to coax out errors about MissingTypeInfo nonBroadcastStream.getType(); broadcastStream.getType(); KeyedStream keyedInputStream = (KeyedStream) nonBroadcastStream; final KeyedBroadcastStateTransformation transformation = new KeyedBroadcastStateTransformation<>( "Co-Process-Broadcast-Keyed", nonBroadcastStream.getTransformation(), broadcastStream.getTransformation(), clean(userFunction), broadcastStateDescriptors, keyedInputStream.getKeyType(), keyedInputStream.getKeySelector(), outTypeInfo, environment.getParallelism(), false); @SuppressWarnings({"unchecked", "rawtypes"}) final SingleOutputStreamOperator returnStream = new SingleOutputStreamOperator(environment, transformation); getExecutionEnvironment().addOperator(transformation); return returnStream; } protected F clean(F f) { return getExecutionEnvironment().clean(f); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy